From bc94d5ca29ffca988ee11d077c5df0cd7370806e Mon Sep 17 00:00:00 2001
From: 100gle <569590461@qq.com>
Date: Wed, 25 May 2022 22:41:49 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E6=96=B0=E5=A2=9EVol11=E7=AB=A0?=
=?UTF-8?q?=E8=8A=82=E7=A4=BA=E4=BE=8B=E6=BA=90=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
code/11/dataframe.ipynb | 2048 +++++++++++++++++++++++++++++++++++++++
1 file changed, 2048 insertions(+)
create mode 100644 code/11/dataframe.ipynb
diff --git a/code/11/dataframe.ipynb b/code/11/dataframe.ipynb
new file mode 100644
index 0000000..3b795a1
--- /dev/null
+++ b/code/11/dataframe.ipynb
@@ -0,0 +1,2048 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": true,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# 什么是 DataFrame?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 112,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
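+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>name</th>\n",
+ "      <th>age</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>Alice</td>\n",
+ "      <td>10</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>Bob</td>\n",
+ "      <td>20</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>2</th>\n",
+ "      <td>Carol</td>\n",
+ "      <td>30</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"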
+ ],
+ "text/plain": [
+ " name age\n",
+ "0 Alice 10\n",
+ "1 Bob 20\n",
+ "2 Carol 30"
+ ]
+ },
+ "execution_count": 112,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "data = dict(\n",
+ " name=[\"Alice\", \"Bob\", \"Carol\"],\n",
+ " age=[10, 20, 30],\n",
+ ")\n",
+ "df = pd.DataFrame(data)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 113,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "<class 'pandas.core.frame.DataFrame'>\n",
+ "RangeIndex: 3 entries, 0 to 2\n",
+ "Data columns (total 2 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 name 3 non-null object\n",
+ " 1 age 3 non-null int64 \n",
+ "dtypes: int64(1), object(1)\n",
+ "memory usage: 176.0+ bytes\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 114,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 Alice\n",
+ "1 Bob\n",
+ "2 Carol\n",
+ "Name: name, dtype: object"
+ ]
+ },
+ "execution_count": 114,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[\"name\"] # 1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 115,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "pandas.core.series.Series"
+ ]
+ },
+ "execution_count": 115,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "type(df[\"name\"])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# 常用的 DataFrame 属性"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 116,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "RangeIndex(start=0, stop=3, step=1)"
+ ]
+ },
+ "execution_count": 116,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.index"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 117,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "name object\n",
+ "age int64\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 117,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 118,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(3, 2)"
+ ]
+ },
+ "execution_count": 118,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 119,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "6"
+ ]
+ },
+ "execution_count": 119,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.size"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 120,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([['Alice', 10],\n",
+ " ['Bob', 20],\n",
+ " ['Carol', 30]], dtype=object)"
+ ]
+ },
+ "execution_count": 120,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.values"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 121,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['name', 'age'], dtype='object')"
+ ]
+ },
+ "execution_count": 121,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 122,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[RangeIndex(start=0, stop=3, step=1), Index(['name', 'age'], dtype='object')]"
+ ]
+ },
+ "execution_count": 122,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.axes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 123,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2"
+ ]
+ },
+ "execution_count": 123,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.ndim"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 124,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "df is not empty\n"
+ ]
+ }
+ ],
+ "source": [
+ "if df.empty:\n",
+ " print(\"df is empty\")\n",
+ "else:\n",
+ " print(\"df is not empty\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# 常用的 DataFrame 方法"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "## 查看数据信息"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 125,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "<class 'pandas.core.frame.DataFrame'>\n",
+ "RangeIndex: 3 entries, 0 to 2\n",
+ "Data columns (total 2 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 name 3 non-null object\n",
+ " 1 age 3 non-null int64 \n",
+ "dtypes: int64(1), object(1)\n",
+ "memory usage: 176.0+ bytes\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "example:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 126,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
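+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>a</th>\n",
+ "      <th>b</th>\n",
+ "      <th>c</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>88</td>\n",
+ "      <td>69</td>\n",
+ "      <td>31</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>81</td>\n",
+ "      <td>87</td>\n",
+ "      <td>40</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>2</th>\n",
+ "      <td>23</td>\n",
+ "      <td>18</td>\n",
+ "      <td>15</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>3</th>\n",
+ "      <td>68</td>\n",
+ "      <td>52</td>\n",
+ "      <td>89</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>4</th>\n",
+ "      <td>30</td>\n",
+ "      <td>90</td>\n",
+ "      <td>73</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>...</th>\n",
+ "      <td>...</td>\n",
+ "      <td>...</td>\n",
+ "      <td>...</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>95</th>\n",
+ "      <td>95</td>\n",
+ "      <td>4</td>\n",
+ "      <td>8</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>96</th>\n",
+ "      <td>93</td>\n",
+ "      <td>14</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>97</th>\n",
+ "      <td>18</td>\n",
+ "      <td>91</td>\n",
+ "      <td>63</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>98</th>\n",
+ "      <td>27</td>\n",
+ "      <td>31</td>\n",
+ "      <td>62</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>99</th>\n",
+ "      <td>63</td>\n",
+ "      <td>73</td>\n",
+ "      <td>85</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "<p>100 rows × 3 columns</p>\n",
+ "</div>"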
+ ],
+ "text/plain": [
+ " a b c\n",
+ "0 88 69 31\n",
+ "1 81 87 40\n",
+ "2 23 18 15\n",
+ "3 68 52 89\n",
+ "4 30 90 73\n",
+ ".. .. .. ..\n",
+ "95 95 4 8\n",
+ "96 93 14 1\n",
+ "97 18 91 63\n",
+ "98 27 31 62\n",
+ "99 63 73 85\n",
+ "\n",
+ "[100 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 126,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import random\n",
+ "\n",
+ "import pandas as pd\n",
+ "\n",
+ "random.seed(233)\n",
+ "\n",
+ "data = pd.DataFrame(\n",
+ " {\n",
+ " \"a\": [random.randrange(1, 100) for _ in range(100)],\n",
+ " \"b\": [random.randrange(1, 100) for _ in range(100)],\n",
+ " \"c\": [random.randrange(1, 100) for _ in range(100)],\n",
+ " }\n",
+ ")\n",
+ "data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 127,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
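+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>a</th>\n",
+ "      <th>b</th>\n",
+ "      <th>c</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>88</td>\n",
+ "      <td>69</td>\n",
+ "      <td>31</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>81</td>\n",
+ "      <td>87</td>\n",
+ "      <td>40</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>2</th>\n",
+ "      <td>23</td>\n",
+ "      <td>18</td>\n",
+ "      <td>15</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>3</th>\n",
+ "      <td>68</td>\n",
+ "      <td>52</td>\n",
+ "      <td>89</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>4</th>\n",
+ "      <td>30</td>\n",
+ "      <td>90</td>\n",
+ "      <td>73</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"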
+ ],
+ "text/plain": [
+ " a b c\n",
+ "0 88 69 31\n",
+ "1 81 87 40\n",
+ "2 23 18 15\n",
+ "3 68 52 89\n",
+ "4 30 90 73"
+ ]
+ },
+ "execution_count": 127,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 128,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
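+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>a</th>\n",
+ "      <th>b</th>\n",
+ "      <th>c</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>95</th>\n",
+ "      <td>95</td>\n",
+ "      <td>4</td>\n",
+ "      <td>8</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>96</th>\n",
+ "      <td>93</td>\n",
+ "      <td>14</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>97</th>\n",
+ "      <td>18</td>\n",
+ "      <td>91</td>\n",
+ "      <td>63</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>98</th>\n",
+ "      <td>27</td>\n",
+ "      <td>31</td>\n",
+ "      <td>62</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>99</th>\n",
+ "      <td>63</td>\n",
+ "      <td>73</td>\n",
+ "      <td>85</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"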
+ ],
+ "text/plain": [
+ " a b c\n",
+ "95 95 4 8\n",
+ "96 93 14 1\n",
+ "97 18 91 63\n",
+ "98 27 31 62\n",
+ "99 63 73 85"
+ ]
+ },
+ "execution_count": 128,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.tail()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "## 索引数据"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "### 列表式"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 129,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
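+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>a</th>\n",
+ "      <th>b</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>88</td>\n",
+ "      <td>69</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>81</td>\n",
+ "      <td>87</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>2</th>\n",
+ "      <td>23</td>\n",
+ "      <td>18</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>3</th>\n",
+ "      <td>68</td>\n",
+ "      <td>52</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>4</th>\n",
+ "      <td>30</td>\n",
+ "      <td>90</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"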
+ ],
+ "text/plain": [
+ " a b\n",
+ "0 88 69\n",
+ "1 81 87\n",
+ "2 23 18\n",
+ "3 68 52\n",
+ "4 30 90"
+ ]
+ },
+ "execution_count": 129,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cols = [\"a\", \"b\"]\n",
+ "\n",
+ "filtered = data[cols]\n",
+ "filtered.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "### 坐标式"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 130,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
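+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>a</th>\n",
+ "      <th>b</th>\n",
+ "      <th>c</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>RZOlH</th>\n",
+ "      <td>39</td>\n",
+ "      <td>76</td>\n",
+ "      <td>77</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>oZ2EJ</th>\n",
+ "      <td>95</td>\n",
+ "      <td>41</td>\n",
+ "      <td>5</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>nf1gr</th>\n",
+ "      <td>12</td>\n",
+ "      <td>75</td>\n",
+ "      <td>86</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>oSTae</th>\n",
+ "      <td>4</td>\n",
+ "      <td>69</td>\n",
+ "      <td>61</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>7RpMT</th>\n",
+ "      <td>51</td>\n",
+ "      <td>75</td>\n",
+ "      <td>79</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"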
+ ],
+ "text/plain": [
+ " a b c\n",
+ "RZOlH 39 76 77\n",
+ "oZ2EJ 95 41 5\n",
+ "nf1gr 12 75 86\n",
+ "oSTae 4 69 61\n",
+ "7RpMT 51 75 79"
+ ]
+ },
+ "execution_count": 130,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import random\n",
+ "import string\n",
+ "\n",
+ "random.seed(233)\n",
+ "\n",
+ "\n",
+ "def make_labels(n):\n",
+ " alphabet = string.ascii_letters\n",
+ " number = string.digits\n",
+ "\n",
+ " return [\"\".join(random.sample(alphabet + number, 5)) for _ in range(n)]\n",
+ "\n",
+ "\n",
+ "labels = make_labels(100)\n",
+ "\n",
+ "data = pd.DataFrame(\n",
+ " {\n",
+ " \"a\": [random.randrange(1, 100) for _ in range(100)],\n",
+ " \"b\": [random.randrange(1, 100) for _ in range(100)],\n",
+ " \"c\": [random.randrange(1, 100) for _ in range(100)],\n",
+ " },\n",
+ " index=labels,\n",
+ ")\n",
+ "data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 131,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "41"
+ ]
+ },
+ "execution_count": 131,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.at[\"oZ2EJ\", \"b\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 132,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "75"
+ ]
+ },
+ "execution_count": 132,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.iat[2, 1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 133,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# index error\n",
+ "# data.iat[\"oZ2EJ\", 1]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "### 列表坐标式"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 134,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
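+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>a</th>\n",
+ "      <th>b</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>nf1gr</th>\n",
+ "      <td>12</td>\n",
+ "      <td>75</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>oSTae</th>\n",
+ "      <td>4</td>\n",
+ "      <td>69</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"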
+ ],
+ "text/plain": [
+ " a b\n",
+ "nf1gr 12 75\n",
+ "oSTae 4 69"
+ ]
+ },
+ "execution_count": 134,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.loc[\"nf1gr\":\"oSTae\", \"a\":\"b\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 135,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
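+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>b</th>\n",
+ "      <th>c</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>nf1gr</th>\n",
+ "      <td>75</td>\n",
+ "      <td>86</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"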
+ ],
+ "text/plain": [
+ " b c\n",
+ "nf1gr 75 86"
+ ]
+ },
+ "execution_count": 135,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.iloc[2:3, [1, 2]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 136,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
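+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>a</th>\n",
+ "      <th>b</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>RZOlH</th>\n",
+ "      <td>39</td>\n",
+ "      <td>76</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>oZ2EJ</th>\n",
+ "      <td>95</td>\n",
+ "      <td>41</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>nf1gr</th>\n",
+ "      <td>12</td>\n",
+ "      <td>75</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>oSTae</th>\n",
+ "      <td>4</td>\n",
+ "      <td>69</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>7RpMT</th>\n",
+ "      <td>51</td>\n",
+ "      <td>75</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"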
+ ],
+ "text/plain": [
+ " a b\n",
+ "RZOlH 39 76\n",
+ "oZ2EJ 95 41\n",
+ "nf1gr 12 75\n",
+ "oSTae 4 69\n",
+ "7RpMT 51 75"
+ ]
+ },
+ "execution_count": 136,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.loc[:\"7RpMT\", :\"b\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 137,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
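+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>a</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>RZOlH</th>\n",
+ "      <td>39</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>oZ2EJ</th>\n",
+ "      <td>95</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>nf1gr</th>\n",
+ "      <td>12</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>oSTae</th>\n",
+ "      <td>4</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>7RpMT</th>\n",
+ "      <td>51</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>...</th>\n",
+ "      <td>...</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>E14KY</th>\n",
+ "      <td>77</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>KDjSI</th>\n",
+ "      <td>79</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>tn75V</th>\n",
+ "      <td>85</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>4ITDH</th>\n",
+ "      <td>55</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>gxQwc</th>\n",
+ "      <td>30</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "<p>99 rows × 1 columns</p>\n",
+ "</div>"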
+ ],
+ "text/plain": [
+ " a\n",
+ "RZOlH 39\n",
+ "oZ2EJ 95\n",
+ "nf1gr 12\n",
+ "oSTae 4\n",
+ "7RpMT 51\n",
+ "... ..\n",
+ "E14KY 77\n",
+ "KDjSI 79\n",
+ "tn75V 85\n",
+ "4ITDH 55\n",
+ "gxQwc 30\n",
+ "\n",
+ "[99 rows x 1 columns]"
+ ]
+ },
+ "execution_count": 137,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.iloc[:-1, 0:1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 138,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
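+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>a</th>\n",
+ "      <th>c</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>nf1gr</th>\n",
+ "      <td>12</td>\n",
+ "      <td>86</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>oSTae</th>\n",
+ "      <td>4</td>\n",
+ "      <td>61</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>7RpMT</th>\n",
+ "      <td>51</td>\n",
+ "      <td>79</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"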
+ ],
+ "text/plain": [
+ " a c\n",
+ "nf1gr 12 86\n",
+ "oSTae 4 61\n",
+ "7RpMT 51 79"
+ ]
+ },
+ "execution_count": 138,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.loc[\"nf1gr\":\"7RpMT\", [\"a\", \"c\"]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 139,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
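+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>a</th>\n",
+ "      <th>c</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>RZOlH</th>\n",
+ "      <td>39</td>\n",
+ "      <td>77</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>oZ2EJ</th>\n",
+ "      <td>95</td>\n",
+ "      <td>5</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"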
+ ],
+ "text/plain": [
+ " a c\n",
+ "RZOlH 39 77\n",
+ "oZ2EJ 95 5"
+ ]
+ },
+ "execution_count": 139,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.iloc[[0, 1], [0, 2]]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "\n",
+ "## 拼接数据"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "### append"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 140,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
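+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>a</th>\n",
+ "      <th>b</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>A</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>B</td>\n",
+ "      <td>2</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"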
+ ],
+ "text/plain": [
+ " a b\n",
+ "0 A 1\n",
+ "1 B 2"
+ ]
+ },
+ "execution_count": 140,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data1 = pd.DataFrame(\n",
+ " {\n",
+ " \"a\": [\"A\", \"B\"],\n",
+ " \"b\": [1, 2],\n",
+ " }\n",
+ ")\n",
+ "data1.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 141,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
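+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>a</th>\n",
+ "      <th>b</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>C</td>\n",
+ "      <td>3</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>D</td>\n",
+ "      <td>4</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"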
+ ],
+ "text/plain": [
+ " a b\n",
+ "0 C 3\n",
+ "1 D 4"
+ ]
+ },
+ "execution_count": 141,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data2 = pd.DataFrame(\n",
+ " {\n",
+ " \"a\": [\"C\", \"D\"],\n",
+ " \"b\": [3, 4],\n",
+ " }\n",
+ ")\n",
+ "data2.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 142,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
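+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>a</th>\n",
+ "      <th>b</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>A</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>B</td>\n",
+ "      <td>2</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>C</td>\n",
+ "      <td>3</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>D</td>\n",
+ "      <td>4</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"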
+ ],
+ "text/plain": [
+ " a b\n",
+ "0 A 1\n",
+ "1 B 2\n",
+ "0 C 3\n",
+ "1 D 4"
+ ]
+ },
+ "execution_count": 142,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data1.append(data2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 143,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
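+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>a</th>\n",
+ "      <th>b</th>\n",
+ "      <th>c</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>A</td>\n",
+ "      <td>1.0</td>\n",
+ "      <td>NaN</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>B</td>\n",
+ "      <td>2.0</td>\n",
+ "      <td>NaN</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>C</td>\n",
+ "      <td>3.0</td>\n",
+ "      <td>NaN</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>D</td>\n",
+ "      <td>4.0</td>\n",
+ "      <td>NaN</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>E</td>\n",
+ "      <td>NaN</td>\n",
+ "      <td>5.0</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>F</td>\n",
+ "      <td>NaN</td>\n",
+ "      <td>6.0</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"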
+ ],
+ "text/plain": [
+ " a b c\n",
+ "0 A 1.0 NaN\n",
+ "1 B 2.0 NaN\n",
+ "0 C 3.0 NaN\n",
+ "1 D 4.0 NaN\n",
+ "0 E NaN 5.0\n",
+ "1 F NaN 6.0"
+ ]
+ },
+ "execution_count": 143,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data3 = pd.DataFrame(\n",
+ " {\n",
+ " \"a\": [\"E\", \"F\"],\n",
+ " \"c\": [5, 6],\n",
+ " }\n",
+ ")\n",
+ "\n",
+ "data1.append(data2).append(data3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "### merge"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 144,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
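+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>a</th>\n",
+ "      <th>b_x</th>\n",
+ "      <th>b_y</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>1</td>\n",
+ "      <td>a</td>\n",
+ "      <td>c</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>2</td>\n",
+ "      <td>b</td>\n",
+ "      <td>d</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"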
+ ],
+ "text/plain": [
+ " a b_x b_y\n",
+ "0 1 a c\n",
+ "1 2 b d"
+ ]
+ },
+ "execution_count": 144,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "data1 = pd.DataFrame(\n",
+ " {\n",
+ " \"a\": [1, 2],\n",
+ " \"b\": [\"a\", \"b\"],\n",
+ " }\n",
+ ")\n",
+ "\n",
+ "data2 = pd.DataFrame(\n",
+ " {\n",
+ " \"a\": [1, 2],\n",
+ " \"b\": [\"c\", \"d\"],\n",
+ " }\n",
+ ")\n",
+ "\n",
+ "data1.merge(data2, on=\"a\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "2.7.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}