From bc94d5ca29ffca988ee11d077c5df0cd7370806e Mon Sep 17 00:00:00 2001 From: 100gle <569590461@qq.com> Date: Wed, 25 May 2022 22:41:49 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=96=B0=E5=A2=9EVol11=E7=AB=A0?= =?UTF-8?q?=E8=8A=82=E7=A4=BA=E4=BE=8B=E6=BA=90=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- code/11/dataframe.ipynb | 2048 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 2048 insertions(+) create mode 100644 code/11/dataframe.ipynb diff --git a/code/11/dataframe.ipynb b/code/11/dataframe.ipynb new file mode 100644 index 0000000..3b795a1 --- /dev/null +++ b/code/11/dataframe.ipynb @@ -0,0 +1,2048 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": true, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# 什么是 DataFrame?" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameage
0Alice10
1Bob20
2Carol30
\n", + "
" + ], + "text/plain": [ + " name age\n", + "0 Alice 10\n", + "1 Bob 20\n", + "2 Carol 30" + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "data = dict(\n", + " name=[\"Alice\", \"Bob\", \"Carol\"],\n", + " age=[10, 20, 30],\n", + ")\n", + "df = pd.DataFrame(data)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 3 entries, 0 to 2\n", + "Data columns (total 2 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 name 3 non-null object\n", + " 1 age 3 non-null int64 \n", + "dtypes: int64(1), object(1)\n", + "memory usage: 176.0+ bytes\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 Alice\n", + "1 Bob\n", + "2 Carol\n", + "Name: name, dtype: object" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"name\"] # 1" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.series.Series" + ] + }, + "execution_count": 115, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(df[\"name\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# 常用的 DataFrame 属性" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "RangeIndex(start=0, stop=3, step=1)" + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.index" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "name object\n", + "age int64\n", + "dtype: object" + ] + }, + "execution_count": 117, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(3, 2)" + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "6" + ] + }, + "execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.size" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([['Alice', 10],\n", + " ['Bob', 20],\n", + " ['Carol', 30]], dtype=object)" + ] + }, + "execution_count": 120, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.values" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['name', 'age'], dtype='object')" + ] + }, + "execution_count": 121, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[RangeIndex(start=0, stop=3, step=1), Index(['name', 'age'], dtype='object')]" + ] + }, + "execution_count": 122, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.axes" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 123, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.ndim" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df is not empty\n" + ] + } + ], + "source": [ + "if df.empty:\n", + " print(\"df is empty\")\n", + "else:\n", + " print(\"df is not empty\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# 常用的 DataFrame 方法" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## 查看数据信息" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 3 entries, 0 to 2\n", + "Data columns (total 2 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 name 3 non-null object\n", + " 1 age 3 non-null int64 \n", + "dtypes: int64(1), object(1)\n", + "memory usage: 176.0+ bytes\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "example:" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
0886931
1818740
2231815
3685289
4309073
............
959548
9693141
97189163
98273162
99637385
\n", + "

100 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 88 69 31\n", + "1 81 87 40\n", + "2 23 18 15\n", + "3 68 52 89\n", + "4 30 90 73\n", + ".. .. .. ..\n", + "95 95 4 8\n", + "96 93 14 1\n", + "97 18 91 63\n", + "98 27 31 62\n", + "99 63 73 85\n", + "\n", + "[100 rows x 3 columns]" + ] + }, + "execution_count": 126, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import random\n", + "\n", + "import pandas as pd\n", + "\n", + "random.seed(233)\n", + "\n", + "data = pd.DataFrame(\n", + " {\n", + " \"a\": [random.randrange(1, 100) for _ in range(100)],\n", + " \"b\": [random.randrange(1, 100) for _ in range(100)],\n", + " \"c\": [random.randrange(1, 100) for _ in range(100)],\n", + " }\n", + ")\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
0886931
1818740
2231815
3685289
4309073
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 88 69 31\n", + "1 81 87 40\n", + "2 23 18 15\n", + "3 68 52 89\n", + "4 30 90 73" + ] + }, + "execution_count": 127, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
959548
9693141
97189163
98273162
99637385
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "95 95 4 8\n", + "96 93 14 1\n", + "97 18 91 63\n", + "98 27 31 62\n", + "99 63 73 85" + ] + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.tail()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## 索引数据" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### 列表式" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
08869
18187
22318
36852
43090
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 88 69\n", + "1 81 87\n", + "2 23 18\n", + "3 68 52\n", + "4 30 90" + ] + }, + "execution_count": 129, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cols = [\"a\", \"b\"]\n", + "\n", + "filtered = data[cols]\n", + "filtered.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### 坐标式" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
RZOlH397677
oZ2EJ95415
nf1gr127586
oSTae46961
7RpMT517579
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "RZOlH 39 76 77\n", + "oZ2EJ 95 41 5\n", + "nf1gr 12 75 86\n", + "oSTae 4 69 61\n", + "7RpMT 51 75 79" + ] + }, + "execution_count": 130, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import random\n", + "import string\n", + "\n", + "random.seed(233)\n", + "\n", + "\n", + "def make_labels(n):\n", + " alphabet = string.ascii_letters\n", + " number = string.digits\n", + "\n", + " return [\"\".join(random.sample(alphabet + number, 5)) for _ in range(n)]\n", + "\n", + "\n", + "labels = make_labels(100)\n", + "\n", + "data = pd.DataFrame(\n", + " {\n", + " \"a\": [random.randrange(1, 100) for _ in range(100)],\n", + " \"b\": [random.randrange(1, 100) for _ in range(100)],\n", + " \"c\": [random.randrange(1, 100) for _ in range(100)],\n", + " },\n", + " index=labels,\n", + ")\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "41" + ] + }, + "execution_count": 131, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.at[\"oZ2EJ\", \"b\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "75" + ] + }, + "execution_count": 132, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.iat[2, 1]" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# index error\n", + "# data.iat[\"oZ2EJ\", 1]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### 列表坐标式" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
nf1gr1275
oSTae469
\n", + "
" + ], + "text/plain": [ + " a b\n", + "nf1gr 12 75\n", + "oSTae 4 69" + ] + }, + "execution_count": 134, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.loc[\"nf1gr\":\"oSTae\", \"a\":\"b\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
bc
nf1gr7586
\n", + "
" + ], + "text/plain": [ + " b c\n", + "nf1gr 75 86" + ] + }, + "execution_count": 135, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.iloc[2:3, [1, 2]]" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
RZOlH3976
oZ2EJ9541
nf1gr1275
oSTae469
7RpMT5175
\n", + "
" + ], + "text/plain": [ + " a b\n", + "RZOlH 39 76\n", + "oZ2EJ 95 41\n", + "nf1gr 12 75\n", + "oSTae 4 69\n", + "7RpMT 51 75" + ] + }, + "execution_count": 136, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.loc[:\"7RpMT\", :\"b\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
a
RZOlH39
oZ2EJ95
nf1gr12
oSTae4
7RpMT51
......
E14KY77
KDjSI79
tn75V85
4ITDH55
gxQwc30
\n", + "

99 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " a\n", + "RZOlH 39\n", + "oZ2EJ 95\n", + "nf1gr 12\n", + "oSTae 4\n", + "7RpMT 51\n", + "... ..\n", + "E14KY 77\n", + "KDjSI 79\n", + "tn75V 85\n", + "4ITDH 55\n", + "gxQwc 30\n", + "\n", + "[99 rows x 1 columns]" + ] + }, + "execution_count": 137, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.iloc[:-1, 0:1]" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ac
nf1gr1286
oSTae461
7RpMT5179
\n", + "
" + ], + "text/plain": [ + " a c\n", + "nf1gr 12 86\n", + "oSTae 4 61\n", + "7RpMT 51 79" + ] + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.loc[\"nf1gr\":\"7RpMT\", [\"a\", \"c\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ac
RZOlH3977
oZ2EJ955
\n", + "
" + ], + "text/plain": [ + " a c\n", + "RZOlH 39 77\n", + "oZ2EJ 95 5" + ] + }, + "execution_count": 139, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.iloc[[0, 1], [0, 2]]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "\n", + "## 拼接数据" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### append" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
0A1
1B2
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 A 1\n", + "1 B 2" + ] + }, + "execution_count": 140, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data1 = pd.DataFrame(\n", + " {\n", + " \"a\": [\"A\", \"B\"],\n", + " \"b\": [1, 2],\n", + " }\n", + ")\n", + "data1.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
0C3
1D4
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 C 3\n", + "1 D 4" + ] + }, + "execution_count": 141, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data2 = pd.DataFrame(\n", + " {\n", + " \"a\": [\"C\", \"D\"],\n", + " \"b\": [3, 4],\n", + " }\n", + ")\n", + "data2.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
0A1
1B2
0C3
1D4
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 A 1\n", + "1 B 2\n", + "0 C 3\n", + "1 D 4" + ] + }, + "execution_count": 142, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data1.append(data2)" + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
0A1.0NaN
1B2.0NaN
0C3.0NaN
1D4.0NaN
0ENaN5.0
1FNaN6.0
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 A 1.0 NaN\n", + "1 B 2.0 NaN\n", + "0 C 3.0 NaN\n", + "1 D 4.0 NaN\n", + "0 E NaN 5.0\n", + "1 F NaN 6.0" + ] + }, + "execution_count": 143, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data3 = pd.DataFrame(\n", + " {\n", + " \"a\": [\"E\", \"F\"],\n", + " \"c\": [5, 6],\n", + " }\n", + ")\n", + "\n", + "data1.append(data2).append(data3)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### merge" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab_xb_y
01ac
12bd
\n", + "
" + ], + "text/plain": [ + " a b_x b_y\n", + "0 1 a c\n", + "1 2 b d" + ] + }, + "execution_count": 144, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "data1 = pd.DataFrame(\n", + " {\n", + " \"a\": [1, 2],\n", + " \"b\": [\"a\", \"b\"],\n", + " }\n", + ")\n", + "\n", + "data2 = pd.DataFrame(\n", + " {\n", + " \"a\": [1, 2],\n", + " \"b\": [\"c\", \"d\"],\n", + " }\n", + ")\n", + "\n", + "data1.merge(data2, on=\"a\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}