{ "cells": [ { "cell_type": "markdown", "metadata": { "collapsed": true, "pycharm": { "name": "#%% md\n" } }, "source": [ "# 什么是 DataFrame?" ] }, { "cell_type": "code", "execution_count": 112, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameage
0Alice10
1Bob20
2Carol30
\n", "
" ], "text/plain": [ " name age\n", "0 Alice 10\n", "1 Bob 20\n", "2 Carol 30" ] }, "execution_count": 112, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "data = dict(\n", " name=[\"Alice\", \"Bob\", \"Carol\"],\n", " age=[10, 20, 30],\n", ")\n", "df = pd.DataFrame(data)\n", "df" ] }, { "cell_type": "code", "execution_count": 113, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 3 entries, 0 to 2\n", "Data columns (total 2 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 name 3 non-null object\n", " 1 age 3 non-null int64 \n", "dtypes: int64(1), object(1)\n", "memory usage: 176.0+ bytes\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "code", "execution_count": 114, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "0 Alice\n", "1 Bob\n", "2 Carol\n", "Name: name, dtype: object" ] }, "execution_count": 114, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[\"name\"] # 1" ] }, { "cell_type": "code", "execution_count": 115, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "pandas.core.series.Series" ] }, "execution_count": 115, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(df[\"name\"])" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "# 常用的 DataFrame 属性" ] }, { "cell_type": "code", "execution_count": 116, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "RangeIndex(start=0, stop=3, step=1)" ] }, "execution_count": 116, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.index" ] }, { "cell_type": "code", "execution_count": 117, "metadata": { "collapsed": false, 
"pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "name object\n", "age int64\n", "dtype: object" ] }, "execution_count": 117, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.dtypes" ] }, { "cell_type": "code", "execution_count": 118, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "(3, 2)" ] }, "execution_count": 118, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 119, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "6" ] }, "execution_count": 119, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.size" ] }, { "cell_type": "code", "execution_count": 120, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "array([['Alice', 10],\n", " ['Bob', 20],\n", " ['Carol', 30]], dtype=object)" ] }, "execution_count": 120, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.values" ] }, { "cell_type": "code", "execution_count": 121, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "Index(['name', 'age'], dtype='object')" ] }, "execution_count": 121, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns" ] }, { "cell_type": "code", "execution_count": 122, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "[RangeIndex(start=0, stop=3, step=1), Index(['name', 'age'], dtype='object')]" ] }, "execution_count": 122, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.axes" ] }, { "cell_type": "code", "execution_count": 123, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "2" ] }, "execution_count": 123, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "df.ndim" ] }, { "cell_type": "code", "execution_count": 124, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "df is not empty\n" ] } ], "source": [ "if df.empty:\n", " print(\"df is empty\")\n", "else:\n", " print(\"df is not empty\")" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "# 常用的 DataFrame 方法" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "## 查看数据信息" ] }, { "cell_type": "code", "execution_count": 125, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 3 entries, 0 to 2\n", "Data columns (total 2 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 name 3 non-null object\n", " 1 age 3 non-null int64 \n", "dtypes: int64(1), object(1)\n", "memory usage: 176.0+ bytes\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "example:" ] }, { "cell_type": "code", "execution_count": 126, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abc
0886931
1818740
2231815
3685289
4309073
............
959548
9693141
97189163
98273162
99637385
\n", "

100 rows × 3 columns

\n", "
" ], "text/plain": [ " a b c\n", "0 88 69 31\n", "1 81 87 40\n", "2 23 18 15\n", "3 68 52 89\n", "4 30 90 73\n", ".. .. .. ..\n", "95 95 4 8\n", "96 93 14 1\n", "97 18 91 63\n", "98 27 31 62\n", "99 63 73 85\n", "\n", "[100 rows x 3 columns]" ] }, "execution_count": 126, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import random\n", "\n", "import pandas as pd\n", "\n", "random.seed(233)\n", "\n", "data = pd.DataFrame(\n", " {\n", " \"a\": [random.randrange(1, 100) for _ in range(100)],\n", " \"b\": [random.randrange(1, 100) for _ in range(100)],\n", " \"c\": [random.randrange(1, 100) for _ in range(100)],\n", " }\n", ")\n", "data" ] }, { "cell_type": "code", "execution_count": 127, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abc
0886931
1818740
2231815
3685289
4309073
\n", "
" ], "text/plain": [ " a b c\n", "0 88 69 31\n", "1 81 87 40\n", "2 23 18 15\n", "3 68 52 89\n", "4 30 90 73" ] }, "execution_count": 127, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.head()" ] }, { "cell_type": "code", "execution_count": 128, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abc
959548
9693141
97189163
98273162
99637385
\n", "
" ], "text/plain": [ " a b c\n", "95 95 4 8\n", "96 93 14 1\n", "97 18 91 63\n", "98 27 31 62\n", "99 63 73 85" ] }, "execution_count": 128, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.tail()" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "## 索引数据" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "### 列表式" ] }, { "cell_type": "code", "execution_count": 129, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ab
08869
18187
22318
36852
43090
\n", "
" ], "text/plain": [ " a b\n", "0 88 69\n", "1 81 87\n", "2 23 18\n", "3 68 52\n", "4 30 90" ] }, "execution_count": 129, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cols = [\"a\", \"b\"]\n", "\n", "filtered = data[cols]\n", "filtered.head()" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "### 坐标式" ] }, { "cell_type": "code", "execution_count": 130, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abc
RZOlH397677
oZ2EJ95415
nf1gr127586
oSTae46961
7RpMT517579
\n", "
" ], "text/plain": [ " a b c\n", "RZOlH 39 76 77\n", "oZ2EJ 95 41 5\n", "nf1gr 12 75 86\n", "oSTae 4 69 61\n", "7RpMT 51 75 79" ] }, "execution_count": 130, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import random\n", "import string\n", "\n", "random.seed(233)\n", "\n", "\n", "def make_labels(n):\n", " alphabet = string.ascii_letters\n", " number = string.digits\n", "\n", " return [\"\".join(random.sample(alphabet + number, 5)) for _ in range(n)]\n", "\n", "\n", "labels = make_labels(100)\n", "\n", "data = pd.DataFrame(\n", " {\n", " \"a\": [random.randrange(1, 100) for _ in range(100)],\n", " \"b\": [random.randrange(1, 100) for _ in range(100)],\n", " \"c\": [random.randrange(1, 100) for _ in range(100)],\n", " },\n", " index=labels,\n", ")\n", "data.head()" ] }, { "cell_type": "code", "execution_count": 131, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "41" ] }, "execution_count": 131, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.at[\"oZ2EJ\", \"b\"]" ] }, { "cell_type": "code", "execution_count": 132, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "75" ] }, "execution_count": 132, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.iat[2, 1]" ] }, { "cell_type": "code", "execution_count": 133, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "# index error\n", "# data.iat[\"oZ2EJ\", 1]" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "### 列表坐标式" ] }, { "cell_type": "code", "execution_count": 134, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ab
nf1gr1275
oSTae469
\n", "
" ], "text/plain": [ " a b\n", "nf1gr 12 75\n", "oSTae 4 69" ] }, "execution_count": 134, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.loc[\"nf1gr\":\"oSTae\", \"a\":\"b\"]" ] }, { "cell_type": "code", "execution_count": 135, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bc
nf1gr7586
\n", "
" ], "text/plain": [ " b c\n", "nf1gr 75 86" ] }, "execution_count": 135, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.iloc[2:3, [1, 2]]" ] }, { "cell_type": "code", "execution_count": 136, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ab
RZOlH3976
oZ2EJ9541
nf1gr1275
oSTae469
7RpMT5175
\n", "
" ], "text/plain": [ " a b\n", "RZOlH 39 76\n", "oZ2EJ 95 41\n", "nf1gr 12 75\n", "oSTae 4 69\n", "7RpMT 51 75" ] }, "execution_count": 136, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.loc[:\"7RpMT\", :\"b\"]" ] }, { "cell_type": "code", "execution_count": 137, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
a
RZOlH39
oZ2EJ95
nf1gr12
oSTae4
7RpMT51
......
E14KY77
KDjSI79
tn75V85
4ITDH55
gxQwc30
\n", "

99 rows × 1 columns

\n", "
" ], "text/plain": [ " a\n", "RZOlH 39\n", "oZ2EJ 95\n", "nf1gr 12\n", "oSTae 4\n", "7RpMT 51\n", "... ..\n", "E14KY 77\n", "KDjSI 79\n", "tn75V 85\n", "4ITDH 55\n", "gxQwc 30\n", "\n", "[99 rows x 1 columns]" ] }, "execution_count": 137, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.iloc[:-1, 0:1]" ] }, { "cell_type": "code", "execution_count": 138, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ac
nf1gr1286
oSTae461
7RpMT5179
\n", "
" ], "text/plain": [ " a c\n", "nf1gr 12 86\n", "oSTae 4 61\n", "7RpMT 51 79" ] }, "execution_count": 138, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.loc[\"nf1gr\":\"7RpMT\", [\"a\", \"c\"]]" ] }, { "cell_type": "code", "execution_count": 139, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ac
RZOlH3977
oZ2EJ955
\n", "
" ], "text/plain": [ " a c\n", "RZOlH 39 77\n", "oZ2EJ 95 5" ] }, "execution_count": 139, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.iloc[[0, 1], [0, 2]]" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "\n", "## 拼接数据" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "### append（`DataFrame.append` 已在 pandas 2.0 中移除，等价写法为 `pd.concat`）" ] }, { "cell_type": "code", "execution_count": 140, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ab
0A1
1B2
\n", "
" ], "text/plain": [ " a b\n", "0 A 1\n", "1 B 2" ] }, "execution_count": 140, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data1 = pd.DataFrame(\n", " {\n", " \"a\": [\"A\", \"B\"],\n", " \"b\": [1, 2],\n", " }\n", ")\n", "data1.head()" ] }, { "cell_type": "code", "execution_count": 141, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ab
0C3
1D4
\n", "
" ], "text/plain": [ " a b\n", "0 C 3\n", "1 D 4" ] }, "execution_count": 141, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data2 = pd.DataFrame(\n", " {\n", " \"a\": [\"C\", \"D\"],\n", " \"b\": [3, 4],\n", " }\n", ")\n", "data2.head()" ] }, { "cell_type": "code", "execution_count": 142, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ab
0A1
1B2
0C3
1D4
\n", "
" ], "text/plain": [ " a b\n", "0 A 1\n", "1 B 2\n", "0 C 3\n", "1 D 4" ] }, "execution_count": 142, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# DataFrame.append was removed in pandas 2.0; pd.concat is the supported way to stack frames row-wise.\n", "# The original row labels (0, 1, 0, 1) are kept because ignore_index is left at its default.\n", "pd.concat([data1, data2])" ] }, { "cell_type": "code", "execution_count": 143, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abc
0A1.0NaN
1B2.0NaN
0C3.0NaN
1D4.0NaN
0ENaN5.0
1FNaN6.0
\n", "
" ], "text/plain": [ " a b c\n", "0 A 1.0 NaN\n", "1 B 2.0 NaN\n", "0 C 3.0 NaN\n", "1 D 4.0 NaN\n", "0 E NaN 5.0\n", "1 F NaN 6.0" ] }, "execution_count": 143, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data3 = pd.DataFrame(\n", " {\n", " \"a\": [\"E\", \"F\"],\n", " \"c\": [5, 6],\n", " }\n", ")\n", "\n", "# One pd.concat call replaces the chained DataFrame.append calls (append was removed in pandas 2.0).\n", "# Columns absent from a frame (\"c\" in data1/data2, \"b\" in data3) are filled with NaN.\n", "pd.concat([data1, data2, data3])" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "### merge" ] }, { "cell_type": "code", "execution_count": 144, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ab_xb_y
01ac
12bd
\n", "
" ], "text/plain": [ " a b_x b_y\n", "0 1 a c\n", "1 2 b d" ] }, "execution_count": 144, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "data1 = pd.DataFrame(\n", " {\n", " \"a\": [1, 2],\n", " \"b\": [\"a\", \"b\"],\n", " }\n", ")\n", "\n", "data2 = pd.DataFrame(\n", " {\n", " \"a\": [1, 2],\n", " \"b\": [\"c\", \"d\"],\n", " }\n", ")\n", "\n", "data1.merge(data2, on=\"a\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 0 }