{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"collapsed": true,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# 什么是 DataFrame?"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" age | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Alice | \n",
" 10 | \n",
"
\n",
" \n",
" | 1 | \n",
" Bob | \n",
" 20 | \n",
"
\n",
" \n",
" | 2 | \n",
" Carol | \n",
" 30 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name age\n",
"0 Alice 10\n",
"1 Bob 20\n",
"2 Carol 30"
]
},
"execution_count": 112,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Column-oriented input: each key is a column label, each list holds that column's values.\n",
"data = {\n",
"    \"name\": [\"Alice\", \"Bob\", \"Carol\"],\n",
"    \"age\": [10, 20, 30],\n",
"}\n",
"df = pd.DataFrame(data)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 3 entries, 0 to 2\n",
"Data columns (total 2 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 name 3 non-null object\n",
" 1 age 3 non-null int64 \n",
"dtypes: int64(1), object(1)\n",
"memory usage: 176.0+ bytes\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0 Alice\n",
"1 Bob\n",
"2 Carol\n",
"Name: name, dtype: object"
]
},
"execution_count": 114,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"name\"]  # a single column label selects that column as a Series"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"pandas.core.series.Series"
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(df[\"name\"])"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# 常用的 DataFrame 属性"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"RangeIndex(start=0, stop=3, step=1)"
]
},
"execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.index"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"name object\n",
"age int64\n",
"dtype: object"
]
},
"execution_count": 117,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"(3, 2)"
]
},
"execution_count": 118,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"6"
]
},
"execution_count": 119,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.size"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"array([['Alice', 10],\n",
" ['Bob', 20],\n",
" ['Carol', 30]], dtype=object)"
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.values"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['name', 'age'], dtype='object')"
]
},
"execution_count": 121,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[RangeIndex(start=0, stop=3, step=1), Index(['name', 'age'], dtype='object')]"
]
},
"execution_count": 122,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.axes"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"2"
]
},
"execution_count": 123,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.ndim"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"df is not empty\n"
]
}
],
"source": [
"# Choose the message from df.empty and print it.\n",
"print(\"df is empty\" if df.empty else \"df is not empty\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# 常用的 DataFrame 方法"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## 查看数据信息"
]
},
{
"cell_type": "code",
"execution_count": 125,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 3 entries, 0 to 2\n",
"Data columns (total 2 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 name 3 non-null object\n",
" 1 age 3 non-null int64 \n",
"dtypes: int64(1), object(1)\n",
"memory usage: 176.0+ bytes\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"示例:用固定的随机种子生成一个 100 行 × 3 列的整数 DataFrame,下面用它演示 `head()` 和 `tail()`。"
]
},
{
"cell_type": "code",
"execution_count": 126,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 88 | \n",
" 69 | \n",
" 31 | \n",
"
\n",
" \n",
" | 1 | \n",
" 81 | \n",
" 87 | \n",
" 40 | \n",
"
\n",
" \n",
" | 2 | \n",
" 23 | \n",
" 18 | \n",
" 15 | \n",
"
\n",
" \n",
" | 3 | \n",
" 68 | \n",
" 52 | \n",
" 89 | \n",
"
\n",
" \n",
" | 4 | \n",
" 30 | \n",
" 90 | \n",
" 73 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 95 | \n",
" 95 | \n",
" 4 | \n",
" 8 | \n",
"
\n",
" \n",
" | 96 | \n",
" 93 | \n",
" 14 | \n",
" 1 | \n",
"
\n",
" \n",
" | 97 | \n",
" 18 | \n",
" 91 | \n",
" 63 | \n",
"
\n",
" \n",
" | 98 | \n",
" 27 | \n",
" 31 | \n",
" 62 | \n",
"
\n",
" \n",
" | 99 | \n",
" 63 | \n",
" 73 | \n",
" 85 | \n",
"
\n",
" \n",
"
\n",
"
100 rows × 3 columns
\n",
"
"
],
"text/plain": [
" a b c\n",
"0 88 69 31\n",
"1 81 87 40\n",
"2 23 18 15\n",
"3 68 52 89\n",
"4 30 90 73\n",
".. .. .. ..\n",
"95 95 4 8\n",
"96 93 14 1\n",
"97 18 91 63\n",
"98 27 31 62\n",
"99 63 73 85\n",
"\n",
"[100 rows x 3 columns]"
]
},
"execution_count": 126,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import random\n",
"\n",
"import pandas as pd\n",
"\n",
"# Fixed seed so the generated table is the same on every run.\n",
"random.seed(233)\n",
"\n",
"# Columns are filled one after another (a, then b, then c), 100 draws each.\n",
"data = pd.DataFrame(\n",
"    {\n",
"        column: [random.randrange(1, 100) for _ in range(100)]\n",
"        for column in (\"a\", \"b\", \"c\")\n",
"    }\n",
")\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 127,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 88 | \n",
" 69 | \n",
" 31 | \n",
"
\n",
" \n",
" | 1 | \n",
" 81 | \n",
" 87 | \n",
" 40 | \n",
"
\n",
" \n",
" | 2 | \n",
" 23 | \n",
" 18 | \n",
" 15 | \n",
"
\n",
" \n",
" | 3 | \n",
" 68 | \n",
" 52 | \n",
" 89 | \n",
"
\n",
" \n",
" | 4 | \n",
" 30 | \n",
" 90 | \n",
" 73 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c\n",
"0 88 69 31\n",
"1 81 87 40\n",
"2 23 18 15\n",
"3 68 52 89\n",
"4 30 90 73"
]
},
"execution_count": 127,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
"
\n",
" \n",
" \n",
" \n",
" | 95 | \n",
" 95 | \n",
" 4 | \n",
" 8 | \n",
"
\n",
" \n",
" | 96 | \n",
" 93 | \n",
" 14 | \n",
" 1 | \n",
"
\n",
" \n",
" | 97 | \n",
" 18 | \n",
" 91 | \n",
" 63 | \n",
"
\n",
" \n",
" | 98 | \n",
" 27 | \n",
" 31 | \n",
" 62 | \n",
"
\n",
" \n",
" | 99 | \n",
" 63 | \n",
" 73 | \n",
" 85 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c\n",
"95 95 4 8\n",
"96 93 14 1\n",
"97 18 91 63\n",
"98 27 31 62\n",
"99 63 73 85"
]
},
"execution_count": 128,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.tail()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## 索引数据"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"### 列表式"
]
},
{
"cell_type": "code",
"execution_count": 129,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 88 | \n",
" 69 | \n",
"
\n",
" \n",
" | 1 | \n",
" 81 | \n",
" 87 | \n",
"
\n",
" \n",
" | 2 | \n",
" 23 | \n",
" 18 | \n",
"
\n",
" \n",
" | 3 | \n",
" 68 | \n",
" 52 | \n",
"
\n",
" \n",
" | 4 | \n",
" 30 | \n",
" 90 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"0 88 69\n",
"1 81 87\n",
"2 23 18\n",
"3 68 52\n",
"4 30 90"
]
},
"execution_count": 129,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cols = [\"a\", \"b\"]\n",
"\n",
"filtered = data[cols]\n",
"filtered.head()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"### 坐标式"
]
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
"
\n",
" \n",
" \n",
" \n",
" | RZOlH | \n",
" 39 | \n",
" 76 | \n",
" 77 | \n",
"
\n",
" \n",
" | oZ2EJ | \n",
" 95 | \n",
" 41 | \n",
" 5 | \n",
"
\n",
" \n",
" | nf1gr | \n",
" 12 | \n",
" 75 | \n",
" 86 | \n",
"
\n",
" \n",
" | oSTae | \n",
" 4 | \n",
" 69 | \n",
" 61 | \n",
"
\n",
" \n",
" | 7RpMT | \n",
" 51 | \n",
" 75 | \n",
" 79 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c\n",
"RZOlH 39 76 77\n",
"oZ2EJ 95 41 5\n",
"nf1gr 12 75 86\n",
"oSTae 4 69 61\n",
"7RpMT 51 75 79"
]
},
"execution_count": 130,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import random\n",
"import string\n",
"\n",
"# Fixed seed so the labels and values below are the same on every run.\n",
"random.seed(233)\n",
"\n",
"\n",
"def make_labels(n, length=5):\n",
"    \"\"\"Return ``n`` random labels made of ASCII letters and digits.\n",
"\n",
"    Every label has ``length`` characters picked with ``random.sample``, i.e. without\n",
"    replacement, so a character never repeats inside one label. Separate labels are\n",
"    drawn independently and may therefore coincide.\n",
"    \"\"\"\n",
"    charset = string.ascii_letters + string.digits\n",
"    return [\"\".join(random.sample(charset, length)) for _ in range(n)]\n",
"\n",
"\n",
"labels = make_labels(100)\n",
"# The label-based lookups further down rely on every row label being distinct.\n",
"assert len(set(labels)) == len(labels), \"duplicate row labels generated\"\n",
"\n",
"# Columns are filled one after another (a, then b, then c), 100 draws each.\n",
"data = pd.DataFrame(\n",
"    {\n",
"        column: [random.randrange(1, 100) for _ in range(100)]\n",
"        for column in (\"a\", \"b\", \"c\")\n",
"    },\n",
"    index=labels,\n",
")\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 131,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"41"
]
},
"execution_count": 131,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.at[\"oZ2EJ\", \"b\"]"
]
},
{
"cell_type": "code",
"execution_count": 132,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"75"
]
},
"execution_count": 132,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.iat[2, 1]"
]
},
{
"cell_type": "code",
"execution_count": 133,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
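},
"outputs": [],
"source": [
"# Kept commented out so Run All still succeeds: .iat is purely positional (compare data.iat[2, 1] above),\n",
"# so passing the row label \"oZ2EJ\" raises ValueError instead of returning a value.\n",
"# data.iat[\"oZ2EJ\", 1]"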
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"### 列表坐标式"
]
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" | nf1gr | \n",
" 12 | \n",
" 75 | \n",
"
\n",
" \n",
" | oSTae | \n",
" 4 | \n",
" 69 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"nf1gr 12 75\n",
"oSTae 4 69"
]
},
"execution_count": 134,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.loc[\"nf1gr\":\"oSTae\", \"a\":\"b\"]"
]
},
{
"cell_type": "code",
"execution_count": 135,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" b | \n",
" c | \n",
"
\n",
" \n",
" \n",
" \n",
" | nf1gr | \n",
" 75 | \n",
" 86 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" b c\n",
"nf1gr 75 86"
]
},
"execution_count": 135,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.iloc[2:3, [1, 2]]"
]
},
{
"cell_type": "code",
"execution_count": 136,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" | RZOlH | \n",
" 39 | \n",
" 76 | \n",
"
\n",
" \n",
" | oZ2EJ | \n",
" 95 | \n",
" 41 | \n",
"
\n",
" \n",
" | nf1gr | \n",
" 12 | \n",
" 75 | \n",
"
\n",
" \n",
" | oSTae | \n",
" 4 | \n",
" 69 | \n",
"
\n",
" \n",
" | 7RpMT | \n",
" 51 | \n",
" 75 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"RZOlH 39 76\n",
"oZ2EJ 95 41\n",
"nf1gr 12 75\n",
"oSTae 4 69\n",
"7RpMT 51 75"
]
},
"execution_count": 136,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.loc[:\"7RpMT\", :\"b\"]"
]
},
{
"cell_type": "code",
"execution_count": 137,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
"
\n",
" \n",
" \n",
" \n",
" | RZOlH | \n",
" 39 | \n",
"
\n",
" \n",
" | oZ2EJ | \n",
" 95 | \n",
"
\n",
" \n",
" | nf1gr | \n",
" 12 | \n",
"
\n",
" \n",
" | oSTae | \n",
" 4 | \n",
"
\n",
" \n",
" | 7RpMT | \n",
" 51 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
"
\n",
" \n",
" | E14KY | \n",
" 77 | \n",
"
\n",
" \n",
" | KDjSI | \n",
" 79 | \n",
"
\n",
" \n",
" | tn75V | \n",
" 85 | \n",
"
\n",
" \n",
" | 4ITDH | \n",
" 55 | \n",
"
\n",
" \n",
" | gxQwc | \n",
" 30 | \n",
"
\n",
" \n",
"
\n",
"
99 rows × 1 columns
\n",
"
"
],
"text/plain": [
" a\n",
"RZOlH 39\n",
"oZ2EJ 95\n",
"nf1gr 12\n",
"oSTae 4\n",
"7RpMT 51\n",
"... ..\n",
"E14KY 77\n",
"KDjSI 79\n",
"tn75V 85\n",
"4ITDH 55\n",
"gxQwc 30\n",
"\n",
"[99 rows x 1 columns]"
]
},
"execution_count": 137,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.iloc[:-1, 0:1]"
]
},
{
"cell_type": "code",
"execution_count": 138,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" c | \n",
"
\n",
" \n",
" \n",
" \n",
" | nf1gr | \n",
" 12 | \n",
" 86 | \n",
"
\n",
" \n",
" | oSTae | \n",
" 4 | \n",
" 61 | \n",
"
\n",
" \n",
" | 7RpMT | \n",
" 51 | \n",
" 79 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a c\n",
"nf1gr 12 86\n",
"oSTae 4 61\n",
"7RpMT 51 79"
]
},
"execution_count": 138,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.loc[\"nf1gr\":\"7RpMT\", [\"a\", \"c\"]]"
]
},
{
"cell_type": "code",
"execution_count": 139,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" c | \n",
"
\n",
" \n",
" \n",
" \n",
" | RZOlH | \n",
" 39 | \n",
" 77 | \n",
"
\n",
" \n",
" | oZ2EJ | \n",
" 95 | \n",
" 5 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a c\n",
"RZOlH 39 77\n",
"oZ2EJ 95 5"
]
},
"execution_count": 139,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.iloc[[0, 1], [0, 2]]"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"\n",
"## 拼接数据"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"### append\n",
"\n",
"> 注意:`DataFrame.append` 自 pandas 1.4 起被弃用,并已在 pandas 2.0 中移除;新代码请使用 `pd.concat`。"
]
},
{
"cell_type": "code",
"execution_count": 140,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" A | \n",
" 1 | \n",
"
\n",
" \n",
" | 1 | \n",
" B | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"0 A 1\n",
"1 B 2"
]
},
"execution_count": 140,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data1 = pd.DataFrame(\n",
" {\n",
" \"a\": [\"A\", \"B\"],\n",
" \"b\": [1, 2],\n",
" }\n",
")\n",
"data1.head()"
]
},
{
"cell_type": "code",
"execution_count": 141,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" C | \n",
" 3 | \n",
"
\n",
" \n",
" | 1 | \n",
" D | \n",
" 4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"0 C 3\n",
"1 D 4"
]
},
"execution_count": 141,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data2 = pd.DataFrame(\n",
" {\n",
" \"a\": [\"C\", \"D\"],\n",
" \"b\": [3, 4],\n",
" }\n",
")\n",
"data2.head()"
]
},
{
"cell_type": "code",
"execution_count": 142,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" A | \n",
" 1 | \n",
"
\n",
" \n",
" | 1 | \n",
" B | \n",
" 2 | \n",
"
\n",
" \n",
" | 0 | \n",
" C | \n",
" 3 | \n",
"
\n",
" \n",
" | 1 | \n",
" D | \n",
" 4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"0 A 1\n",
"1 B 2\n",
"0 C 3\n",
"1 D 4"
]
},
"execution_count": 142,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows the same way\n",
"# and keeps each frame's own index labels (0, 1, 0, 1).\n",
"pd.concat([data1, data2])"
]
},
{
"cell_type": "code",
"execution_count": 143,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" A | \n",
" 1.0 | \n",
" NaN | \n",
"
\n",
" \n",
" | 1 | \n",
" B | \n",
" 2.0 | \n",
" NaN | \n",
"
\n",
" \n",
" | 0 | \n",
" C | \n",
" 3.0 | \n",
" NaN | \n",
"
\n",
" \n",
" | 1 | \n",
" D | \n",
" 4.0 | \n",
" NaN | \n",
"
\n",
" \n",
" | 0 | \n",
" E | \n",
" NaN | \n",
" 5.0 | \n",
"
\n",
" \n",
" | 1 | \n",
" F | \n",
" NaN | \n",
" 6.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c\n",
"0 A 1.0 NaN\n",
"1 B 2.0 NaN\n",
"0 C 3.0 NaN\n",
"1 D 4.0 NaN\n",
"0 E NaN 5.0\n",
"1 F NaN 6.0"
]
},
"execution_count": 143,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data3 = pd.DataFrame(\n",
"    {\n",
"        \"a\": [\"E\", \"F\"],\n",
"        \"c\": [5, 6],\n",
"    }\n",
")\n",
"\n",
"# DataFrame.append was removed in pandas 2.0, so the three frames are stacked with a\n",
"# single pd.concat call. A column that a frame lacks is filled with NaN for its rows.\n",
"pd.concat([data1, data2, data3])"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"### merge"
]
},
{
"cell_type": "code",
"execution_count": 144,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b_x | \n",
" b_y | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" a | \n",
" c | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" b | \n",
" d | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b_x b_y\n",
"0 1 a c\n",
"1 2 b d"
]
},
"execution_count": 144,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"data1 = pd.DataFrame(\n",
" {\n",
" \"a\": [1, 2],\n",
" \"b\": [\"a\", \"b\"],\n",
" }\n",
")\n",
"\n",
"data2 = pd.DataFrame(\n",
" {\n",
" \"a\": [1, 2],\n",
" \"b\": [\"c\", \"d\"],\n",
" }\n",
")\n",
"\n",
"data1.merge(data2, on=\"a\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}