2049 lines
54 KiB
Plaintext
2049 lines
54 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"# 什么是 DataFrame?"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 112,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>age</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>Alice</td>\n",
|
||
" <td>10</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>Bob</td>\n",
|
||
" <td>20</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>Carol</td>\n",
|
||
" <td>30</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" name age\n",
|
||
"0 Alice 10\n",
|
||
"1 Bob 20\n",
|
||
"2 Carol 30"
|
||
]
|
||
},
|
||
"execution_count": 112,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"data = dict(\n",
|
||
" name=[\"Alice\", \"Bob\", \"Carol\"],\n",
|
||
" age=[10, 20, 30],\n",
|
||
")\n",
|
||
"df = pd.DataFrame(data)\n",
|
||
"df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 113,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"RangeIndex: 3 entries, 0 to 2\n",
|
||
"Data columns (total 2 columns):\n",
|
||
" # Column Non-Null Count Dtype \n",
|
||
"--- ------ -------------- ----- \n",
|
||
" 0 name 3 non-null object\n",
|
||
" 1 age 3 non-null int64 \n",
|
||
"dtypes: int64(1), object(1)\n",
|
||
"memory usage: 176.0+ bytes\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"df.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 114,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"0 Alice\n",
|
||
"1 Bob\n",
|
||
"2 Carol\n",
|
||
"Name: name, dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 114,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df[\"name\"] # 1"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 115,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"pandas.core.series.Series"
|
||
]
|
||
},
|
||
"execution_count": 115,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"type(df[\"name\"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"# 常用的 DataFrame 属性"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 116,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"RangeIndex(start=0, stop=3, step=1)"
|
||
]
|
||
},
|
||
"execution_count": 116,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.index"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 117,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"name object\n",
|
||
"age int64\n",
|
||
"dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 117,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.dtypes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 118,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(3, 2)"
|
||
]
|
||
},
|
||
"execution_count": 118,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 119,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"6"
|
||
]
|
||
},
|
||
"execution_count": 119,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.size"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 120,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([['Alice', 10],\n",
|
||
" ['Bob', 20],\n",
|
||
" ['Carol', 30]], dtype=object)"
|
||
]
|
||
},
|
||
"execution_count": 120,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.values"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 121,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Index(['name', 'age'], dtype='object')"
|
||
]
|
||
},
|
||
"execution_count": 121,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.columns"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 122,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[RangeIndex(start=0, stop=3, step=1), Index(['name', 'age'], dtype='object')]"
|
||
]
|
||
},
|
||
"execution_count": 122,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.axes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 123,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"2"
|
||
]
|
||
},
|
||
"execution_count": 123,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.ndim"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 124,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"df is not empty\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"if df.empty:\n",
|
||
" print(\"df is empty\")\n",
|
||
"else:\n",
|
||
" print(\"df is not empty\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"# 常用的 DataFrame 方法"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"## 查看数据信息"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 125,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"RangeIndex: 3 entries, 0 to 2\n",
|
||
"Data columns (total 2 columns):\n",
|
||
" # Column Non-Null Count Dtype \n",
|
||
"--- ------ -------------- ----- \n",
|
||
" 0 name 3 non-null object\n",
|
||
" 1 age 3 non-null int64 \n",
|
||
"dtypes: int64(1), object(1)\n",
|
||
"memory usage: 176.0+ bytes\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"df.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"example:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 126,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>a</th>\n",
|
||
" <th>b</th>\n",
|
||
" <th>c</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>88</td>\n",
|
||
" <td>69</td>\n",
|
||
" <td>31</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>81</td>\n",
|
||
" <td>87</td>\n",
|
||
" <td>40</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>23</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>15</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>68</td>\n",
|
||
" <td>52</td>\n",
|
||
" <td>89</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>30</td>\n",
|
||
" <td>90</td>\n",
|
||
" <td>73</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>95</th>\n",
|
||
" <td>95</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>8</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96</th>\n",
|
||
" <td>93</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>97</th>\n",
|
||
" <td>18</td>\n",
|
||
" <td>91</td>\n",
|
||
" <td>63</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>98</th>\n",
|
||
" <td>27</td>\n",
|
||
" <td>31</td>\n",
|
||
" <td>62</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>99</th>\n",
|
||
" <td>63</td>\n",
|
||
" <td>73</td>\n",
|
||
" <td>85</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>100 rows × 3 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" a b c\n",
|
||
"0 88 69 31\n",
|
||
"1 81 87 40\n",
|
||
"2 23 18 15\n",
|
||
"3 68 52 89\n",
|
||
"4 30 90 73\n",
|
||
".. .. .. ..\n",
|
||
"95 95 4 8\n",
|
||
"96 93 14 1\n",
|
||
"97 18 91 63\n",
|
||
"98 27 31 62\n",
|
||
"99 63 73 85\n",
|
||
"\n",
|
||
"[100 rows x 3 columns]"
|
||
]
|
||
},
|
||
"execution_count": 126,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import random\n",
|
||
"\n",
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"random.seed(233)\n",
|
||
"\n",
|
||
"data = pd.DataFrame(\n",
|
||
" {\n",
|
||
" \"a\": [random.randrange(1, 100) for _ in range(100)],\n",
|
||
" \"b\": [random.randrange(1, 100) for _ in range(100)],\n",
|
||
" \"c\": [random.randrange(1, 100) for _ in range(100)],\n",
|
||
" }\n",
|
||
")\n",
|
||
"data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 127,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>a</th>\n",
|
||
" <th>b</th>\n",
|
||
" <th>c</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>88</td>\n",
|
||
" <td>69</td>\n",
|
||
" <td>31</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>81</td>\n",
|
||
" <td>87</td>\n",
|
||
" <td>40</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>23</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>15</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>68</td>\n",
|
||
" <td>52</td>\n",
|
||
" <td>89</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>30</td>\n",
|
||
" <td>90</td>\n",
|
||
" <td>73</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" a b c\n",
|
||
"0 88 69 31\n",
|
||
"1 81 87 40\n",
|
||
"2 23 18 15\n",
|
||
"3 68 52 89\n",
|
||
"4 30 90 73"
|
||
]
|
||
},
|
||
"execution_count": 127,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 128,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>a</th>\n",
|
||
" <th>b</th>\n",
|
||
" <th>c</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>95</th>\n",
|
||
" <td>95</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>8</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>96</th>\n",
|
||
" <td>93</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>97</th>\n",
|
||
" <td>18</td>\n",
|
||
" <td>91</td>\n",
|
||
" <td>63</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>98</th>\n",
|
||
" <td>27</td>\n",
|
||
" <td>31</td>\n",
|
||
" <td>62</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>99</th>\n",
|
||
" <td>63</td>\n",
|
||
" <td>73</td>\n",
|
||
" <td>85</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" a b c\n",
|
||
"95 95 4 8\n",
|
||
"96 93 14 1\n",
|
||
"97 18 91 63\n",
|
||
"98 27 31 62\n",
|
||
"99 63 73 85"
|
||
]
|
||
},
|
||
"execution_count": 128,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data.tail()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"## 索引数据"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"### 列表式"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 129,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>a</th>\n",
|
||
" <th>b</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>88</td>\n",
|
||
" <td>69</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>81</td>\n",
|
||
" <td>87</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>23</td>\n",
|
||
" <td>18</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>68</td>\n",
|
||
" <td>52</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>30</td>\n",
|
||
" <td>90</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" a b\n",
|
||
"0 88 69\n",
|
||
"1 81 87\n",
|
||
"2 23 18\n",
|
||
"3 68 52\n",
|
||
"4 30 90"
|
||
]
|
||
},
|
||
"execution_count": 129,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"cols = [\"a\", \"b\"]\n",
|
||
"\n",
|
||
"filtered = data[cols]\n",
|
||
"filtered.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"### 坐标式"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 130,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>a</th>\n",
|
||
" <th>b</th>\n",
|
||
" <th>c</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>RZOlH</th>\n",
|
||
" <td>39</td>\n",
|
||
" <td>76</td>\n",
|
||
" <td>77</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>oZ2EJ</th>\n",
|
||
" <td>95</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>nf1gr</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>75</td>\n",
|
||
" <td>86</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>oSTae</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>69</td>\n",
|
||
" <td>61</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7RpMT</th>\n",
|
||
" <td>51</td>\n",
|
||
" <td>75</td>\n",
|
||
" <td>79</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" a b c\n",
|
||
"RZOlH 39 76 77\n",
|
||
"oZ2EJ 95 41 5\n",
|
||
"nf1gr 12 75 86\n",
|
||
"oSTae 4 69 61\n",
|
||
"7RpMT 51 75 79"
|
||
]
|
||
},
|
||
"execution_count": 130,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import random\n",
|
||
"import string\n",
|
||
"\n",
|
||
"random.seed(233)\n",
|
||
"\n",
|
||
"\n",
|
||
"def make_labels(n):\n",
|
||
" alphabet = string.ascii_letters\n",
|
||
" number = string.digits\n",
|
||
"\n",
|
||
" return [\"\".join(random.sample(alphabet + number, 5)) for _ in range(n)]\n",
|
||
"\n",
|
||
"\n",
|
||
"labels = make_labels(100)\n",
|
||
"\n",
|
||
"data = pd.DataFrame(\n",
|
||
" {\n",
|
||
" \"a\": [random.randrange(1, 100) for _ in range(100)],\n",
|
||
" \"b\": [random.randrange(1, 100) for _ in range(100)],\n",
|
||
" \"c\": [random.randrange(1, 100) for _ in range(100)],\n",
|
||
" },\n",
|
||
" index=labels,\n",
|
||
")\n",
|
||
"data.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 131,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"41"
|
||
]
|
||
},
|
||
"execution_count": 131,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data.at[\"oZ2EJ\", \"b\"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 132,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"75"
|
||
]
|
||
},
|
||
"execution_count": 132,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data.iat[2, 1]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 133,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# index error\n",
|
||
"# data.iat[\"oZ2EJ\", 1]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"### 列表坐标式"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 134,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>a</th>\n",
|
||
" <th>b</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>nf1gr</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>75</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>oSTae</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>69</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" a b\n",
|
||
"nf1gr 12 75\n",
|
||
"oSTae 4 69"
|
||
]
|
||
},
|
||
"execution_count": 134,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data.loc[\"nf1gr\":\"oSTae\", \"a\":\"b\"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 135,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>b</th>\n",
|
||
" <th>c</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>nf1gr</th>\n",
|
||
" <td>75</td>\n",
|
||
" <td>86</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" b c\n",
|
||
"nf1gr 75 86"
|
||
]
|
||
},
|
||
"execution_count": 135,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data.iloc[2:3, [1, 2]]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 136,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>a</th>\n",
|
||
" <th>b</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>RZOlH</th>\n",
|
||
" <td>39</td>\n",
|
||
" <td>76</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>oZ2EJ</th>\n",
|
||
" <td>95</td>\n",
|
||
" <td>41</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>nf1gr</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>75</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>oSTae</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>69</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7RpMT</th>\n",
|
||
" <td>51</td>\n",
|
||
" <td>75</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" a b\n",
|
||
"RZOlH 39 76\n",
|
||
"oZ2EJ 95 41\n",
|
||
"nf1gr 12 75\n",
|
||
"oSTae 4 69\n",
|
||
"7RpMT 51 75"
|
||
]
|
||
},
|
||
"execution_count": 136,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data.loc[:\"7RpMT\", :\"b\"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 137,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>a</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>RZOlH</th>\n",
|
||
" <td>39</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>oZ2EJ</th>\n",
|
||
" <td>95</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>nf1gr</th>\n",
|
||
" <td>12</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>oSTae</th>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7RpMT</th>\n",
|
||
" <td>51</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>E14KY</th>\n",
|
||
" <td>77</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>KDjSI</th>\n",
|
||
" <td>79</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>tn75V</th>\n",
|
||
" <td>85</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4ITDH</th>\n",
|
||
" <td>55</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>gxQwc</th>\n",
|
||
" <td>30</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>99 rows × 1 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" a\n",
|
||
"RZOlH 39\n",
|
||
"oZ2EJ 95\n",
|
||
"nf1gr 12\n",
|
||
"oSTae 4\n",
|
||
"7RpMT 51\n",
|
||
"... ..\n",
|
||
"E14KY 77\n",
|
||
"KDjSI 79\n",
|
||
"tn75V 85\n",
|
||
"4ITDH 55\n",
|
||
"gxQwc 30\n",
|
||
"\n",
|
||
"[99 rows x 1 columns]"
|
||
]
|
||
},
|
||
"execution_count": 137,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data.iloc[:-1, 0:1]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 138,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>a</th>\n",
|
||
" <th>c</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>nf1gr</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>86</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>oSTae</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>61</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7RpMT</th>\n",
|
||
" <td>51</td>\n",
|
||
" <td>79</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" a c\n",
|
||
"nf1gr 12 86\n",
|
||
"oSTae 4 61\n",
|
||
"7RpMT 51 79"
|
||
]
|
||
},
|
||
"execution_count": 138,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data.loc[\"nf1gr\":\"7RpMT\", [\"a\", \"c\"]]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 139,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>a</th>\n",
|
||
" <th>c</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>RZOlH</th>\n",
|
||
" <td>39</td>\n",
|
||
" <td>77</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>oZ2EJ</th>\n",
|
||
" <td>95</td>\n",
|
||
" <td>5</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" a c\n",
|
||
"RZOlH 39 77\n",
|
||
"oZ2EJ 95 5"
|
||
]
|
||
},
|
||
"execution_count": 139,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data.iloc[[0, 1], [0, 2]]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"\n",
|
||
"## 拼接数据"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"### append"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 140,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>a</th>\n",
|
||
" <th>b</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>A</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>B</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" a b\n",
|
||
"0 A 1\n",
|
||
"1 B 2"
|
||
]
|
||
},
|
||
"execution_count": 140,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data1 = pd.DataFrame(\n",
|
||
" {\n",
|
||
" \"a\": [\"A\", \"B\"],\n",
|
||
" \"b\": [1, 2],\n",
|
||
" }\n",
|
||
")\n",
|
||
"data1.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 141,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>a</th>\n",
|
||
" <th>b</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>C</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>D</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" a b\n",
|
||
"0 C 3\n",
|
||
"1 D 4"
|
||
]
|
||
},
|
||
"execution_count": 141,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data2 = pd.DataFrame(\n",
|
||
" {\n",
|
||
" \"a\": [\"C\", \"D\"],\n",
|
||
" \"b\": [3, 4],\n",
|
||
" }\n",
|
||
")\n",
|
||
"data2.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 142,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>a</th>\n",
|
||
" <th>b</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>A</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>B</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>C</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>D</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" a b\n",
|
||
"0 A 1\n",
|
||
"1 B 2\n",
|
||
"0 C 3\n",
|
||
"1 D 4"
|
||
]
|
||
},
|
||
"execution_count": 142,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data1.append(data2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 143,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>a</th>\n",
|
||
" <th>b</th>\n",
|
||
" <th>c</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>A</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>B</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>C</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>D</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>E</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>F</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" a b c\n",
|
||
"0 A 1.0 NaN\n",
|
||
"1 B 2.0 NaN\n",
|
||
"0 C 3.0 NaN\n",
|
||
"1 D 4.0 NaN\n",
|
||
"0 E NaN 5.0\n",
|
||
"1 F NaN 6.0"
|
||
]
|
||
},
|
||
"execution_count": 143,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data3 = pd.DataFrame(\n",
|
||
" {\n",
|
||
" \"a\": [\"E\", \"F\"],\n",
|
||
" \"c\": [5, 6],\n",
|
||
" }\n",
|
||
")\n",
|
||
"\n",
|
||
"data1.append(data2).append(data3)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"### merge"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 144,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>a</th>\n",
|
||
" <th>b_x</th>\n",
|
||
" <th>b_y</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>a</td>\n",
|
||
" <td>c</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>b</td>\n",
|
||
" <td>d</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" a b_x b_y\n",
|
||
"0 1 a c\n",
|
||
"1 2 b d"
|
||
]
|
||
},
|
||
"execution_count": 144,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"\n",
|
||
"data1 = pd.DataFrame(\n",
|
||
" {\n",
|
||
" \"a\": [1, 2],\n",
|
||
" \"b\": [\"a\", \"b\"],\n",
|
||
" }\n",
|
||
")\n",
|
||
"\n",
|
||
"data2 = pd.DataFrame(\n",
|
||
" {\n",
|
||
" \"a\": [1, 2],\n",
|
||
" \"b\": [\"c\", \"d\"],\n",
|
||
" }\n",
|
||
")\n",
|
||
"\n",
|
||
"data1.merge(data2, on=\"a\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 2
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython2",
|
||
"version": "2.7.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 0
|
||
}
|