From 66d7b75d83b5f127326b30b1e99f83514f3a7014 Mon Sep 17 00:00:00 2001
From: 100gle <569590461@qq.com>
Date: Wed, 15 Jun 2022 15:03:01 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E6=96=B0=E5=A2=9EVol14=E7=AB=A0?=
=?UTF-8?q?=E8=8A=82=E7=A4=BA=E4=BE=8B=E6=BA=90=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
code/14/concat_style_combination.ipynb | 911 +++++++
code/14/database_style_combination.ipynb | 2997 ++++++++++++++++++++++
2 files changed, 3908 insertions(+)
create mode 100644 code/14/concat_style_combination.ipynb
create mode 100644 code/14/database_style_combination.ipynb
diff --git a/code/14/concat_style_combination.ipynb b/code/14/concat_style_combination.ipynb
new file mode 100644
index 0000000..ad467c8
--- /dev/null
+++ b/code/14/concat_style_combination.ipynb
@@ -0,0 +1,911 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# concat"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "s1 = pd.Series([\"red\", \"black\"], name=\"color\")\n",
+ "s2 = pd.Series([\"white\", \"gray\"], name=\"colorname\")\n",
+ "df1 = pd.DataFrame(\n",
+ " {\n",
+ " \"color\": [\"red\", \"orange\", \"blue\", \"green\"],\n",
+ " }\n",
+ ")\n",
+ "df2 = pd.DataFrame(\n",
+ " {\n",
+ " \"color\": [\"black\", \"white\", \"yellow\", \"purple\"],\n",
+ " \"shape\": [\"circle\", \"square\", \"triangle\", \"star\"],\n",
+ " }\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 red\n",
+ "1 black\n",
+ "0 white\n",
+ "1 gray\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "pd.concat([s1, s2])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " color | \n",
+ " shape | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " red | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " orange | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " blue | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " green | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " black | \n",
+ " circle | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " white | \n",
+ " square | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " yellow | \n",
+ " triangle | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " purple | \n",
+ " star | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " color shape\n",
+ "0 red NaN\n",
+ "1 orange NaN\n",
+ "2 blue NaN\n",
+ "3 green NaN\n",
+ "0 black circle\n",
+ "1 white square\n",
+ "2 yellow triangle\n",
+ "3 purple star"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.concat([df1, df2])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " color | \n",
+ " shape | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " orange | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " white | \n",
+ " square | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " green | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " purple | \n",
+ " star | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " color shape\n",
+ "1 orange NaN\n",
+ "1 white square\n",
+ "3 green NaN\n",
+ "3 purple star"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.concat([df1, df2]).loc[[1,3], :]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " color | \n",
+ " shape | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " orange | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " green | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " color shape\n",
+ "1 orange NaN\n",
+ "3 green NaN"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.concat([df1, df2]).iloc[[1,3], :]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " color | \n",
+ " shape | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " orange | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " green | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " color shape\n",
+ "1 orange NaN\n",
+ "3 green NaN"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.concat([df1, df2], ignore_index=True).loc[[1,3], :]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " color | \n",
+ " shape | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " orange | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " green | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " color shape\n",
+ "1 orange NaN\n",
+ "3 green NaN"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.concat([df1, df2], ignore_index=True).iloc[[1,3], :]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " | \n",
+ " color | \n",
+ " shape | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | df1 | \n",
+ " 0 | \n",
+ " red | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " orange | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " blue | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " green | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | df2 | \n",
+ " 0 | \n",
+ " black | \n",
+ " circle | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " white | \n",
+ " square | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " yellow | \n",
+ " triangle | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " purple | \n",
+ " star | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " color shape\n",
+ "df1 0 red NaN\n",
+ " 1 orange NaN\n",
+ " 2 blue NaN\n",
+ " 3 green NaN\n",
+ "df2 0 black circle\n",
+ " 1 white square\n",
+ " 2 yellow triangle\n",
+ " 3 purple star"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.concat([df1, df2], keys=[\"df1\", \"df2\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "MultiIndex([('df1', 0),\n",
+ " ('df1', 1),\n",
+ " ('df1', 2),\n",
+ " ('df1', 3),\n",
+ " ('df2', 0),\n",
+ " ('df2', 1),\n",
+ " ('df2', 2),\n",
+ " ('df2', 3)],\n",
+ " )"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.concat([df1, df2], keys=[\"df1\", \"df2\"]).index"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " | \n",
+ " color | \n",
+ " shape | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | df1 | \n",
+ " 1 | \n",
+ " orange | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " green | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " color shape\n",
+ "df1 1 orange NaN\n",
+ " 3 green NaN"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.concat([df1, df2], keys=[\"df1\", \"df2\"]).loc[('df1', [1,3]), :]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " color | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " red | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " orange | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " blue | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " green | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " NaN | \n",
+ " white | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " NaN | \n",
+ " gray | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " color 0\n",
+ "0 red NaN\n",
+ "1 orange NaN\n",
+ "2 blue NaN\n",
+ "3 green NaN\n",
+ "0 NaN white\n",
+ "1 NaN gray"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.concat([df1, s2]) # default axis=0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " color | \n",
+ " colorname | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " red | \n",
+ " white | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " orange | \n",
+ " gray | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " blue | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " green | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " color colorname\n",
+ "0 red white\n",
+ "1 orange gray\n",
+ "2 blue NaN\n",
+ "3 green NaN"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.concat([df1, s2], axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " color | \n",
+ " colorname | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " red | \n",
+ " white | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " orange | \n",
+ " gray | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " blue | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " green | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " color colorname\n",
+ "0 red white\n",
+ "1 orange gray\n",
+ "2 blue NaN\n",
+ "3 green NaN"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.concat([df1, s2], axis=\"columns\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.10.4 ('sspai-100-hours-series-python')",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.4"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "7a101baf08afe636412f97dd4a9fc2e65b6f84f0ec50413bf3e19b04a26b8ba6"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/code/14/database_style_combination.ipynb b/code/14/database_style_combination.ipynb
new file mode 100644
index 0000000..469bf77
--- /dev/null
+++ b/code/14/database_style_combination.ipynb
@@ -0,0 +1,2997 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "import random\n",
+ "from string import ascii_letters\n",
+ "\n",
+ "import pandas as pd\n",
+ "\n",
+ "random.seed(3.14)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# 数据库风格的聚合方式"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " x | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " x1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " x2 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " x3 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id x\n",
+ "0 1 x1\n",
+ "1 2 x2\n",
+ "2 3 x3"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x = pd.DataFrame(\n",
+ " {\n",
+ " \"id\": [1, 2, 3],\n",
+ " \"x\": [\"x1\", \"x2\", \"x3\"],\n",
+ " }\n",
+ ")\n",
+ "x.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " y | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " y1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " y3 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " y4 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " y5 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 4 | \n",
+ " y6 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id y\n",
+ "0 1 y1\n",
+ "1 2 y3\n",
+ "2 3 y4\n",
+ "3 3 y5\n",
+ "4 4 y6"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "y = pd.DataFrame(\n",
+ " {\n",
+ " \"id\": [1, 2, 3, 3, 4],\n",
+ " \"y\": [\"y1\", \"y3\", \"y4\", \"y5\", \"y6\"],\n",
+ " }\n",
+ ")\n",
+ "y.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " x | \n",
+ " y | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " x1 | \n",
+ " y1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " x2 | \n",
+ " y3 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " x3 | \n",
+ " y4 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " x3 | \n",
+ " y5 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id x y\n",
+ "0 1 x1 y1\n",
+ "1 2 x2 y3\n",
+ "2 3 x3 y4\n",
+ "3 3 x3 y5"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x.merge(y, on=\"id\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " x | \n",
+ " y | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " x1 | \n",
+ " y1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " x2 | \n",
+ " y3 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " x3 | \n",
+ " y4 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " x3 | \n",
+ " y5 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id x y\n",
+ "0 1 x1 y1\n",
+ "1 2 x2 y3\n",
+ "2 3 x3 y4\n",
+ "3 3 x3 y5"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# `inner` is the default join type\n",
+ "x.merge(y, on=\"id\", how=\"inner\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " x | \n",
+ " y | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " x1 | \n",
+ " y1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " x2 | \n",
+ " y3 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " x3 | \n",
+ " y4 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " x3 | \n",
+ " y5 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 4 | \n",
+ " NaN | \n",
+ " y6 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id x y\n",
+ "0 1 x1 y1\n",
+ "1 2 x2 y3\n",
+ "2 3 x3 y4\n",
+ "3 3 x3 y5\n",
+ "4 4 NaN y6"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x.merge(y, on=\"id\", how=\"outer\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " x | \n",
+ " y | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " x1 | \n",
+ " y1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " x2 | \n",
+ " y3 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " x3 | \n",
+ " y4 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " x3 | \n",
+ " y5 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id x y\n",
+ "0 1 x1 y1\n",
+ "1 2 x2 y3\n",
+ "2 3 x3 y4\n",
+ "3 3 x3 y5"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x.merge(y, on=\"id\", how=\"left\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " x | \n",
+ " y | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " x1 | \n",
+ " y1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " x2 | \n",
+ " y3 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " x3 | \n",
+ " y4 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " x3 | \n",
+ " y5 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 4 | \n",
+ " NaN | \n",
+ " y6 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id x y\n",
+ "0 1 x1 y1\n",
+ "1 2 x2 y3\n",
+ "2 3 x3 y4\n",
+ "3 3 x3 y5\n",
+ "4 4 NaN y6"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x.merge(y, on=\"id\", how=\"right\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "#### 关联条件"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "x[\"uid\"] = [10, 23, 40]\n",
+ "x[\"gender\"] = [\"male\", \"female\", \"male\"]\n",
+ "\n",
+ "y[\"uid\"] = [10, 10, 41, 43, 23]\n",
+ "y[\"sex\"] = [\"male\", None, None, \"female\", \"female\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " x | \n",
+ " uid | \n",
+ " gender | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " x1 | \n",
+ " 10 | \n",
+ " male | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " x2 | \n",
+ " 23 | \n",
+ " female | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " x3 | \n",
+ " 40 | \n",
+ " male | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id x uid gender\n",
+ "0 1 x1 10 male\n",
+ "1 2 x2 23 female\n",
+ "2 3 x3 40 male"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " y | \n",
+ " uid | \n",
+ " sex | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " y1 | \n",
+ " 10 | \n",
+ " male | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " y3 | \n",
+ " 10 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " y4 | \n",
+ " 41 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " y5 | \n",
+ " 43 | \n",
+ " female | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 4 | \n",
+ " y6 | \n",
+ " 23 | \n",
+ " female | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id y uid sex\n",
+ "0 1 y1 10 male\n",
+ "1 2 y3 10 None\n",
+ "2 3 y4 41 None\n",
+ "3 3 y5 43 female\n",
+ "4 4 y6 23 female"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "y.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " x | \n",
+ " uid_x | \n",
+ " gender | \n",
+ " y | \n",
+ " uid_y | \n",
+ " sex | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " x1 | \n",
+ " 10 | \n",
+ " male | \n",
+ " y1 | \n",
+ " 10 | \n",
+ " male | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " x2 | \n",
+ " 23 | \n",
+ " female | \n",
+ " y3 | \n",
+ " 10 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " x3 | \n",
+ " 40 | \n",
+ " male | \n",
+ " y4 | \n",
+ " 41 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " x3 | \n",
+ " 40 | \n",
+ " male | \n",
+ " y5 | \n",
+ " 43 | \n",
+ " female | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id x uid_x gender y uid_y sex\n",
+ "0 1 x1 10 male y1 10 male\n",
+ "1 2 x2 23 female y3 10 None\n",
+ "2 3 x3 40 male y4 41 None\n",
+ "3 3 x3 40 male y5 43 female"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x.merge(y, on=\"id\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " x | \n",
+ " uid | \n",
+ " gender | \n",
+ " y | \n",
+ " sex | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " x1 | \n",
+ " 10 | \n",
+ " male | \n",
+ " y1 | \n",
+ " male | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id x uid gender y sex\n",
+ "0 1 x1 10 male y1 male"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x.merge(y, on=[\"id\", \"uid\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id_x | \n",
+ " x | \n",
+ " uid | \n",
+ " gender | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " x1 | \n",
+ " 10 | \n",
+ " male | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " x2 | \n",
+ " 23 | \n",
+ " female | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " x3 | \n",
+ " 40 | \n",
+ " male | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id_x x uid gender\n",
+ "0 1 x1 10 male\n",
+ "1 2 x2 23 female\n",
+ "2 3 x3 40 male"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x = x.rename(columns={\"id\": \"id_x\"})\n",
+ "x.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id_x | \n",
+ " x | \n",
+ " uid_x | \n",
+ " gender | \n",
+ " id | \n",
+ " y | \n",
+ " uid_y | \n",
+ " sex | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " x1 | \n",
+ " 10 | \n",
+ " male | \n",
+ " 1 | \n",
+ " y1 | \n",
+ " 10 | \n",
+ " male | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " x2 | \n",
+ " 23 | \n",
+ " female | \n",
+ " 2 | \n",
+ " y3 | \n",
+ " 10 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " x3 | \n",
+ " 40 | \n",
+ " male | \n",
+ " 3 | \n",
+ " y4 | \n",
+ " 41 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " x3 | \n",
+ " 40 | \n",
+ " male | \n",
+ " 3 | \n",
+ " y5 | \n",
+ " 43 | \n",
+ " female | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id_x x uid_x gender id y uid_y sex\n",
+ "0 1 x1 10 male 1 y1 10 male\n",
+ "1 2 x2 23 female 2 y3 10 None\n",
+ "2 3 x3 40 male 3 y4 41 None\n",
+ "3 3 x3 40 male 3 y5 43 female"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x.merge(y, left_on=\"id_x\", right_on=\"id\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id_x | \n",
+ " x | \n",
+ " uid_x | \n",
+ " gender | \n",
+ " id | \n",
+ " y | \n",
+ " uid_y | \n",
+ " sex | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " x1 | \n",
+ " 10 | \n",
+ " male | \n",
+ " 1.0 | \n",
+ " y1 | \n",
+ " 10.0 | \n",
+ " male | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " x2 | \n",
+ " 23 | \n",
+ " female | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " x3 | \n",
+ " 40 | \n",
+ " male | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id_x x uid_x gender id y uid_y sex\n",
+ "0 1 x1 10 male 1.0 y1 10.0 male\n",
+ "1 2 x2 23 female NaN NaN NaN NaN\n",
+ "2 3 x3 40 male NaN NaN NaN NaN"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x.merge(y, left_on=[\"id_x\", \"gender\"], right_on=[\"id\", \"sex\"], how=\"left\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "### 类型不一致问题"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a | \n",
+ " b | \n",
+ " datetime | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " O | \n",
+ " 2021-01-01 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 3 | \n",
+ " L | \n",
+ " 2021-01-02 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 5 | \n",
+ " k | \n",
+ " 2021-01-03 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 6 | \n",
+ " F | \n",
+ " 2021-01-04 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 7 | \n",
+ " Z | \n",
+ " 2021-01-05 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a b datetime\n",
+ "0 1 O 2021-01-01\n",
+ "1 3 L 2021-01-02\n",
+ "2 5 k 2021-01-03\n",
+ "3 6 F 2021-01-04\n",
+ "4 7 Z 2021-01-05"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "left = pd.DataFrame(\n",
+ " {\n",
+ " \"a\": [1, 3, 5, 6, 7],\n",
+ " \"b\": random.choices(ascii_letters, k=5),\n",
+ " \"datetime\": pd.date_range(\"20210101\", periods=5, freq=\"D\"),\n",
+ " }\n",
+ ")\n",
+ "left.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a | \n",
+ " c | \n",
+ " datetime | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " u | \n",
+ " 2021-01-03 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 3 | \n",
+ " t | \n",
+ " 2021-01-04 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 5 | \n",
+ " V | \n",
+ " 2021-01-05 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 7 | \n",
+ " x | \n",
+ " 2021-01-06 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 9 | \n",
+ " m | \n",
+ " 2021-01-07 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a c datetime\n",
+ "0 1 u 2021-01-03\n",
+ "1 3 t 2021-01-04\n",
+ "2 5 V 2021-01-05\n",
+ "3 7 x 2021-01-06\n",
+ "4 9 m 2021-01-07"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "right = pd.DataFrame(\n",
+ " {\n",
+ " \"a\": [\"1\", \"3\", \"5\", \"7\", \"9\"],\n",
+ " \"c\": random.choices(ascii_letters, k=5),\n",
+ " \"datetime\": pd.date_range(\"20210103\", periods=5, freq=\"D\").map(lambda v: str(v.date())),\n",
+ " }\n",
+ ")\n",
+ "right.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# error\n",
+ "# `a` in left is integer type, but in right is string.\n",
+ "\n",
+ "# left.merge(right, on=[\"a\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# error\n",
+ "# `datetime` in left is datetime type, but in right is string.\n",
+ "\n",
+ "# left.merge(right, on=[\"datetime\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# error\n",
+ "# the types of the keys in left are not the same as those of the keys in right.\n",
+ "\n",
+ "# left.merge(right, on=[\"a\", \"datetime\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a | \n",
+ " b | \n",
+ " datetime_x | \n",
+ " c | \n",
+ " datetime_y | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " O | \n",
+ " 2021-01-01 | \n",
+ " u | \n",
+ " 2021-01-03 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 3 | \n",
+ " L | \n",
+ " 2021-01-02 | \n",
+ " t | \n",
+ " 2021-01-04 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 5 | \n",
+ " k | \n",
+ " 2021-01-03 | \n",
+ " V | \n",
+ " 2021-01-05 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 7 | \n",
+ " Z | \n",
+ " 2021-01-05 | \n",
+ " x | \n",
+ " 2021-01-06 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a b datetime_x c datetime_y\n",
+ "0 1 O 2021-01-01 u 2021-01-03\n",
+ "1 3 L 2021-01-02 t 2021-01-04\n",
+ "2 5 k 2021-01-03 V 2021-01-05\n",
+ "3 7 Z 2021-01-05 x 2021-01-06"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "right[\"a\"] = right[\"a\"].astype(int)\n",
+ "left.merge(right, on=[\"a\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a_x | \n",
+ " b | \n",
+ " datetime | \n",
+ " a_y | \n",
+ " c | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 5 | \n",
+ " k | \n",
+ " 2021-01-03 | \n",
+ " 1 | \n",
+ " u | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 6 | \n",
+ " F | \n",
+ " 2021-01-04 | \n",
+ " 3 | \n",
+ " t | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 7 | \n",
+ " Z | \n",
+ " 2021-01-05 | \n",
+ " 5 | \n",
+ " V | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a_x b datetime a_y c\n",
+ "0 5 k 2021-01-03 1 u\n",
+ "1 6 F 2021-01-04 3 t\n",
+ "2 7 Z 2021-01-05 5 V"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "right[\"datetime\"] = pd.to_datetime(right[\"datetime\"])\n",
+ "left.merge(right, on=[\"datetime\"])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "整型与浮点型关联时的情况:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a | \n",
+ " c | \n",
+ " datetime | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1.0 | \n",
+ " u | \n",
+ " 2021-01-03 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 3.0 | \n",
+ " t | \n",
+ " 2021-01-04 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 5.0 | \n",
+ " V | \n",
+ " 2021-01-05 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 7.0 | \n",
+ " x | \n",
+ " 2021-01-06 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 9.0 | \n",
+ " m | \n",
+ " 2021-01-07 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a c datetime\n",
+ "0 1.0 u 2021-01-03\n",
+ "1 3.0 t 2021-01-04\n",
+ "2 5.0 V 2021-01-05\n",
+ "3 7.0 x 2021-01-06\n",
+ "4 9.0 m 2021-01-07"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "right[\"a\"] = right[\"a\"].astype(float)\n",
+ "right.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a | \n",
+ " b | \n",
+ " datetime_x | \n",
+ " c | \n",
+ " datetime_y | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " O | \n",
+ " 2021-01-01 | \n",
+ " u | \n",
+ " 2021-01-03 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 3 | \n",
+ " L | \n",
+ " 2021-01-02 | \n",
+ " t | \n",
+ " 2021-01-04 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 5 | \n",
+ " k | \n",
+ " 2021-01-03 | \n",
+ " V | \n",
+ " 2021-01-05 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 7 | \n",
+ " Z | \n",
+ " 2021-01-05 | \n",
+ " x | \n",
+ " 2021-01-06 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a b datetime_x c datetime_y\n",
+ "0 1 O 2021-01-01 u 2021-01-03\n",
+ "1 3 L 2021-01-02 t 2021-01-04\n",
+ "2 5 k 2021-01-03 V 2021-01-05\n",
+ "3 7 Z 2021-01-05 x 2021-01-06"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "left.merge(right, on=[\"a\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "a int64\n",
+ "b object\n",
+ "datetime_x datetime64[ns]\n",
+ "c object\n",
+ "datetime_y datetime64[ns]\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "left.merge(right, on=[\"a\"]).dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a | \n",
+ " c | \n",
+ " datetime | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1.0 | \n",
+ " u | \n",
+ " 2021-01-03 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 3.1 | \n",
+ " t | \n",
+ " 2021-01-04 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 5.0 | \n",
+ " V | \n",
+ " 2021-01-05 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 7.4 | \n",
+ " x | \n",
+ " 2021-01-06 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 9.5 | \n",
+ " m | \n",
+ " 2021-01-07 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a c datetime\n",
+ "0 1.0 u 2021-01-03\n",
+ "1 3.1 t 2021-01-04\n",
+ "2 5.0 V 2021-01-05\n",
+ "3 7.4 x 2021-01-06\n",
+ "4 9.5 m 2021-01-07"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "right[\"a\"] = [1.0, 3.1, 5.0, 7.4, 9.5]\n",
+ "right.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/Bobot/.virtualenvs/sspai-100-hours-series-python/lib/python3.10/site-packages/pandas/core/reshape/merge.py:1215: UserWarning: You are merging on int and float columns where the float values are not equal to their int representation.\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a | \n",
+ " b | \n",
+ " datetime_x | \n",
+ " c | \n",
+ " datetime_y | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " O | \n",
+ " 2021-01-01 | \n",
+ " u | \n",
+ " 2021-01-03 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 5 | \n",
+ " k | \n",
+ " 2021-01-03 | \n",
+ " V | \n",
+ " 2021-01-05 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a b datetime_x c datetime_y\n",
+ "0 1 O 2021-01-01 u 2021-01-03\n",
+ "1 5 k 2021-01-03 V 2021-01-05"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "left.merge(right, on=[\"a\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a_x | \n",
+ " b | \n",
+ " datetime | \n",
+ " a_y | \n",
+ " c | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 5 | \n",
+ " k | \n",
+ " 2021-01-03 | \n",
+ " 1.0 | \n",
+ " u | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 6 | \n",
+ " F | \n",
+ " 2021-01-04 | \n",
+ " 3.1 | \n",
+ " t | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 7 | \n",
+ " Z | \n",
+ " 2021-01-05 | \n",
+ " 5.0 | \n",
+ " V | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a_x b datetime a_y c\n",
+ "0 5 k 2021-01-03 1.0 u\n",
+ "1 6 F 2021-01-04 3.1 t\n",
+ "2 7 Z 2021-01-05 5.0 V"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "left.merge(right, on=[\"datetime\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/Bobot/.virtualenvs/sspai-100-hours-series-python/lib/python3.10/site-packages/pandas/core/reshape/merge.py:1215: UserWarning: You are merging on int and float columns where the float values are not equal to their int representation.\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a | \n",
+ " b | \n",
+ " datetime | \n",
+ " c | \n",
+ " datetime_right | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " O | \n",
+ " 2021-01-01 | \n",
+ " u | \n",
+ " 2021-01-03 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 5 | \n",
+ " k | \n",
+ " 2021-01-03 | \n",
+ " V | \n",
+ " 2021-01-05 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a b datetime c datetime_right\n",
+ "0 1 O 2021-01-01 u 2021-01-03\n",
+ "1 5 k 2021-01-03 V 2021-01-05"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "left.merge(right, on=[\"a\"], suffixes=(\"\", \"_right\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a | \n",
+ " b | \n",
+ " datetime | \n",
+ " c | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 5 | \n",
+ " k | \n",
+ " 2021-01-03 | \n",
+ " u | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 6 | \n",
+ " F | \n",
+ " 2021-01-04 | \n",
+ " t | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 7 | \n",
+ " Z | \n",
+ " 2021-01-05 | \n",
+ " V | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a b datetime c\n",
+ "0 5 k 2021-01-03 u\n",
+ "1 6 F 2021-01-04 t\n",
+ "2 7 Z 2021-01-05 V"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "left.merge(right.drop(\"a\", axis=1), on=[\"datetime\"])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "## join"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " letter_left | \n",
+ "
\n",
+ " \n",
+ " | id | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " r | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " a | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " q | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " e | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " letter_left\n",
+ "id \n",
+ "1 r\n",
+ "3 a\n",
+ "3 q\n",
+ "6 e\n",
+ "7 A"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "left = pd.DataFrame(\n",
+ " {\n",
+ " \"id\": [1, 3, 3, 6, 7],\n",
+ " \"letter_left\": random.sample(ascii_letters, k=5),\n",
+ " }\n",
+ ").set_index(\"id\")\n",
+ "left.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " letter_right | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " J | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 3 | \n",
+ " v | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 5 | \n",
+ " e | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 7 | \n",
+ " r | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 9 | \n",
+ " t | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id letter_right\n",
+ "0 1 J\n",
+ "1 3 v\n",
+ "2 5 e\n",
+ "3 7 r\n",
+ "4 9 t"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "right = pd.DataFrame(\n",
+ " {\n",
+ " \"id\": [\"1\", \"3\", \"5\", \"7\", \"9\"],\n",
+ " \"letter_right\": random.choices(ascii_letters, k=5),\n",
+ " }\n",
+ ")\n",
+ "\n",
+ "right.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " letter_left | \n",
+ " id | \n",
+ " letter_right | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " r | \n",
+ " 3 | \n",
+ " v | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " a | \n",
+ " 7 | \n",
+ " r | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " q | \n",
+ " 7 | \n",
+ " r | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " e | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " A | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " letter_left id letter_right\n",
+ "1 r 3 v\n",
+ "3 a 7 r\n",
+ "3 q 7 r\n",
+ "6 e NaN NaN\n",
+ "7 A NaN NaN"
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
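+ "# default left join, aligned on index labels: left's `id` index vs. right's default 0..4 index (not right's `id` column)\n",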
+ "\n",
+ "left.join(right)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " letter_left | \n",
+ " id | \n",
+ " letter_right | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " r | \n",
+ " 3 | \n",
+ " v | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " a | \n",
+ " 7 | \n",
+ " r | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " q | \n",
+ " 7 | \n",
+ " r | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " e | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " A | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " letter_left id letter_right\n",
+ "1 r 3 v\n",
+ "3 a 7 r\n",
+ "3 q 7 r\n",
+ "6 e NaN NaN\n",
+ "7 A NaN NaN"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "left.join(right, how=\"left\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " letter | \n",
+ "
\n",
+ " \n",
+ " | id | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " r | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " a | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " q | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " e | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " letter\n",
+ "id \n",
+ "1 r\n",
+ "3 a\n",
+ "3 q\n",
+ "6 e\n",
+ "7 A"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "left = left.rename(columns={\"letter_left\": \"letter\"})\n",
+ "left.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " letter | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " J | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 3 | \n",
+ " v | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 5 | \n",
+ " e | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 7 | \n",
+ " r | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 9 | \n",
+ " t | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id letter\n",
+ "0 1 J\n",
+ "1 3 v\n",
+ "2 5 e\n",
+ "3 7 r\n",
+ "4 9 t"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "right = right.rename(columns={\"letter_right\": \"letter\"})\n",
+ "right.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " letter_left | \n",
+ " id | \n",
+ " letter_right | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " r | \n",
+ " 3 | \n",
+ " v | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " a | \n",
+ " 7 | \n",
+ " r | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " q | \n",
+ " 7 | \n",
+ " r | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " letter_left id letter_right\n",
+ "1 r 3 v\n",
+ "3 a 7 r\n",
+ "3 q 7 r"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "left.join(right, how=\"inner\", lsuffix=\"_left\", rsuffix=\"_right\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.10.4 ('sspai-100-hours-series-python')",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.4"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "7a101baf08afe636412f97dd4a9fc2e65b6f84f0ec50413bf3e19b04a26b8ba6"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}