diff --git a/code/18/pandas_advanced_tricks.ipynb b/code/18/pandas_advanced_tricks.ipynb
new file mode 100644
index 0000000..3ed3f67
--- /dev/null
+++ b/code/18/pandas_advanced_tricks.ipynb
@@ -0,0 +1,2464 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": true,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# applymap"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a | \n",
+ " b | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 4 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 4 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a b\n",
+ "0 4 6\n",
+ "1 4 3"
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "df = pd.DataFrame(\n",
+ " dict(\n",
+ " a=[[13, 3, 5, 6], [2, 4, 65, 6]],\n",
+ " b=[[44, 5, 6, 88, 0.5, 1], [\"22\", 40, 100]],\n",
+ " )\n",
+ ")\n",
+ "df.head()\n",
+ "df.applymap(lambda x: len(x))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# pipe"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Complex is better than complicated.Flat is better than nested."
+ ]
+ }
+ ],
+ "source": [
+ "%%bash\n",
+ "echo \"\n",
+ "Beautiful is better than ugly.\n",
+ "Explicit is better than implicit.\n",
+ "Simple is better than complex.\n",
+ "Complex is better than complicated.\n",
+ "Flat is better than nested.\n",
+ "Sparse is better than dense.\" | \\\n",
+ "grep ed | \\\n",
+ "tr -d \"\\n\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "## 管道操作模拟"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "def echo():\n",
+ " poetry = \"\"\"\n",
+ "Beautiful is better than ugly.\n",
+ "Explicit is better than implicit.\n",
+ "Simple is better than complex.\n",
+ "Complex is better than complicated.\n",
+ "Flat is better than nested.\n",
+ "Sparse is better than dense.\"\n",
+ " \"\"\"\n",
+ " return poetry.strip()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "def grep(content: str, pattern: str):\n",
+ " import re\n",
+ "\n",
+ " filtered = []\n",
+ " content = content.splitlines()\n",
+ " for line in content:\n",
+ " if re.search(pattern, line):\n",
+ " filtered.append(line)\n",
+ "\n",
+ " return \"\".join(filtered)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "def tr(content: str, delete: bool, char: str):\n",
+ " final = []\n",
+ "\n",
+ " if delete:\n",
+ " content = content.splitlines()\n",
+ " for line in content:\n",
+ " new_line = line.replace(char, \"\")\n",
+ " final.append(new_line)\n",
+ " if final:\n",
+ " return \"\".join(final)\n",
+ "\n",
+ " return content"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Complex is better than complicated.Flat is better than nested.'"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# pipe line\n",
+ "tr(\n",
+ " content=grep(\n",
+ " content=echo(),\n",
+ " pattern=\"ed\",\n",
+ " ),\n",
+ " delete=True,\n",
+ " char=\"\\n\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "## DataFrame.pipe 示例"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " a | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " b | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " c | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " d | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " e | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a\n",
+ "0 a\n",
+ "1 b\n",
+ "2 c\n",
+ "3 d\n",
+ "4 e"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "data = pd.DataFrame(\n",
+ " dict(\n",
+ " a=list(\"abcde\"),\n",
+ " )\n",
+ ")\n",
+ "data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "def parse(df: pd.DataFrame):\n",
+ " length = df[\"a\"].shape[0]\n",
+ " df[\"b\"] = list(range(1, length + 1))\n",
+ " df[\"c\"] = [v.upper() if v in [\"a\", \"b\", \"c\"] else None for v in df[\"a\"]]\n",
+ "\n",
+ " return df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "def upper(df: pd.DataFrame, cols=None):\n",
+ " if cols:\n",
+ " df[cols] = df[cols].astype(str).applymap(lambda s: s.upper())\n",
+ " return df\n",
+ " return df.applymap(lambda s: str(s).upper())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a | \n",
+ " b | \n",
+ " c | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " A | \n",
+ " 1 | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " B | \n",
+ " 2 | \n",
+ " B | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " C | \n",
+ " 3 | \n",
+ " C | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " D | \n",
+ " 4 | \n",
+ " NONE | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " E | \n",
+ " 5 | \n",
+ " NONE | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a b c\n",
+ "0 A 1 A\n",
+ "1 B 2 B\n",
+ "2 C 3 C\n",
+ "3 D 4 NONE\n",
+ "4 E 5 NONE"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.pipe(parse).pipe(upper)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a | \n",
+ " b | \n",
+ " c | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " A | \n",
+ " 1 | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " B | \n",
+ " 2 | \n",
+ " B | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " C | \n",
+ " 3 | \n",
+ " C | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " D | \n",
+ " 4 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " E | \n",
+ " 5 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a b c\n",
+ "0 A 1 A\n",
+ "1 B 2 B\n",
+ "2 C 3 C\n",
+ "3 D 4 None\n",
+ "4 E 5 None"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.pipe(parse).pipe(upper, cols=[\"a\"])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# rename"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['a', 'b'], dtype='object')"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "data = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6]))\n",
+ "df = data.copy(deep=True) # 1\n",
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['A', 'B'], dtype='object')"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mapper = [\"A\", \"B\"]\n",
+ "df.columns = mapper\n",
+ "df\n",
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a | \n",
+ " b | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a b\n",
+ "0 1 4\n",
+ "1 2 5\n",
+ "2 3 6"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = data.copy(deep=True)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['a', 'B']"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cols = df.columns.tolist() # 1\n",
+ "cols[cols.index(\"b\")] = \"B\" # 2\n",
+ "cols # 3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['a', 'B'], dtype='object')"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.columns = cols # 4\n",
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a | \n",
+ " B | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a B\n",
+ "0 1 4\n",
+ "1 2 5\n",
+ "2 3 6"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = data.copy(deep=True)\n",
+ "df.rename(columns={\"b\": \"B\"}) # 1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['a', 'b'], dtype='object')"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a | \n",
+ " B | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a B\n",
+ "0 1 4\n",
+ "1 2 5\n",
+ "2 3 6"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.rename(columns=lambda v: \"B\" if v == \"b\" else v)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# filter"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a1 | \n",
+ " a2 | \n",
+ " a3 | \n",
+ " b1 | \n",
+ " c1 | \n",
+ " d1 | \n",
+ " b2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " k | \n",
+ " U | \n",
+ " H | \n",
+ " p | \n",
+ " Z | \n",
+ " p | \n",
+ " G | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " z | \n",
+ " N | \n",
+ " A | \n",
+ " c | \n",
+ " B | \n",
+ " W | \n",
+ " B | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " X | \n",
+ " R | \n",
+ " B | \n",
+ " z | \n",
+ " K | \n",
+ " I | \n",
+ " B | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Y | \n",
+ " r | \n",
+ " y | \n",
+ " a | \n",
+ " k | \n",
+ " u | \n",
+ " F | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Y | \n",
+ " H | \n",
+ " Q | \n",
+ " D | \n",
+ " J | \n",
+ " j | \n",
+ " J | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a1 a2 a3 b1 c1 d1 b2\n",
+ "0 k U H p Z p G\n",
+ "1 z N A c B W B\n",
+ "2 X R B z K I B\n",
+ "3 Y r y a k u F\n",
+ "4 Y H Q D J j J"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import random\n",
+ "import string\n",
+ "\n",
+ "import pandas as pd\n",
+ "\n",
+ "data = pd.DataFrame(\n",
+ " dict(\n",
+ " a1=random.choices(string.ascii_letters, k=5),\n",
+ " a2=random.choices(string.ascii_letters, k=5),\n",
+ " a3=random.choices(string.ascii_letters, k=5),\n",
+ " b1=random.choices(string.ascii_letters, k=5),\n",
+ " c1=random.choices(string.ascii_letters, k=5),\n",
+ " d1=random.choices(string.ascii_letters, k=5),\n",
+ " b2=random.choices(string.ascii_letters, k=5),\n",
+ " )\n",
+ ")\n",
+ "data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a1 | \n",
+ " b1 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " k | \n",
+ " p | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " z | \n",
+ " c | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " X | \n",
+ " z | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Y | \n",
+ " a | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Y | \n",
+ " D | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a1 b1\n",
+ "0 k p\n",
+ "1 z c\n",
+ "2 X z\n",
+ "3 Y a\n",
+ "4 Y D"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.loc[:, [\"a1\", \"b1\"]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a1 | \n",
+ " b1 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " k | \n",
+ " p | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " z | \n",
+ " c | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " X | \n",
+ " z | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Y | \n",
+ " a | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Y | \n",
+ " D | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a1 b1\n",
+ "0 k p\n",
+ "1 z c\n",
+ "2 X z\n",
+ "3 Y a\n",
+ "4 Y D"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data[[\"a1\", \"b1\"]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['a1', 'a2', 'a3', 'b1', 'c1', 'd1', 'b2']\n"
+ ]
+ }
+ ],
+ "source": [
+ "cols = data.columns.tolist()\n",
+ "print(cols)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['a1', 'b1', 'c1', 'd1']\n"
+ ]
+ }
+ ],
+ "source": [
+ "cols = [col for col in cols if col.endswith(\"1\")]\n",
+ "print(cols)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a1 | \n",
+ " b1 | \n",
+ " c1 | \n",
+ " d1 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " k | \n",
+ " p | \n",
+ " Z | \n",
+ " p | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " z | \n",
+ " c | \n",
+ " B | \n",
+ " W | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " X | \n",
+ " z | \n",
+ " K | \n",
+ " I | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Y | \n",
+ " a | \n",
+ " k | \n",
+ " u | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Y | \n",
+ " D | \n",
+ " J | \n",
+ " j | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a1 b1 c1 d1\n",
+ "0 k p Z p\n",
+ "1 z c B W\n",
+ "2 X z K I\n",
+ "3 Y a k u\n",
+ "4 Y D J j"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data[cols]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a1 | \n",
+ " b1 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " k | \n",
+ " p | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " z | \n",
+ " c | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " X | \n",
+ " z | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Y | \n",
+ " a | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Y | \n",
+ " D | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a1 b1\n",
+ "0 k p\n",
+ "1 z c\n",
+ "2 X z\n",
+ "3 Y a\n",
+ "4 Y D"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.filter(items=[\"a1\", \"b1\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a1 | \n",
+ " b1 | \n",
+ " c1 | \n",
+ " d1 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " k | \n",
+ " p | \n",
+ " Z | \n",
+ " p | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " z | \n",
+ " c | \n",
+ " B | \n",
+ " W | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " X | \n",
+ " z | \n",
+ " K | \n",
+ " I | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Y | \n",
+ " a | \n",
+ " k | \n",
+ " u | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Y | \n",
+ " D | \n",
+ " J | \n",
+ " j | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a1 b1 c1 d1\n",
+ "0 k p Z p\n",
+ "1 z c B W\n",
+ "2 X z K I\n",
+ "3 Y a k u\n",
+ "4 Y D J j"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.filter(regex=r\"^[a-d]1$\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a1 | \n",
+ " a2 | \n",
+ " b1 | \n",
+ " c1 | \n",
+ " b2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " k | \n",
+ " U | \n",
+ " p | \n",
+ " Z | \n",
+ " G | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " z | \n",
+ " N | \n",
+ " c | \n",
+ " B | \n",
+ " B | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " X | \n",
+ " R | \n",
+ " z | \n",
+ " K | \n",
+ " B | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Y | \n",
+ " r | \n",
+ " a | \n",
+ " k | \n",
+ " F | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Y | \n",
+ " H | \n",
+ " D | \n",
+ " J | \n",
+ " J | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a1 a2 b1 c1 b2\n",
+ "0 k U p Z G\n",
+ "1 z N c B B\n",
+ "2 X R z K B\n",
+ "3 Y r a k F\n",
+ "4 Y H D J J"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.filter(regex=r\"^(a|b|c)[1-2]$\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# assign"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " a\n",
+ "0 h\n",
+ "1 e\n",
+ "2 l\n",
+ "3 l\n",
+ "4 o\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "data = pd.DataFrame(\n",
+ " dict(\n",
+ " a=list(\"hello\"),\n",
+ " )\n",
+ ")\n",
+ "print(data.head())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " a b\n",
+ "0 H 1\n",
+ "1 E 2\n",
+ "2 L 3\n",
+ "3 L 4\n",
+ "4 O 5\n"
+ ]
+ }
+ ],
+ "source": [
+ "data[\"a\"] = data[\"a\"].str.upper()\n",
+ "data[\"b\"] = list(range(1, 6))\n",
+ "print(data.head())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a | \n",
+ " b | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " H | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " E | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " L | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " L | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " O | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a b\n",
+ "0 H 1\n",
+ "1 E 2\n",
+ "2 L 3\n",
+ "3 L 4\n",
+ "4 O 5"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data = pd.DataFrame(\n",
+ " dict(\n",
+ " a=list(\"hello\"),\n",
+ " )\n",
+ ")\n",
+ "data.assign(\n",
+ " a=data[\"a\"].str.upper(),\n",
+ " b=list(range(1, 6)),\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# eval & query"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " uid | \n",
+ " pid | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " O | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " L | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " k | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " F | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " Z | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " uid pid\n",
+ "0 1 O\n",
+ "1 2 L\n",
+ "2 3 k\n",
+ "3 4 F\n",
+ "4 5 Z"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import random\n",
+ "import string\n",
+ "\n",
+ "import pandas as pd\n",
+ "\n",
+ "random.seed(3.14)\n",
+ "\n",
+ "data = pd.DataFrame(\n",
+ " dict(\n",
+ " uid=list(range(1, 11)),\n",
+ " pid=random.choices(string.ascii_letters, k=10),\n",
+ " )\n",
+ ")\n",
+ "data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " uid | \n",
+ " pid | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " L | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " F | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 6 | \n",
+ " u | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 8 | \n",
+ " V | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 10 | \n",
+ " m | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " uid pid\n",
+ "1 2 L\n",
+ "3 4 F\n",
+ "5 6 u\n",
+ "7 8 V\n",
+ "9 10 m"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.loc[data[\"uid\"] % 2 == 0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " uid | \n",
+ " pid | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " L | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " F | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " uid pid\n",
+ "1 2 L\n",
+ "3 4 F"
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# fmt:off\n",
+ "data.loc[\n",
+ " (data[\"uid\"] % 2 == 0)\n",
+ " & (data[\"pid\"].str.isupper())\n",
+ " & (data[\"pid\"] != \"V\")\n",
+ "]\n",
+ "# fmt:on"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " uid | \n",
+ " pid | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " L | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " F | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " uid pid\n",
+ "1 2 L\n",
+ "3 4 F"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cond = data.eval(\n",
+ " \"\"\"\n",
+ " uid % 2 == 0 and \\\n",
+ " pid.str.isupper() and \\\n",
+ " pid != 'V'\n",
+ " \"\"\"\n",
+ ")\n",
+ "data.loc[cond]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " uid | \n",
+ " pid | \n",
+ " code | \n",
+ " is_vip | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " O | \n",
+ " 1_O | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " L | \n",
+ " 2_L | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " k | \n",
+ " 3_k | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " F | \n",
+ " 4_F | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " Z | \n",
+ " 5_Z | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 6 | \n",
+ " u | \n",
+ " 6_u | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 7 | \n",
+ " t | \n",
+ " 7_t | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 8 | \n",
+ " V | \n",
+ " 8_V | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 9 | \n",
+ " x | \n",
+ " 9_x | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 10 | \n",
+ " m | \n",
+ " 10_m | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " uid pid code is_vip\n",
+ "0 1 O 1_O True\n",
+ "1 2 L 2_L True\n",
+ "2 3 k 3_k False\n",
+ "3 4 F 4_F True\n",
+ "4 5 Z 5_Z True\n",
+ "5 6 u 6_u False\n",
+ "6 7 t 7_t False\n",
+ "7 8 V 8_V True\n",
+ "8 9 x 9_x False\n",
+ "9 10 m 10_m False"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.eval(\n",
+ " \"\"\"\n",
+ " uid = uid.astype('str')\n",
+ " code = uid.str.cat(pid, sep=\"_\")\n",
+ " is_vip = pid.str.isupper()\n",
+ " \"\"\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " uid | \n",
+ " pid | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " L | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " F | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " uid pid\n",
+ "1 2 L\n",
+ "3 4 F"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.query(\n",
+ " \"\"\"\n",
+ " uid % 2 == 0 and \\\n",
+ " pid.str.isupper() and \\\n",
+ " pid != 'V'\n",
+ " \"\"\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " uid | \n",
+ " pid | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " L | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " F | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " uid pid\n",
+ "1 2 L\n",
+ "3 4 F"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.query(\"\"\"uid % 2 == 0 and pid.str.isupper() and pid != 'V'\"\"\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.10.4 ('sspai-100-hours-series-python')",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.4"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "7a101baf08afe636412f97dd4a9fc2e65b6f84f0ec50413bf3e19b04a26b8ba6"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/code/18/plot.ipynb b/code/18/plot.ipynb
new file mode 100644
index 0000000..1723be1
--- /dev/null
+++ b/code/18/plot.ipynb
@@ -0,0 +1,460 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "\n",
+ "%matplotlib inline\n",
+ "%config InlineBackend.figure_format = 'svg'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "titanic = pd.read_csv(\"../../data/titanic.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " PassengerId | \n",
+ " Survived | \n",
+ " Pclass | \n",
+ " Name | \n",
+ " Sex | \n",
+ " Age | \n",
+ " SibSp | \n",
+ " Parch | \n",
+ " Ticket | \n",
+ " Fare | \n",
+ " Cabin | \n",
+ " Embarked | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Braund, Mr. Owen Harris | \n",
+ " male | \n",
+ " 22.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " A/5 21171 | \n",
+ " 7.2500 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n",
+ " female | \n",
+ " 38.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " PC 17599 | \n",
+ " 71.2833 | \n",
+ " C85 | \n",
+ " C | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " Heikkinen, Miss. Laina | \n",
+ " female | \n",
+ " 26.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " STON/O2. 3101282 | \n",
+ " 7.9250 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n",
+ " female | \n",
+ " 35.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 113803 | \n",
+ " 53.1000 | \n",
+ " C123 | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Allen, Mr. William Henry | \n",
+ " male | \n",
+ " 35.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 373450 | \n",
+ " 8.0500 | \n",
+ " NaN | \n",
+ " S | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " PassengerId Survived Pclass \\\n",
+ "0 1 0 3 \n",
+ "1 2 1 1 \n",
+ "2 3 1 3 \n",
+ "3 4 1 1 \n",
+ "4 5 0 3 \n",
+ "\n",
+ " Name Sex Age SibSp \\\n",
+ "0 Braund, Mr. Owen Harris male 22.0 1 \n",
+ "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
+ "2 Heikkinen, Miss. Laina female 26.0 0 \n",
+ "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
+ "4 Allen, Mr. William Henry male 35.0 0 \n",
+ "\n",
+ " Parch Ticket Fare Cabin Embarked \n",
+ "0 0 A/5 21171 7.2500 NaN S \n",
+ "1 0 PC 17599 71.2833 C85 C \n",
+ "2 0 STON/O2. 3101282 7.9250 NaN S \n",
+ "3 0 113803 53.1000 C123 S \n",
+ "4 0 373450 8.0500 NaN S "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "titanic.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/svg+xml": "\n\n\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "titanic[\"Sex\"].value_counts().plot.bar()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[Text(0, 0, 'male'), Text(1, 0, 'female')]"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/svg+xml": "\n\n\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "ax = titanic[\"Sex\"].value_counts().plot.bar()\n",
+ "ax.set_title(\"The Bar Chart of Sex\")\n",
+ "ax.set_xlabel(\"Sex\")\n",
+ "ax.set_ylabel(\"Counter\")\n",
+ "ax.set_xticklabels([\"male\", \"female\"], rotation=0)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/svg+xml": "\n\n\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "titanic[\"Sex\"].value_counts().plot.pie()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/svg+xml": "\n\n\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "titanic[\"Sex\"].value_counts().plot.bar(\n",
+ " figsize=(8, 6),\n",
+ " title=\"The Bar Chart of Sex\",\n",
+ " rot=45,\n",
+ " xlabel=\"Sex\",\n",
+ " ylabel=\"Number\",\n",
+ " ylim=(0, 550),\n",
+ " legend=True,\n",
+ " fontsize=8.5,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/svg+xml": "\n\n\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from matplotlib import image\n",
+ "from matplotlib.offsetbox import OffsetImage\n",
+ "\n",
+ "\n",
+ "logo = image.imread(\"../../data/python-logo-master-v3-TM.png\")\n",
+ "\n",
+ "ax = titanic[\"Sex\"].value_counts().plot.pie(\n",
+ " figsize=(6, 8),\n",
+ " ylabel=\"\",\n",
+ " title=\"The Pie Chart of Sex\",\n",
+ " explode=(0, 0.05),\n",
+ " autopct=lambda pct: f\"{round(pct, 2)}%\",\n",
+ " pctdistance=0.4,\n",
+ " shadow=True,\n",
+ " colors=(\"steelblue\", \"orange\"),\n",
+ ")\n",
+ "\n",
+ "for n, tx in enumerate(ax.texts):\n",
+ " if (n+1) % 2==0:\n",
+ " tx.set_color(\"white\")\n",
+ "\n",
+ "logo_image = OffsetImage(logo, zoom=.5)\n",
+ "logo_image.set_offset((35, 25))\n",
+ "ax.add_artist(logo_image)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.10.4 ('sspai-100-hours-series-python')",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.4"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "7a101baf08afe636412f97dd4a9fc2e65b6f84f0ec50413bf3e19b04a26b8ba6"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/data/python-logo-master-v3-TM.png b/data/python-logo-master-v3-TM.png
new file mode 100644
index 0000000..71f9420
Binary files /dev/null and b/data/python-logo-master-v3-TM.png differ