diff --git a/code/18/pandas_advanced_tricks.ipynb b/code/18/pandas_advanced_tricks.ipynb new file mode 100644 index 0000000..3ed3f67 --- /dev/null +++ b/code/18/pandas_advanced_tricks.ipynb @@ -0,0 +1,2464 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": true, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# applymap" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
046
143
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 4 6\n", + "1 4 3" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "df = pd.DataFrame(\n", + " dict(\n", + " a=[[13, 3, 5, 6], [2, 4, 65, 6]],\n", + " b=[[44, 5, 6, 88, 0.5, 1], [\"22\", 40, 100]],\n", + " )\n", + ")\n", + "df.head()\n", + "df.applymap(lambda x: len(x))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# pipe" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Complex is better than complicated.Flat is better than nested." + ] + } + ], + "source": [ + "%%bash\n", + "echo \"\n", + "Beautiful is better than ugly.\n", + "Explicit is better than implicit.\n", + "Simple is better than complex.\n", + "Complex is better than complicated.\n", + "Flat is better than nested.\n", + "Sparse is better than dense.\" | \\\n", + "grep ed | \\\n", + "tr -d \"\\n\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## 管道操作模拟" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "def echo():\n", + " poetry = \"\"\"\n", + "Beautiful is better than ugly.\n", + "Explicit is better than implicit.\n", + "Simple is better than complex.\n", + "Complex is better than complicated.\n", + "Flat is better than nested.\n", + "Sparse is better than dense.\"\n", + " \"\"\"\n", + " return poetry.strip()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "def grep(content: str, pattern: str):\n", + " import re\n", + "\n", + " filtered = []\n", + " content = content.splitlines()\n", + " for line in content:\n", + " if re.search(pattern, line):\n", + " filtered.append(line)\n", + "\n", + " return \"\".join(filtered)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "def tr(content: str, delete: bool, char: str):\n", + " final = []\n", + "\n", + " if delete:\n", + " content = content.splitlines()\n", + " for line in content:\n", + " new_line = line.replace(char, \"\")\n", + " final.append(new_line)\n", + " if final:\n", + " return \"\".join(final)\n", + "\n", + " return content" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Complex is better than complicated.Flat is better than nested.'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# pipe line\n", + "tr(\n", + " content=grep(\n", + " content=echo(),\n", + " pattern=\"ed\",\n", + " ),\n", + " delete=True,\n", + " char=\"\\n\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## DataFrame.pipe 示例" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
a
0a
1b
2c
3d
4e
\n", + "
" + ], + "text/plain": [ + " a\n", + "0 a\n", + "1 b\n", + "2 c\n", + "3 d\n", + "4 e" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "data = pd.DataFrame(\n", + " dict(\n", + " a=list(\"abcde\"),\n", + " )\n", + ")\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "def parse(df: pd.DataFrame):\n", + " length = df[\"a\"].shape[0]\n", + " df[\"b\"] = list(range(1, length + 1))\n", + " df[\"c\"] = [v.upper() if v in [\"a\", \"b\", \"c\"] else None for v in df[\"a\"]]\n", + "\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "def upper(df: pd.DataFrame, cols=None):\n", + " if cols:\n", + " df[cols] = df[cols].astype(str).applymap(lambda s: s.upper())\n", + " return df\n", + " return df.applymap(lambda s: str(s).upper())" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
0A1A
1B2B
2C3C
3D4NONE
4E5NONE
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 A 1 A\n", + "1 B 2 B\n", + "2 C 3 C\n", + "3 D 4 NONE\n", + "4 E 5 NONE" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.pipe(parse).pipe(upper)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
0A1A
1B2B
2C3C
3D4None
4E5None
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 A 1 A\n", + "1 B 2 B\n", + "2 C 3 C\n", + "3 D 4 None\n", + "4 E 5 None" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.pipe(parse).pipe(upper, cols=[\"a\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# rename" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['a', 'b'], dtype='object')" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "data = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6]))\n", + "df = data.copy(deep=True) # 1\n", + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['A', 'B'], dtype='object')" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mapper = [\"A\", \"B\"]\n", + "df.columns = mapper\n", + "df\n", + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
014
125
236
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 1 4\n", + "1 2 5\n", + "2 3 6" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = data.copy(deep=True)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['a', 'B']" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cols = df.columns.tolist() # 1\n", + "cols[cols.index(\"b\")] = \"B\" # 2\n", + "cols # 3" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['a', 'B'], dtype='object')" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns = cols # 4\n", + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
aB
014
125
236
\n", + "
" + ], + "text/plain": [ + " a B\n", + "0 1 4\n", + "1 2 5\n", + "2 3 6" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = data.copy(deep=True)\n", + "df.rename(columns={\"b\": \"B\"}) # 1" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['a', 'b'], dtype='object')" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
aB
014
125
236
\n", + "
" + ], + "text/plain": [ + " a B\n", + "0 1 4\n", + "1 2 5\n", + "2 3 6" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.rename(columns=lambda v: \"B\" if v == \"b\" else v)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# filter" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
a1a2a3b1c1d1b2
0kUHpZpG
1zNAcBWB
2XRBzKIB
3YryakuF
4YHQDJjJ
\n", + "
" + ], + "text/plain": [ + " a1 a2 a3 b1 c1 d1 b2\n", + "0 k U H p Z p G\n", + "1 z N A c B W B\n", + "2 X R B z K I B\n", + "3 Y r y a k u F\n", + "4 Y H Q D J j J" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import random\n", + "import string\n", + "\n", + "import pandas as pd\n", + "\n", + "data = pd.DataFrame(\n", + " dict(\n", + " a1=random.choices(string.ascii_letters, k=5),\n", + " a2=random.choices(string.ascii_letters, k=5),\n", + " a3=random.choices(string.ascii_letters, k=5),\n", + " b1=random.choices(string.ascii_letters, k=5),\n", + " c1=random.choices(string.ascii_letters, k=5),\n", + " d1=random.choices(string.ascii_letters, k=5),\n", + " b2=random.choices(string.ascii_letters, k=5),\n", + " )\n", + ")\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
a1b1
0kp
1zc
2Xz
3Ya
4YD
\n", + "
" + ], + "text/plain": [ + " a1 b1\n", + "0 k p\n", + "1 z c\n", + "2 X z\n", + "3 Y a\n", + "4 Y D" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.loc[:, [\"a1\", \"b1\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
a1b1
0kp
1zc
2Xz
3Ya
4YD
\n", + "
" + ], + "text/plain": [ + " a1 b1\n", + "0 k p\n", + "1 z c\n", + "2 X z\n", + "3 Y a\n", + "4 Y D" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[[\"a1\", \"b1\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['a1', 'a2', 'a3', 'b1', 'c1', 'd1', 'b2']\n" + ] + } + ], + "source": [ + "cols = data.columns.tolist()\n", + "print(cols)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['a1', 'b1', 'c1', 'd1']\n" + ] + } + ], + "source": [ + "cols = [col for col in cols if col.endswith(\"1\")]\n", + "print(cols)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
a1b1c1d1
0kpZp
1zcBW
2XzKI
3Yaku
4YDJj
\n", + "
" + ], + "text/plain": [ + " a1 b1 c1 d1\n", + "0 k p Z p\n", + "1 z c B W\n", + "2 X z K I\n", + "3 Y a k u\n", + "4 Y D J j" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[cols]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
a1b1
0kp
1zc
2Xz
3Ya
4YD
\n", + "
" + ], + "text/plain": [ + " a1 b1\n", + "0 k p\n", + "1 z c\n", + "2 X z\n", + "3 Y a\n", + "4 Y D" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.filter(items=[\"a1\", \"b1\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
a1b1c1d1
0kpZp
1zcBW
2XzKI
3Yaku
4YDJj
\n", + "
" + ], + "text/plain": [ + " a1 b1 c1 d1\n", + "0 k p Z p\n", + "1 z c B W\n", + "2 X z K I\n", + "3 Y a k u\n", + "4 Y D J j" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.filter(regex=r\"^[a-d]1$\")" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
a1a2b1c1b2
0kUpZG
1zNcBB
2XRzKB
3YrakF
4YHDJJ
\n", + "
" + ], + "text/plain": [ + " a1 a2 b1 c1 b2\n", + "0 k U p Z G\n", + "1 z N c B B\n", + "2 X R z K B\n", + "3 Y r a k F\n", + "4 Y H D J J" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.filter(regex=r\"^(a|b|c)[1-2]$\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# assign" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " a\n", + "0 h\n", + "1 e\n", + "2 l\n", + "3 l\n", + "4 o\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "data = pd.DataFrame(\n", + " dict(\n", + " a=list(\"hello\"),\n", + " )\n", + ")\n", + "print(data.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " a b\n", + "0 H 1\n", + "1 E 2\n", + "2 L 3\n", + "3 L 4\n", + "4 O 5\n" + ] + } + ], + "source": [ + "data[\"a\"] = data[\"a\"].str.upper()\n", + "data[\"b\"] = list(range(1, 6))\n", + "print(data.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
0H1
1E2
2L3
3L4
4O5
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 H 1\n", + "1 E 2\n", + "2 L 3\n", + "3 L 4\n", + "4 O 5" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.DataFrame(\n", + " dict(\n", + " a=list(\"hello\"),\n", + " )\n", + ")\n", + "data.assign(\n", + " a=data[\"a\"].str.upper(),\n", + " b=list(range(1, 6)),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# eval & query" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uidpid
01O
12L
23k
34F
45Z
\n", + "
" + ], + "text/plain": [ + " uid pid\n", + "0 1 O\n", + "1 2 L\n", + "2 3 k\n", + "3 4 F\n", + "4 5 Z" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import random\n", + "import string\n", + "\n", + "import pandas as pd\n", + "\n", + "random.seed(3.14)\n", + "\n", + "data = pd.DataFrame(\n", + " dict(\n", + " uid=list(range(1, 11)),\n", + " pid=random.choices(string.ascii_letters, k=10),\n", + " )\n", + ")\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uidpid
12L
34F
56u
78V
910m
\n", + "
" + ], + "text/plain": [ + " uid pid\n", + "1 2 L\n", + "3 4 F\n", + "5 6 u\n", + "7 8 V\n", + "9 10 m" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.loc[data[\"uid\"] % 2 == 0]" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uidpid
12L
34F
\n", + "
" + ], + "text/plain": [ + " uid pid\n", + "1 2 L\n", + "3 4 F" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# fmt:off\n", + "data.loc[\n", + " (data[\"uid\"] % 2 == 0)\n", + " & (data[\"pid\"].str.isupper())\n", + " & (data[\"pid\"] != \"V\")\n", + "]\n", + "# fmt:on" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uidpid
12L
34F
\n", + "
" + ], + "text/plain": [ + " uid pid\n", + "1 2 L\n", + "3 4 F" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cond = data.eval(\n", + " \"\"\"\n", + " uid % 2 == 0 and \\\n", + " pid.str.isupper() and \\\n", + " pid != 'V'\n", + " \"\"\"\n", + ")\n", + "data.loc[cond]" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uidpidcodeis_vip
01O1_OTrue
12L2_LTrue
23k3_kFalse
34F4_FTrue
45Z5_ZTrue
56u6_uFalse
67t7_tFalse
78V8_VTrue
89x9_xFalse
910m10_mFalse
\n", + "
" + ], + "text/plain": [ + " uid pid code is_vip\n", + "0 1 O 1_O True\n", + "1 2 L 2_L True\n", + "2 3 k 3_k False\n", + "3 4 F 4_F True\n", + "4 5 Z 5_Z True\n", + "5 6 u 6_u False\n", + "6 7 t 7_t False\n", + "7 8 V 8_V True\n", + "8 9 x 9_x False\n", + "9 10 m 10_m False" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.eval(\n", + " \"\"\"\n", + " uid = uid.astype('str')\n", + " code = uid.str.cat(pid, sep=\"_\")\n", + " is_vip = pid.str.isupper()\n", + " \"\"\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uidpid
12L
34F
\n", + "
" + ], + "text/plain": [ + " uid pid\n", + "1 2 L\n", + "3 4 F" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.query(\n", + " \"\"\"\n", + " uid % 2 == 0 and \\\n", + " pid.str.isupper() and \\\n", + " pid != 'V'\n", + " \"\"\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uidpid
12L
34F
\n", + "
" + ], + "text/plain": [ + " uid pid\n", + "1 2 L\n", + "3 4 F" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.query(\"\"\"uid % 2 == 0 and pid.str.isupper() and pid != 'V'\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.10.4 ('sspai-100-hours-series-python')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + }, + "vscode": { + "interpreter": { + "hash": "7a101baf08afe636412f97dd4a9fc2e65b6f84f0ec50413bf3e19b04a26b8ba6" + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/code/18/plot.ipynb b/code/18/plot.ipynb new file mode 100644 index 0000000..1723be1 --- /dev/null +++ b/code/18/plot.ipynb @@ -0,0 +1,460 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "\n", + "%matplotlib inline\n", + "%config InlineBackend.figure_format = 'svg'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "titanic = pd.read_csv(\"../../data/titanic.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titanic.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/svg+xml": "\n\n\n \n \n \n \n 2022-06-30T09:00:52.395848\n image/svg+xml\n \n \n Matplotlib v3.5.2, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "titanic[\"Sex\"].value_counts().plot.bar()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[Text(0, 0, 'male'), Text(1, 0, 'female')]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/svg+xml": "\n\n\n \n \n \n \n 2022-06-30T09:00:52.523775\n image/svg+xml\n \n \n Matplotlib v3.5.2, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ax = titanic[\"Sex\"].value_counts().plot.bar()\n", + "ax.set_title(\"The Bar Chart of Sex\")\n", + "ax.set_xlabel(\"Sex\")\n", + "ax.set_ylabel(\"Counter\")\n", + "ax.set_xticklabels([\"male\", \"female\"], rotation=0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/svg+xml": "\n\n\n \n \n \n \n 2022-06-30T09:00:52.623399\n image/svg+xml\n \n \n Matplotlib v3.5.2, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "titanic[\"Sex\"].value_counts().plot.pie()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/svg+xml": "\n\n\n \n \n \n \n 2022-06-30T09:00:52.733823\n image/svg+xml\n \n \n Matplotlib v3.5.2, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "titanic[\"Sex\"].value_counts().plot.bar(\n", + " figsize=(8, 6),\n", + " title=\"The Bar Chart of Sex\",\n", + " rot=45,\n", + " xlabel=\"Sex\",\n", + " ylabel=\"Number\",\n", + " ylim=(0, 550),\n", + " legend=True,\n", + " fontsize=8.5,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/svg+xml": "\n\n\n \n \n \n \n 2022-06-30T09:00:52.860825\n image/svg+xml\n \n \n Matplotlib v3.5.2, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from matplotlib import image\n", + "from matplotlib.offsetbox import OffsetImage\n", + "\n", + "\n", + "logo = image.imread(\"../../data/python-logo-master-v3-TM.png\")\n", + "\n", + "ax = titanic[\"Sex\"].value_counts().plot.pie(\n", + " figsize=(6, 8),\n", + " ylabel=\"\",\n", + " title=\"The Pie Chart of Sex\",\n", + " explode=(0, 0.05),\n", + " autopct=lambda pct: f\"{round(pct, 2)}%\",\n", + " pctdistance=0.4,\n", + " shadow=True,\n", + " colors=(\"steelblue\", \"orange\"),\n", + ")\n", + "\n", + "for n, tx in enumerate(ax.texts):\n", + " if (n+1) % 2==0:\n", + " tx.set_color(\"white\")\n", + "\n", + "logo_image = OffsetImage(logo, zoom=.5)\n", + "logo_image.set_offset((35, 25))\n", + "ax.add_artist(logo_image)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.10.4 ('sspai-100-hours-series-python')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + }, + "vscode": { + "interpreter": { + "hash": "7a101baf08afe636412f97dd4a9fc2e65b6f84f0ec50413bf3e19b04a26b8ba6" + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/data/python-logo-master-v3-TM.png b/data/python-logo-master-v3-TM.png new file mode 100644 index 0000000..71f9420 Binary files /dev/null and b/data/python-logo-master-v3-TM.png differ