{ "cells": [ { "cell_type": "markdown", "metadata": { "collapsed": true, "pycharm": { "name": "#%% md\n" } }, "source": [ "# applymap" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ab
046
143
\n", "
" ], "text/plain": [ " a b\n", "0 4 6\n", "1 4 3" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "df = pd.DataFrame(\n", " dict(\n", " a=[[13, 3, 5, 6], [2, 4, 65, 6]],\n", " b=[[44, 5, 6, 88, 0.5, 1], [\"22\", 40, 100]],\n", " )\n", ")\n", "df.head()\n", "df.applymap(lambda x: len(x))" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "# pipe" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Complex is better than complicated.Flat is better than nested." ] } ], "source": [ "%%bash\n", "echo \"\n", "Beautiful is better than ugly.\n", "Explicit is better than implicit.\n", "Simple is better than complex.\n", "Complex is better than complicated.\n", "Flat is better than nested.\n", "Sparse is better than dense.\" | \\\n", "grep ed | \\\n", "tr -d \"\\n\"" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "## 管道操作模拟" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "def echo():\n", "    \"\"\"Return the sample text, standing in for the shell `echo` stage.\"\"\"\n", "    poetry = \"\"\"\n", "Beautiful is better than ugly.\n", "Explicit is better than implicit.\n", "Simple is better than complex.\n", "Complex is better than complicated.\n", "Flat is better than nested.\n", "Sparse is better than dense.\n", "    \"\"\"\n", "    return poetry.strip()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "def grep(content: str, pattern: str):\n", "    \"\"\"Mimic `grep`: keep only the lines of `content` that match `pattern`.\n", "\n", "    Args:\n", "        content: Multi-line text to filter.\n", "        pattern: Regular expression tested against each line with `re.search`.\n", "\n", "    Returns:\n", "        The matching lines joined by newlines, so the result stays\n", "        line-oriented for the next stage; an empty string if nothing matches.\n", "    \"\"\"\n", "    import re\n", "\n", "    matched = [line for line in content.splitlines() if re.search(pattern, line)]\n", "    return \"\\n\".join(matched)" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "def tr(content: str, delete: bool, char: str):\n", "    \"\"\"Mimic `tr -d`: drop every occurrence of `char` from `content`.\n", "\n", "    Args:\n", "        content: Text to process.\n", "        delete: When false, `content` is returned unchanged.\n", "        char: Substring to remove.\n", "\n", "    Returns:\n", "        A string in every case, including for empty input.\n", "    \"\"\"\n", "    if not delete:\n", "        return content\n", "    return content.replace(char, \"\")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "'Complex is better than complicated.Flat is better than nested.'" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# pipeline: echo | grep | tr\n", "tr(\n", " content=grep(\n", " content=echo(),\n", " pattern=\"ed\",\n", " ),\n", " delete=True,\n", " char=\"\\n\",\n", ")" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "## DataFrame.pipe 示例" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
a
0a
1b
2c
3d
4e
\n", "
" ], "text/plain": [ " a\n", "0 a\n", "1 b\n", "2 c\n", "3 d\n", "4 e" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "data = pd.DataFrame(\n", " dict(\n", " a=list(\"abcde\"),\n", " )\n", ")\n", "data" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "def parse(df: pd.DataFrame):\n", "    \"\"\"Return a copy of `df` with a 1-based counter `b` and a flag column `c`.\n", "\n", "    `c` is the upper-cased value of `a` for \"a\", \"b\" and \"c\", otherwise None.\n", "    The input frame is not modified, so re-running the pipeline cells always\n", "    starts from the same `data`.\n", "    \"\"\"\n", "    length = df[\"a\"].shape[0]\n", "    return df.assign(\n", "        b=list(range(1, length + 1)),\n", "        c=[v.upper() if v in [\"a\", \"b\", \"c\"] else None for v in df[\"a\"]],\n", "    )" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "def upper(df: pd.DataFrame, cols=None):\n", "    \"\"\"Upper-case cell values and return the result as a new frame.\n", "\n", "    Args:\n", "        df: Source frame; it is not modified.\n", "        cols: Optional list of column labels. When non-empty, only these\n", "            columns are cast to `str` and upper-cased; otherwise every cell\n", "            goes through `str(...).upper()`.\n", "    \"\"\"\n", "    # `DataFrame.applymap` was renamed to `DataFrame.map` in pandas 2.1;\n", "    # use whichever name this pandas version provides.\n", "    elementwise = \"map\" if hasattr(pd.DataFrame, \"map\") else \"applymap\"\n", "\n", "    if cols:\n", "        result = df.copy()\n", "        result[cols] = getattr(result[cols].astype(str), elementwise)(lambda s: s.upper())\n", "        return result\n", "    return getattr(df, elementwise)(lambda s: str(s).upper())" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abc
0A1A
1B2B
2C3C
3D4NONE
4E5NONE
\n", "
" ], "text/plain": [ " a b c\n", "0 A 1 A\n", "1 B 2 B\n", "2 C 3 C\n", "3 D 4 NONE\n", "4 E 5 NONE" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.pipe(parse).pipe(upper)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abc
0A1A
1B2B
2C3C
3D4None
4E5None
\n", "
" ], "text/plain": [ " a b c\n", "0 A 1 A\n", "1 B 2 B\n", "2 C 3 C\n", "3 D 4 None\n", "4 E 5 None" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.pipe(parse).pipe(upper, cols=[\"a\"])" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "# rename" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "Index(['a', 'b'], dtype='object')" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "data = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6]))\n", "df = data.copy(deep=True) # 1\n", "df.columns" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "Index(['A', 'B'], dtype='object')" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mapper = [\"A\", \"B\"]\n", "df.columns = mapper\n", "df\n", "df.columns" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ab
014
125
236
\n", "
" ], "text/plain": [ " a b\n", "0 1 4\n", "1 2 5\n", "2 3 6" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = data.copy(deep=True)\n", "df" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "['a', 'B']" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cols = df.columns.tolist() # 1\n", "cols[cols.index(\"b\")] = \"B\" # 2\n", "cols # 3" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "Index(['a', 'B'], dtype='object')" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns = cols # 4\n", "df.columns" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
aB
014
125
236
\n", "
" ], "text/plain": [ " a B\n", "0 1 4\n", "1 2 5\n", "2 3 6" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = data.copy(deep=True)\n", "df.rename(columns={\"b\": \"B\"}) # 1" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "Index(['a', 'b'], dtype='object')" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
aB
014
125
236
\n", "
" ], "text/plain": [ " a B\n", "0 1 4\n", "1 2 5\n", "2 3 6" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.rename(columns=lambda v: \"B\" if v == \"b\" else v)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "# filter" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
a1a2a3b1c1d1b2
0kUHpZpG
1zNAcBWB
2XRBzKIB
3YryakuF
4YHQDJjJ
\n", "
" ], "text/plain": [ " a1 a2 a3 b1 c1 d1 b2\n", "0 k U H p Z p G\n", "1 z N A c B W B\n", "2 X R B z K I B\n", "3 Y r y a k u F\n", "4 Y H Q D J j J" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import random\n", "import string\n", "\n", "import pandas as pd\n", "\n", "data = pd.DataFrame(\n", " dict(\n", " a1=random.choices(string.ascii_letters, k=5),\n", " a2=random.choices(string.ascii_letters, k=5),\n", " a3=random.choices(string.ascii_letters, k=5),\n", " b1=random.choices(string.ascii_letters, k=5),\n", " c1=random.choices(string.ascii_letters, k=5),\n", " d1=random.choices(string.ascii_letters, k=5),\n", " b2=random.choices(string.ascii_letters, k=5),\n", " )\n", ")\n", "data.head()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
a1b1
0kp
1zc
2Xz
3Ya
4YD
\n", "
" ], "text/plain": [ " a1 b1\n", "0 k p\n", "1 z c\n", "2 X z\n", "3 Y a\n", "4 Y D" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.loc[:, [\"a1\", \"b1\"]]" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
a1b1
0kp
1zc
2Xz
3Ya
4YD
\n", "
" ], "text/plain": [ " a1 b1\n", "0 k p\n", "1 z c\n", "2 X z\n", "3 Y a\n", "4 Y D" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[[\"a1\", \"b1\"]]" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['a1', 'a2', 'a3', 'b1', 'c1', 'd1', 'b2']\n" ] } ], "source": [ "cols = data.columns.tolist()\n", "print(cols)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['a1', 'b1', 'c1', 'd1']\n" ] } ], "source": [ "cols = [col for col in cols if col.endswith(\"1\")]\n", "print(cols)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
a1b1c1d1
0kpZp
1zcBW
2XzKI
3Yaku
4YDJj
\n", "
" ], "text/plain": [ " a1 b1 c1 d1\n", "0 k p Z p\n", "1 z c B W\n", "2 X z K I\n", "3 Y a k u\n", "4 Y D J j" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[cols]" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
a1b1
0kp
1zc
2Xz
3Ya
4YD
\n", "
" ], "text/plain": [ " a1 b1\n", "0 k p\n", "1 z c\n", "2 X z\n", "3 Y a\n", "4 Y D" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.filter(items=[\"a1\", \"b1\"])" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
a1b1c1d1
0kpZp
1zcBW
2XzKI
3Yaku
4YDJj
\n", "
" ], "text/plain": [ " a1 b1 c1 d1\n", "0 k p Z p\n", "1 z c B W\n", "2 X z K I\n", "3 Y a k u\n", "4 Y D J j" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.filter(regex=r\"^[a-d]1$\")" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
a1a2b1c1b2
0kUpZG
1zNcBB
2XRzKB
3YrakF
4YHDJJ
\n", "
" ], "text/plain": [ " a1 a2 b1 c1 b2\n", "0 k U p Z G\n", "1 z N c B B\n", "2 X R z K B\n", "3 Y r a k F\n", "4 Y H D J J" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.filter(regex=r\"^(a|b|c)[1-2]$\")" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "# assign" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " a\n", "0 h\n", "1 e\n", "2 l\n", "3 l\n", "4 o\n" ] } ], "source": [ "import pandas as pd\n", "\n", "data = pd.DataFrame(\n", " dict(\n", " a=list(\"hello\"),\n", " )\n", ")\n", "print(data.head())" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " a b\n", "0 H 1\n", "1 E 2\n", "2 L 3\n", "3 L 4\n", "4 O 5\n" ] } ], "source": [ "data[\"a\"] = data[\"a\"].str.upper()\n", "data[\"b\"] = list(range(1, 6))\n", "print(data.head())" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ab
0H1
1E2
2L3
3L4
4O5
\n", "
" ], "text/plain": [ " a b\n", "0 H 1\n", "1 E 2\n", "2 L 3\n", "3 L 4\n", "4 O 5" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = pd.DataFrame(\n", " dict(\n", " a=list(\"hello\"),\n", " )\n", ")\n", "data.assign(\n", " a=data[\"a\"].str.upper(),\n", " b=list(range(1, 6)),\n", ")" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "# eval & query" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
uidpid
01O
12L
23k
34F
45Z
\n", "
" ], "text/plain": [ " uid pid\n", "0 1 O\n", "1 2 L\n", "2 3 k\n", "3 4 F\n", "4 5 Z" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import random\n", "import string\n", "\n", "import pandas as pd\n", "\n", "random.seed(3.14)\n", "\n", "data = pd.DataFrame(\n", " dict(\n", " uid=list(range(1, 11)),\n", " pid=random.choices(string.ascii_letters, k=10),\n", " )\n", ")\n", "data.head()" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
uidpid
12L
34F
56u
78V
910m
\n", "
" ], "text/plain": [ " uid pid\n", "1 2 L\n", "3 4 F\n", "5 6 u\n", "7 8 V\n", "9 10 m" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.loc[data[\"uid\"] % 2 == 0]" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
uidpid
12L
34F
\n", "
" ], "text/plain": [ " uid pid\n", "1 2 L\n", "3 4 F" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# fmt:off\n", "data.loc[\n", " (data[\"uid\"] % 2 == 0)\n", " & (data[\"pid\"].str.isupper())\n", " & (data[\"pid\"] != \"V\")\n", "]\n", "# fmt:on" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
uidpid
12L
34F
\n", "
" ], "text/plain": [ " uid pid\n", "1 2 L\n", "3 4 F" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cond = data.eval(\n", " \"\"\"\n", " uid % 2 == 0 and \\\n", " pid.str.isupper() and \\\n", " pid != 'V'\n", " \"\"\"\n", ")\n", "data.loc[cond]" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
uidpidcodeis_vip
01O1_OTrue
12L2_LTrue
23k3_kFalse
34F4_FTrue
45Z5_ZTrue
56u6_uFalse
67t7_tFalse
78V8_VTrue
89x9_xFalse
910m10_mFalse
\n", "
" ], "text/plain": [ " uid pid code is_vip\n", "0 1 O 1_O True\n", "1 2 L 2_L True\n", "2 3 k 3_k False\n", "3 4 F 4_F True\n", "4 5 Z 5_Z True\n", "5 6 u 6_u False\n", "6 7 t 7_t False\n", "7 8 V 8_V True\n", "8 9 x 9_x False\n", "9 10 m 10_m False" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.eval(\n", " \"\"\"\n", " uid = uid.astype('str')\n", " code = uid.str.cat(pid, sep=\"_\")\n", " is_vip = pid.str.isupper()\n", " \"\"\"\n", ")" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
uidpid
12L
34F
\n", "
" ], "text/plain": [ " uid pid\n", "1 2 L\n", "3 4 F" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.query(\n", " \"\"\"\n", " uid % 2 == 0 and \\\n", " pid.str.isupper() and \\\n", " pid != 'V'\n", " \"\"\"\n", ")" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
uidpid
12L
34F
\n", "
" ], "text/plain": [ " uid pid\n", "1 2 L\n", "3 4 F" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.query(\"\"\"uid % 2 == 0 and pid.str.isupper() and pid != 'V'\"\"\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.10.4 ('sspai-100-hours-series-python')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.4" }, "vscode": { "interpreter": { "hash": "7a101baf08afe636412f97dd4a9fc2e65b6f84f0ec50413bf3e19b04a26b8ba6" } } }, "nbformat": 4, "nbformat_minor": 0 }