{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"collapsed": true,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# applymap"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 4 | \n",
" 6 | \n",
"
\n",
" \n",
" | 1 | \n",
" 4 | \n",
" 3 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"0 4 6\n",
"1 4 3"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.DataFrame(\n",
" dict(\n",
" a=[[13, 3, 5, 6], [2, 4, 65, 6]],\n",
" b=[[44, 5, 6, 88, 0.5, 1], [\"22\", 40, 100]],\n",
" )\n",
")\n",
"df.head()\n",
"df.applymap(lambda x: len(x))"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# pipe"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Complex is better than complicated.Flat is better than nested."
]
}
],
"source": [
"%%bash\n",
"echo \"\n",
"Beautiful is better than ugly.\n",
"Explicit is better than implicit.\n",
"Simple is better than complex.\n",
"Complex is better than complicated.\n",
"Flat is better than nested.\n",
"Sparse is better than dense.\" | \\\n",
"grep ed | \\\n",
"tr -d \"\\n\""
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## 管道操作模拟"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"def echo():\n",
" poetry = \"\"\"\n",
"Beautiful is better than ugly.\n",
"Explicit is better than implicit.\n",
"Simple is better than complex.\n",
"Complex is better than complicated.\n",
"Flat is better than nested.\n",
"Sparse is better than dense.\"\n",
" \"\"\"\n",
" return poetry.strip()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"def grep(content: str, pattern: str):\n",
" import re\n",
"\n",
" filtered = []\n",
" content = content.splitlines()\n",
" for line in content:\n",
" if re.search(pattern, line):\n",
" filtered.append(line)\n",
"\n",
" return \"\".join(filtered)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"def tr(content: str, delete: bool, char: str):\n",
" final = []\n",
"\n",
" if delete:\n",
" content = content.splitlines()\n",
" for line in content:\n",
" new_line = line.replace(char, \"\")\n",
" final.append(new_line)\n",
" if final:\n",
" return \"\".join(final)\n",
"\n",
" return content"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"'Complex is better than complicated.Flat is better than nested.'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# pipe line\n",
"tr(\n",
" content=grep(\n",
" content=echo(),\n",
" pattern=\"ed\",\n",
" ),\n",
" delete=True,\n",
" char=\"\\n\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## DataFrame.pipe 示例"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" a | \n",
"
\n",
" \n",
" | 1 | \n",
" b | \n",
"
\n",
" \n",
" | 2 | \n",
" c | \n",
"
\n",
" \n",
" | 3 | \n",
" d | \n",
"
\n",
" \n",
" | 4 | \n",
" e | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a\n",
"0 a\n",
"1 b\n",
"2 c\n",
"3 d\n",
"4 e"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"data = pd.DataFrame(\n",
" dict(\n",
" a=list(\"abcde\"),\n",
" )\n",
")\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"def parse(df: pd.DataFrame):\n",
" length = df[\"a\"].shape[0]\n",
" df[\"b\"] = list(range(1, length + 1))\n",
" df[\"c\"] = [v.upper() if v in [\"a\", \"b\", \"c\"] else None for v in df[\"a\"]]\n",
"\n",
" return df"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"def upper(df: pd.DataFrame, cols=None):\n",
" if cols:\n",
" df[cols] = df[cols].astype(str).applymap(lambda s: s.upper())\n",
" return df\n",
" return df.applymap(lambda s: str(s).upper())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" A | \n",
" 1 | \n",
" A | \n",
"
\n",
" \n",
" | 1 | \n",
" B | \n",
" 2 | \n",
" B | \n",
"
\n",
" \n",
" | 2 | \n",
" C | \n",
" 3 | \n",
" C | \n",
"
\n",
" \n",
" | 3 | \n",
" D | \n",
" 4 | \n",
" NONE | \n",
"
\n",
" \n",
" | 4 | \n",
" E | \n",
" 5 | \n",
" NONE | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c\n",
"0 A 1 A\n",
"1 B 2 B\n",
"2 C 3 C\n",
"3 D 4 NONE\n",
"4 E 5 NONE"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.pipe(parse).pipe(upper)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" A | \n",
" 1 | \n",
" A | \n",
"
\n",
" \n",
" | 1 | \n",
" B | \n",
" 2 | \n",
" B | \n",
"
\n",
" \n",
" | 2 | \n",
" C | \n",
" 3 | \n",
" C | \n",
"
\n",
" \n",
" | 3 | \n",
" D | \n",
" 4 | \n",
" None | \n",
"
\n",
" \n",
" | 4 | \n",
" E | \n",
" 5 | \n",
" None | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c\n",
"0 A 1 A\n",
"1 B 2 B\n",
"2 C 3 C\n",
"3 D 4 None\n",
"4 E 5 None"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.pipe(parse).pipe(upper, cols=[\"a\"])"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# rename"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['a', 'b'], dtype='object')"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"data = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6]))\n",
"df = data.copy(deep=True) # 1\n",
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['A', 'B'], dtype='object')"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mapper = [\"A\", \"B\"]\n",
"df.columns = mapper\n",
"df\n",
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 4 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" 5 | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" 6 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"0 1 4\n",
"1 2 5\n",
"2 3 6"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = data.copy(deep=True)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"['a', 'B']"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cols = df.columns.tolist() # 1\n",
"cols[cols.index(\"b\")] = \"B\" # 2\n",
"cols # 3"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['a', 'B'], dtype='object')"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns = cols # 4\n",
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" B | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 4 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" 5 | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" 6 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a B\n",
"0 1 4\n",
"1 2 5\n",
"2 3 6"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = data.copy(deep=True)\n",
"df.rename(columns={\"b\": \"B\"}) # 1"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['a', 'b'], dtype='object')"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" B | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 4 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" 5 | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" 6 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a B\n",
"0 1 4\n",
"1 2 5\n",
"2 3 6"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.rename(columns=lambda v: \"B\" if v == \"b\" else v)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# filter"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a1 | \n",
" a2 | \n",
" a3 | \n",
" b1 | \n",
" c1 | \n",
" d1 | \n",
" b2 | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" k | \n",
" U | \n",
" H | \n",
" p | \n",
" Z | \n",
" p | \n",
" G | \n",
"
\n",
" \n",
" | 1 | \n",
" z | \n",
" N | \n",
" A | \n",
" c | \n",
" B | \n",
" W | \n",
" B | \n",
"
\n",
" \n",
" | 2 | \n",
" X | \n",
" R | \n",
" B | \n",
" z | \n",
" K | \n",
" I | \n",
" B | \n",
"
\n",
" \n",
" | 3 | \n",
" Y | \n",
" r | \n",
" y | \n",
" a | \n",
" k | \n",
" u | \n",
" F | \n",
"
\n",
" \n",
" | 4 | \n",
" Y | \n",
" H | \n",
" Q | \n",
" D | \n",
" J | \n",
" j | \n",
" J | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a1 a2 a3 b1 c1 d1 b2\n",
"0 k U H p Z p G\n",
"1 z N A c B W B\n",
"2 X R B z K I B\n",
"3 Y r y a k u F\n",
"4 Y H Q D J j J"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import random\n",
"import string\n",
"\n",
"import pandas as pd\n",
"\n",
"data = pd.DataFrame(\n",
" dict(\n",
" a1=random.choices(string.ascii_letters, k=5),\n",
" a2=random.choices(string.ascii_letters, k=5),\n",
" a3=random.choices(string.ascii_letters, k=5),\n",
" b1=random.choices(string.ascii_letters, k=5),\n",
" c1=random.choices(string.ascii_letters, k=5),\n",
" d1=random.choices(string.ascii_letters, k=5),\n",
" b2=random.choices(string.ascii_letters, k=5),\n",
" )\n",
")\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a1 | \n",
" b1 | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" k | \n",
" p | \n",
"
\n",
" \n",
" | 1 | \n",
" z | \n",
" c | \n",
"
\n",
" \n",
" | 2 | \n",
" X | \n",
" z | \n",
"
\n",
" \n",
" | 3 | \n",
" Y | \n",
" a | \n",
"
\n",
" \n",
" | 4 | \n",
" Y | \n",
" D | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a1 b1\n",
"0 k p\n",
"1 z c\n",
"2 X z\n",
"3 Y a\n",
"4 Y D"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.loc[:, [\"a1\", \"b1\"]]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a1 | \n",
" b1 | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" k | \n",
" p | \n",
"
\n",
" \n",
" | 1 | \n",
" z | \n",
" c | \n",
"
\n",
" \n",
" | 2 | \n",
" X | \n",
" z | \n",
"
\n",
" \n",
" | 3 | \n",
" Y | \n",
" a | \n",
"
\n",
" \n",
" | 4 | \n",
" Y | \n",
" D | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a1 b1\n",
"0 k p\n",
"1 z c\n",
"2 X z\n",
"3 Y a\n",
"4 Y D"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[[\"a1\", \"b1\"]]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['a1', 'a2', 'a3', 'b1', 'c1', 'd1', 'b2']\n"
]
}
],
"source": [
"cols = data.columns.tolist()\n",
"print(cols)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['a1', 'b1', 'c1', 'd1']\n"
]
}
],
"source": [
"cols = [col for col in cols if col.endswith(\"1\")]\n",
"print(cols)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a1 | \n",
" b1 | \n",
" c1 | \n",
" d1 | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" k | \n",
" p | \n",
" Z | \n",
" p | \n",
"
\n",
" \n",
" | 1 | \n",
" z | \n",
" c | \n",
" B | \n",
" W | \n",
"
\n",
" \n",
" | 2 | \n",
" X | \n",
" z | \n",
" K | \n",
" I | \n",
"
\n",
" \n",
" | 3 | \n",
" Y | \n",
" a | \n",
" k | \n",
" u | \n",
"
\n",
" \n",
" | 4 | \n",
" Y | \n",
" D | \n",
" J | \n",
" j | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a1 b1 c1 d1\n",
"0 k p Z p\n",
"1 z c B W\n",
"2 X z K I\n",
"3 Y a k u\n",
"4 Y D J j"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[cols]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a1 | \n",
" b1 | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" k | \n",
" p | \n",
"
\n",
" \n",
" | 1 | \n",
" z | \n",
" c | \n",
"
\n",
" \n",
" | 2 | \n",
" X | \n",
" z | \n",
"
\n",
" \n",
" | 3 | \n",
" Y | \n",
" a | \n",
"
\n",
" \n",
" | 4 | \n",
" Y | \n",
" D | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a1 b1\n",
"0 k p\n",
"1 z c\n",
"2 X z\n",
"3 Y a\n",
"4 Y D"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.filter(items=[\"a1\", \"b1\"])"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a1 | \n",
" b1 | \n",
" c1 | \n",
" d1 | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" k | \n",
" p | \n",
" Z | \n",
" p | \n",
"
\n",
" \n",
" | 1 | \n",
" z | \n",
" c | \n",
" B | \n",
" W | \n",
"
\n",
" \n",
" | 2 | \n",
" X | \n",
" z | \n",
" K | \n",
" I | \n",
"
\n",
" \n",
" | 3 | \n",
" Y | \n",
" a | \n",
" k | \n",
" u | \n",
"
\n",
" \n",
" | 4 | \n",
" Y | \n",
" D | \n",
" J | \n",
" j | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a1 b1 c1 d1\n",
"0 k p Z p\n",
"1 z c B W\n",
"2 X z K I\n",
"3 Y a k u\n",
"4 Y D J j"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.filter(regex=r\"^[a-d]1$\")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a1 | \n",
" a2 | \n",
" b1 | \n",
" c1 | \n",
" b2 | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" k | \n",
" U | \n",
" p | \n",
" Z | \n",
" G | \n",
"
\n",
" \n",
" | 1 | \n",
" z | \n",
" N | \n",
" c | \n",
" B | \n",
" B | \n",
"
\n",
" \n",
" | 2 | \n",
" X | \n",
" R | \n",
" z | \n",
" K | \n",
" B | \n",
"
\n",
" \n",
" | 3 | \n",
" Y | \n",
" r | \n",
" a | \n",
" k | \n",
" F | \n",
"
\n",
" \n",
" | 4 | \n",
" Y | \n",
" H | \n",
" D | \n",
" J | \n",
" J | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a1 a2 b1 c1 b2\n",
"0 k U p Z G\n",
"1 z N c B B\n",
"2 X R z K B\n",
"3 Y r a k F\n",
"4 Y H D J J"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.filter(regex=r\"^(a|b|c)[1-2]$\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# assign"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" a\n",
"0 h\n",
"1 e\n",
"2 l\n",
"3 l\n",
"4 o\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"data = pd.DataFrame(\n",
" dict(\n",
" a=list(\"hello\"),\n",
" )\n",
")\n",
"print(data.head())"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" a b\n",
"0 H 1\n",
"1 E 2\n",
"2 L 3\n",
"3 L 4\n",
"4 O 5\n"
]
}
],
"source": [
"data[\"a\"] = data[\"a\"].str.upper()\n",
"data[\"b\"] = list(range(1, 6))\n",
"print(data.head())"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" H | \n",
" 1 | \n",
"
\n",
" \n",
" | 1 | \n",
" E | \n",
" 2 | \n",
"
\n",
" \n",
" | 2 | \n",
" L | \n",
" 3 | \n",
"
\n",
" \n",
" | 3 | \n",
" L | \n",
" 4 | \n",
"
\n",
" \n",
" | 4 | \n",
" O | \n",
" 5 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"0 H 1\n",
"1 E 2\n",
"2 L 3\n",
"3 L 4\n",
"4 O 5"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = pd.DataFrame(\n",
" dict(\n",
" a=list(\"hello\"),\n",
" )\n",
")\n",
"data.assign(\n",
" a=data[\"a\"].str.upper(),\n",
" b=list(range(1, 6)),\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# eval & query"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" uid | \n",
" pid | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" O | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" L | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" k | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" F | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" Z | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" uid pid\n",
"0 1 O\n",
"1 2 L\n",
"2 3 k\n",
"3 4 F\n",
"4 5 Z"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import random\n",
"import string\n",
"\n",
"import pandas as pd\n",
"\n",
"random.seed(3.14)\n",
"\n",
"data = pd.DataFrame(\n",
" dict(\n",
" uid=list(range(1, 11)),\n",
" pid=random.choices(string.ascii_letters, k=10),\n",
" )\n",
")\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" uid | \n",
" pid | \n",
"
\n",
" \n",
" \n",
" \n",
" | 1 | \n",
" 2 | \n",
" L | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" F | \n",
"
\n",
" \n",
" | 5 | \n",
" 6 | \n",
" u | \n",
"
\n",
" \n",
" | 7 | \n",
" 8 | \n",
" V | \n",
"
\n",
" \n",
" | 9 | \n",
" 10 | \n",
" m | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" uid pid\n",
"1 2 L\n",
"3 4 F\n",
"5 6 u\n",
"7 8 V\n",
"9 10 m"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.loc[data[\"uid\"] % 2 == 0]"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" uid | \n",
" pid | \n",
"
\n",
" \n",
" \n",
" \n",
" | 1 | \n",
" 2 | \n",
" L | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" F | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" uid pid\n",
"1 2 L\n",
"3 4 F"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# fmt:off\n",
"data.loc[\n",
" (data[\"uid\"] % 2 == 0)\n",
" & (data[\"pid\"].str.isupper())\n",
" & (data[\"pid\"] != \"V\")\n",
"]\n",
"# fmt:on"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" uid | \n",
" pid | \n",
"
\n",
" \n",
" \n",
" \n",
" | 1 | \n",
" 2 | \n",
" L | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" F | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" uid pid\n",
"1 2 L\n",
"3 4 F"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cond = data.eval(\n",
" \"\"\"\n",
" uid % 2 == 0 and \\\n",
" pid.str.isupper() and \\\n",
" pid != 'V'\n",
" \"\"\"\n",
")\n",
"data.loc[cond]"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" uid | \n",
" pid | \n",
" code | \n",
" is_vip | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" O | \n",
" 1_O | \n",
" True | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" L | \n",
" 2_L | \n",
" True | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" k | \n",
" 3_k | \n",
" False | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" F | \n",
" 4_F | \n",
" True | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" Z | \n",
" 5_Z | \n",
" True | \n",
"
\n",
" \n",
" | 5 | \n",
" 6 | \n",
" u | \n",
" 6_u | \n",
" False | \n",
"
\n",
" \n",
" | 6 | \n",
" 7 | \n",
" t | \n",
" 7_t | \n",
" False | \n",
"
\n",
" \n",
" | 7 | \n",
" 8 | \n",
" V | \n",
" 8_V | \n",
" True | \n",
"
\n",
" \n",
" | 8 | \n",
" 9 | \n",
" x | \n",
" 9_x | \n",
" False | \n",
"
\n",
" \n",
" | 9 | \n",
" 10 | \n",
" m | \n",
" 10_m | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" uid pid code is_vip\n",
"0 1 O 1_O True\n",
"1 2 L 2_L True\n",
"2 3 k 3_k False\n",
"3 4 F 4_F True\n",
"4 5 Z 5_Z True\n",
"5 6 u 6_u False\n",
"6 7 t 7_t False\n",
"7 8 V 8_V True\n",
"8 9 x 9_x False\n",
"9 10 m 10_m False"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.eval(\n",
" \"\"\"\n",
" uid = uid.astype('str')\n",
" code = uid.str.cat(pid, sep=\"_\")\n",
" is_vip = pid.str.isupper()\n",
" \"\"\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" uid | \n",
" pid | \n",
"
\n",
" \n",
" \n",
" \n",
" | 1 | \n",
" 2 | \n",
" L | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" F | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" uid pid\n",
"1 2 L\n",
"3 4 F"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.query(\n",
" \"\"\"\n",
" uid % 2 == 0 and \\\n",
" pid.str.isupper() and \\\n",
" pid != 'V'\n",
" \"\"\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" uid | \n",
" pid | \n",
"
\n",
" \n",
" \n",
" \n",
" | 1 | \n",
" 2 | \n",
" L | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" F | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" uid pid\n",
"1 2 L\n",
"3 4 F"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.query(\"\"\"uid % 2 == 0 and pid.str.isupper() and pid != 'V'\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.4 ('sspai-100-hours-series-python')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
},
"vscode": {
"interpreter": {
"hash": "7a101baf08afe636412f97dd4a9fc2e65b6f84f0ec50413bf3e19b04a26b8ba6"
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}