Files
sspai-100-hours-series-python/code/18/pandas_advanced_tricks.ipynb
2022-06-30 09:04:07 +08:00

2465 lines
54 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"collapsed": true,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# applymap"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"0 4 6\n",
"1 4 3"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.DataFrame(\n",
" dict(\n",
" a=[[13, 3, 5, 6], [2, 4, 65, 6]],\n",
" b=[[44, 5, 6, 88, 0.5, 1], [\"22\", 40, 100]],\n",
" )\n",
")\n",
"df.head()\n",
"df.applymap(lambda x: len(x))"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# pipe"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Complex is better than complicated.Flat is better than nested."
]
}
],
"source": [
"%%bash\n",
"echo \"\n",
"Beautiful is better than ugly.\n",
"Explicit is better than implicit.\n",
"Simple is better than complex.\n",
"Complex is better than complicated.\n",
"Flat is better than nested.\n",
"Sparse is better than dense.\" | \\\n",
"grep ed | \\\n",
"tr -d \"\\n\""
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## 管道操作模拟"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"def echo():\n",
" poetry = \"\"\"\n",
"Beautiful is better than ugly.\n",
"Explicit is better than implicit.\n",
"Simple is better than complex.\n",
"Complex is better than complicated.\n",
"Flat is better than nested.\n",
"Sparse is better than dense.\"\n",
" \"\"\"\n",
" return poetry.strip()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"def grep(content: str, pattern: str):\n",
" import re\n",
"\n",
" filtered = []\n",
" content = content.splitlines()\n",
" for line in content:\n",
" if re.search(pattern, line):\n",
" filtered.append(line)\n",
"\n",
" return \"\".join(filtered)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"def tr(content: str, delete: bool, char: str):\n",
" final = []\n",
"\n",
" if delete:\n",
" content = content.splitlines()\n",
" for line in content:\n",
" new_line = line.replace(char, \"\")\n",
" final.append(new_line)\n",
" if final:\n",
" return \"\".join(final)\n",
"\n",
" return content"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"'Complex is better than complicated.Flat is better than nested.'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# pipe line\n",
"tr(\n",
" content=grep(\n",
" content=echo(),\n",
" pattern=\"ed\",\n",
" ),\n",
" delete=True,\n",
" char=\"\\n\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## DataFrame.pipe 示例"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>a</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>c</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>d</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>e</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a\n",
"0 a\n",
"1 b\n",
"2 c\n",
"3 d\n",
"4 e"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"data = pd.DataFrame(\n",
" dict(\n",
" a=list(\"abcde\"),\n",
" )\n",
")\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"def parse(df: pd.DataFrame):\n",
" length = df[\"a\"].shape[0]\n",
" df[\"b\"] = list(range(1, length + 1))\n",
" df[\"c\"] = [v.upper() if v in [\"a\", \"b\", \"c\"] else None for v in df[\"a\"]]\n",
"\n",
" return df"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"def upper(df: pd.DataFrame, cols=None):\n",
" if cols:\n",
" df[cols] = df[cols].astype(str).applymap(lambda s: s.upper())\n",
" return df\n",
" return df.applymap(lambda s: str(s).upper())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>A</td>\n",
" <td>1</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>B</td>\n",
" <td>2</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>C</td>\n",
" <td>3</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>D</td>\n",
" <td>4</td>\n",
" <td>NONE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>E</td>\n",
" <td>5</td>\n",
" <td>NONE</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b c\n",
"0 A 1 A\n",
"1 B 2 B\n",
"2 C 3 C\n",
"3 D 4 NONE\n",
"4 E 5 NONE"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.pipe(parse).pipe(upper)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>A</td>\n",
" <td>1</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>B</td>\n",
" <td>2</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>C</td>\n",
" <td>3</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>D</td>\n",
" <td>4</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>E</td>\n",
" <td>5</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b c\n",
"0 A 1 A\n",
"1 B 2 B\n",
"2 C 3 C\n",
"3 D 4 None\n",
"4 E 5 None"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.pipe(parse).pipe(upper, cols=[\"a\"])"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# rename"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['a', 'b'], dtype='object')"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"data = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6]))\n",
"df = data.copy(deep=True) # 1\n",
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['A', 'B'], dtype='object')"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mapper = [\"A\", \"B\"]\n",
"df.columns = mapper\n",
"df\n",
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"0 1 4\n",
"1 2 5\n",
"2 3 6"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = data.copy(deep=True)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"['a', 'B']"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cols = df.columns.tolist() # 1\n",
"cols[cols.index(\"b\")] = \"B\" # 2\n",
"cols # 3"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['a', 'B'], dtype='object')"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns = cols # 4\n",
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>B</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a B\n",
"0 1 4\n",
"1 2 5\n",
"2 3 6"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = data.copy(deep=True)\n",
"df.rename(columns={\"b\": \"B\"}) # 1"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['a', 'b'], dtype='object')"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>B</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a B\n",
"0 1 4\n",
"1 2 5\n",
"2 3 6"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.rename(columns=lambda v: \"B\" if v == \"b\" else v)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# filter"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a1</th>\n",
" <th>a2</th>\n",
" <th>a3</th>\n",
" <th>b1</th>\n",
" <th>c1</th>\n",
" <th>d1</th>\n",
" <th>b2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>k</td>\n",
" <td>U</td>\n",
" <td>H</td>\n",
" <td>p</td>\n",
" <td>Z</td>\n",
" <td>p</td>\n",
" <td>G</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>z</td>\n",
" <td>N</td>\n",
" <td>A</td>\n",
" <td>c</td>\n",
" <td>B</td>\n",
" <td>W</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>X</td>\n",
" <td>R</td>\n",
" <td>B</td>\n",
" <td>z</td>\n",
" <td>K</td>\n",
" <td>I</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Y</td>\n",
" <td>r</td>\n",
" <td>y</td>\n",
" <td>a</td>\n",
" <td>k</td>\n",
" <td>u</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Y</td>\n",
" <td>H</td>\n",
" <td>Q</td>\n",
" <td>D</td>\n",
" <td>J</td>\n",
" <td>j</td>\n",
" <td>J</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a1 a2 a3 b1 c1 d1 b2\n",
"0 k U H p Z p G\n",
"1 z N A c B W B\n",
"2 X R B z K I B\n",
"3 Y r y a k u F\n",
"4 Y H Q D J j J"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import random\n",
"import string\n",
"\n",
"import pandas as pd\n",
"\n",
"data = pd.DataFrame(\n",
" dict(\n",
" a1=random.choices(string.ascii_letters, k=5),\n",
" a2=random.choices(string.ascii_letters, k=5),\n",
" a3=random.choices(string.ascii_letters, k=5),\n",
" b1=random.choices(string.ascii_letters, k=5),\n",
" c1=random.choices(string.ascii_letters, k=5),\n",
" d1=random.choices(string.ascii_letters, k=5),\n",
" b2=random.choices(string.ascii_letters, k=5),\n",
" )\n",
")\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a1</th>\n",
" <th>b1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>k</td>\n",
" <td>p</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>z</td>\n",
" <td>c</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>X</td>\n",
" <td>z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Y</td>\n",
" <td>a</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Y</td>\n",
" <td>D</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a1 b1\n",
"0 k p\n",
"1 z c\n",
"2 X z\n",
"3 Y a\n",
"4 Y D"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.loc[:, [\"a1\", \"b1\"]]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a1</th>\n",
" <th>b1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>k</td>\n",
" <td>p</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>z</td>\n",
" <td>c</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>X</td>\n",
" <td>z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Y</td>\n",
" <td>a</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Y</td>\n",
" <td>D</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a1 b1\n",
"0 k p\n",
"1 z c\n",
"2 X z\n",
"3 Y a\n",
"4 Y D"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[[\"a1\", \"b1\"]]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['a1', 'a2', 'a3', 'b1', 'c1', 'd1', 'b2']\n"
]
}
],
"source": [
"cols = data.columns.tolist()\n",
"print(cols)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['a1', 'b1', 'c1', 'd1']\n"
]
}
],
"source": [
"cols = [col for col in cols if col.endswith(\"1\")]\n",
"print(cols)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a1</th>\n",
" <th>b1</th>\n",
" <th>c1</th>\n",
" <th>d1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>k</td>\n",
" <td>p</td>\n",
" <td>Z</td>\n",
" <td>p</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>z</td>\n",
" <td>c</td>\n",
" <td>B</td>\n",
" <td>W</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>X</td>\n",
" <td>z</td>\n",
" <td>K</td>\n",
" <td>I</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Y</td>\n",
" <td>a</td>\n",
" <td>k</td>\n",
" <td>u</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Y</td>\n",
" <td>D</td>\n",
" <td>J</td>\n",
" <td>j</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a1 b1 c1 d1\n",
"0 k p Z p\n",
"1 z c B W\n",
"2 X z K I\n",
"3 Y a k u\n",
"4 Y D J j"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[cols]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a1</th>\n",
" <th>b1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>k</td>\n",
" <td>p</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>z</td>\n",
" <td>c</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>X</td>\n",
" <td>z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Y</td>\n",
" <td>a</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Y</td>\n",
" <td>D</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a1 b1\n",
"0 k p\n",
"1 z c\n",
"2 X z\n",
"3 Y a\n",
"4 Y D"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.filter(items=[\"a1\", \"b1\"])"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a1</th>\n",
" <th>b1</th>\n",
" <th>c1</th>\n",
" <th>d1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>k</td>\n",
" <td>p</td>\n",
" <td>Z</td>\n",
" <td>p</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>z</td>\n",
" <td>c</td>\n",
" <td>B</td>\n",
" <td>W</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>X</td>\n",
" <td>z</td>\n",
" <td>K</td>\n",
" <td>I</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Y</td>\n",
" <td>a</td>\n",
" <td>k</td>\n",
" <td>u</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Y</td>\n",
" <td>D</td>\n",
" <td>J</td>\n",
" <td>j</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a1 b1 c1 d1\n",
"0 k p Z p\n",
"1 z c B W\n",
"2 X z K I\n",
"3 Y a k u\n",
"4 Y D J j"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.filter(regex=r\"^[a-d]1$\")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a1</th>\n",
" <th>a2</th>\n",
" <th>b1</th>\n",
" <th>c1</th>\n",
" <th>b2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>k</td>\n",
" <td>U</td>\n",
" <td>p</td>\n",
" <td>Z</td>\n",
" <td>G</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>z</td>\n",
" <td>N</td>\n",
" <td>c</td>\n",
" <td>B</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>X</td>\n",
" <td>R</td>\n",
" <td>z</td>\n",
" <td>K</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Y</td>\n",
" <td>r</td>\n",
" <td>a</td>\n",
" <td>k</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Y</td>\n",
" <td>H</td>\n",
" <td>D</td>\n",
" <td>J</td>\n",
" <td>J</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a1 a2 b1 c1 b2\n",
"0 k U p Z G\n",
"1 z N c B B\n",
"2 X R z K B\n",
"3 Y r a k F\n",
"4 Y H D J J"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.filter(regex=r\"^(a|b|c)[1-2]$\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# assign"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" a\n",
"0 h\n",
"1 e\n",
"2 l\n",
"3 l\n",
"4 o\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"data = pd.DataFrame(\n",
" dict(\n",
" a=list(\"hello\"),\n",
" )\n",
")\n",
"print(data.head())"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" a b\n",
"0 H 1\n",
"1 E 2\n",
"2 L 3\n",
"3 L 4\n",
"4 O 5\n"
]
}
],
"source": [
"data[\"a\"] = data[\"a\"].str.upper()\n",
"data[\"b\"] = list(range(1, 6))\n",
"print(data.head())"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>H</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>E</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>L</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>L</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>O</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"0 H 1\n",
"1 E 2\n",
"2 L 3\n",
"3 L 4\n",
"4 O 5"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = pd.DataFrame(\n",
" dict(\n",
" a=list(\"hello\"),\n",
" )\n",
")\n",
"data.assign(\n",
" a=data[\"a\"].str.upper(),\n",
" b=list(range(1, 6)),\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# eval & query"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>uid</th>\n",
" <th>pid</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>O</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>L</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>k</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>Z</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" uid pid\n",
"0 1 O\n",
"1 2 L\n",
"2 3 k\n",
"3 4 F\n",
"4 5 Z"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import random\n",
"import string\n",
"\n",
"import pandas as pd\n",
"\n",
"random.seed(3.14)\n",
"\n",
"data = pd.DataFrame(\n",
" dict(\n",
" uid=list(range(1, 11)),\n",
" pid=random.choices(string.ascii_letters, k=10),\n",
" )\n",
")\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>uid</th>\n",
" <th>pid</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>L</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>6</td>\n",
" <td>u</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>8</td>\n",
" <td>V</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>10</td>\n",
" <td>m</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" uid pid\n",
"1 2 L\n",
"3 4 F\n",
"5 6 u\n",
"7 8 V\n",
"9 10 m"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.loc[data[\"uid\"] % 2 == 0]"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>uid</th>\n",
" <th>pid</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>L</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>F</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" uid pid\n",
"1 2 L\n",
"3 4 F"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# fmt:off\n",
"data.loc[\n",
" (data[\"uid\"] % 2 == 0)\n",
" & (data[\"pid\"].str.isupper())\n",
" & (data[\"pid\"] != \"V\")\n",
"]\n",
"# fmt:on"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>uid</th>\n",
" <th>pid</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>L</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>F</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" uid pid\n",
"1 2 L\n",
"3 4 F"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cond = data.eval(\n",
" \"\"\"\n",
" uid % 2 == 0 and \\\n",
" pid.str.isupper() and \\\n",
" pid != 'V'\n",
" \"\"\"\n",
")\n",
"data.loc[cond]"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>uid</th>\n",
" <th>pid</th>\n",
" <th>code</th>\n",
" <th>is_vip</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>O</td>\n",
" <td>1_O</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>L</td>\n",
" <td>2_L</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>k</td>\n",
" <td>3_k</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>F</td>\n",
" <td>4_F</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>Z</td>\n",
" <td>5_Z</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>6</td>\n",
" <td>u</td>\n",
" <td>6_u</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>7</td>\n",
" <td>t</td>\n",
" <td>7_t</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>8</td>\n",
" <td>V</td>\n",
" <td>8_V</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>9</td>\n",
" <td>x</td>\n",
" <td>9_x</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>10</td>\n",
" <td>m</td>\n",
" <td>10_m</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" uid pid code is_vip\n",
"0 1 O 1_O True\n",
"1 2 L 2_L True\n",
"2 3 k 3_k False\n",
"3 4 F 4_F True\n",
"4 5 Z 5_Z True\n",
"5 6 u 6_u False\n",
"6 7 t 7_t False\n",
"7 8 V 8_V True\n",
"8 9 x 9_x False\n",
"9 10 m 10_m False"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.eval(\n",
" \"\"\"\n",
" uid = uid.astype('str')\n",
" code = uid.str.cat(pid, sep=\"_\")\n",
" is_vip = pid.str.isupper()\n",
" \"\"\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>uid</th>\n",
" <th>pid</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>L</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>F</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" uid pid\n",
"1 2 L\n",
"3 4 F"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.query(\n",
" \"\"\"\n",
" uid % 2 == 0 and \\\n",
" pid.str.isupper() and \\\n",
" pid != 'V'\n",
" \"\"\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>uid</th>\n",
" <th>pid</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>L</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>F</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" uid pid\n",
"1 2 L\n",
"3 4 F"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.query(\"\"\"uid % 2 == 0 and pid.str.isupper() and pid != 'V'\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.4 ('sspai-100-hours-series-python')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
},
"vscode": {
"interpreter": {
"hash": "7a101baf08afe636412f97dd4a9fc2e65b6f84f0ec50413bf3e19b04a26b8ba6"
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}