feat: 新增Vol13章节示例源码及样例数据

This commit is contained in:
100gle
2022-06-02 10:55:59 +08:00
parent eed4eeb2dc
commit 588d70d94f
3 changed files with 2155 additions and 0 deletions

215
code/13/reshape_data.ipynb Normal file
View File

@@ -0,0 +1,215 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" id 1992 1993 1994\n",
"0 a 1 3 5\n",
"1 b 2 4 6\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"wide = pd.DataFrame(\n",
" {\n",
" \"id\": [\"a\", \"b\"],\n",
" \"1992\": [1, 2],\n",
" \"1993\": [3, 4],\n",
" \"1994\": [5, 6],\n",
" }\n",
")\n",
"print(wide)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" id year value\n",
"0 a 1992 1\n",
"1 b 1992 2\n",
"2 a 1993 3\n",
"3 b 1993 4\n",
"4 a 1994 5\n",
"5 b 1994 6\n"
]
}
],
"source": [
"melted = wide.melt(\n",
" id_vars=[\"id\"],\n",
" value_vars=[\"1992\", \"1993\", \"1994\"],\n",
" var_name=\"year\",\n",
" value_name=\"value\",\n",
")\n",
"print(melted)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 13,
"outputs": [
{
"data": {
"text/plain": "year 1992 1993 1994\nid \na 1 3 5\nb 2 4 6",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th>year</th>\n <th>1992</th>\n <th>1993</th>\n <th>1994</th>\n </tr>\n <tr>\n <th>id</th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>a</th>\n <td>1</td>\n <td>3</td>\n <td>5</td>\n </tr>\n <tr>\n <th>b</th>\n <td>2</td>\n <td>4</td>\n <td>6</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"melted.pivot(index=\"id\", columns=\"year\", values=\"value\")"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 14,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" platforms apps user_views\n",
"0 Windows Weibo 77769\n",
"1 Windows QQ 72028\n",
"2 Windows Wechat 21587\n",
"3 Windows Facebook 61832\n",
"4 Windows Twitter 99428\n",
"5 MacOS Weibo 40012\n",
"6 MacOS QQ 38287\n",
"7 MacOS Wechat 91048\n",
"8 MacOS Facebook 45890\n",
"9 MacOS Twitter 25002\n",
"10 iOS Weibo 81961\n",
"11 iOS QQ 91249\n",
"12 iOS Wechat 83411\n",
"13 iOS Facebook 94873\n",
"14 iOS Twitter 42350\n",
"15 Android Weibo 4867\n",
"16 Android QQ 60387\n",
"17 Android Wechat 22915\n",
"18 Android Facebook 62127\n",
"19 Android Twitter 8725\n",
"20 Web Weibo 44557\n",
"21 Web QQ 63616\n",
"22 Web Wechat 23107\n",
"23 Web Facebook 58033\n",
"24 Web Twitter 70295\n"
]
}
],
"source": [
"import random\n",
"from itertools import product\n",
"\n",
"import pandas as pd\n",
"\n",
"random.seed(3.14)\n",
"\n",
"platforms = [\"Windows\", \"MacOS\", \"iOS\", \"Android\", \"Web\"]\n",
"apps = [\"Weibo\", \"QQ\", \"Wechat\", \"Facebook\", \"Twitter\"]\n",
"\n",
"data = pd.DataFrame(list(product(platforms, apps)), columns=[\"platforms\", \"apps\"])\n",
"data[\"user_views\"] = random.choices(range(1000, 100000), k=data.shape[0])\n",
"print(data)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 15,
"outputs": [
{
"data": {
"text/plain": "apps Facebook QQ Twitter Wechat Weibo\nplatforms \nAndroid 62127 60387 8725 22915 4867\nMacOS 45890 38287 25002 91048 40012\nWeb 58033 63616 70295 23107 44557\nWindows 61832 72028 99428 21587 77769\niOS 94873 91249 42350 83411 81961",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th>apps</th>\n <th>Facebook</th>\n <th>QQ</th>\n <th>Twitter</th>\n <th>Wechat</th>\n <th>Weibo</th>\n </tr>\n <tr>\n <th>platforms</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>Android</th>\n <td>62127</td>\n <td>60387</td>\n <td>8725</td>\n <td>22915</td>\n <td>4867</td>\n </tr>\n <tr>\n <th>MacOS</th>\n <td>45890</td>\n <td>38287</td>\n <td>25002</td>\n <td>91048</td>\n <td>40012</td>\n </tr>\n <tr>\n <th>Web</th>\n <td>58033</td>\n <td>63616</td>\n <td>70295</td>\n <td>23107</td>\n <td>44557</td>\n </tr>\n <tr>\n <th>Windows</th>\n <td>61832</td>\n <td>72028</td>\n <td>99428</td>\n <td>21587</td>\n <td>77769</td>\n </tr>\n <tr>\n <th>iOS</th>\n <td>94873</td>\n <td>91249</td>\n <td>42350</td>\n <td>83411</td>\n <td>81961</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.pivot_table(\n",
" index=\"platforms\",\n",
" columns=\"apps\",\n",
" values=\"user_views\",\n",
" aggfunc=sum,\n",
" fill_value=0,\n",
")"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 15,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

1048
code/13/tidy_data.ipynb Normal file

File diff suppressed because it is too large Load Diff