From e94d3af3be170589697016195f1ea8faa2fa4c1f Mon Sep 17 00:00:00 2001 From: 100gle <569590461@qq.com> Date: Thu, 30 Jun 2022 09:08:53 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=96=B0=E5=A2=9EVol19=E7=AB=A0?= =?UTF-8?q?=E8=8A=82=E7=A4=BA=E4=BE=8B=E6=BA=90=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- code/19/pyjanitor.ipynb | 1826 +++++++++++++++++++++++++++++++++++++ code/19/statsmodels.ipynb | 692 ++++++++++++++ 2 files changed, 2518 insertions(+) create mode 100644 code/19/pyjanitor.ipynb create mode 100644 code/19/statsmodels.ipynb diff --git a/code/19/pyjanitor.ipynb b/code/19/pyjanitor.ipynb new file mode 100644 index 0000000..5246159 --- /dev/null +++ b/code/19/pyjanitor.ipynb @@ -0,0 +1,1826 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": true, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Pyjanitor" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "可以将如下代码取消注释后执行,以安装 Pyjanitor" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "#!pip install pyjanitor" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import janitor\n", + "\n", + "# or use `as` key word to set an alias.\n", + "import janitor as jn" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab231_
01a
13b
23c
\n", + "
" + ], + "text/plain": [ + " a b231_\n", + "0 1 a\n", + "1 3 b\n", + "2 3 c" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import janitor as jn\n", + "import pandas as pd\n", + "\n", + "data = pd.DataFrame(\n", + " {\n", + " \"a\": [1, 3, 3],\n", + " \"b$231@!#_\": list(\"abc\"),\n", + " }\n", + ")\n", + "data.head()\n", + "data.pipe(jn.clean_names, remove_special=True)\n", + "data.clean_names(remove_special=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## coalesce" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
01<NA>
1<NA>1.3
23<NA>
3<NA><NA>
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 1 \n", + "1 1.3\n", + "2 3 \n", + "3 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# native pandas code\n", + "\n", + "import pandas as pd\n", + "\n", + "data = pd.DataFrame(\n", + " {\n", + " \"a\": [1, pd.NA, 3, pd.NA],\n", + " \"b\": [pd.NA, 1.3, pd.NA, pd.NA],\n", + " }\n", + ")\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "def use_or_not(row):\n", + " a, b = pd.isna(row[\"a\"]), pd.isna(row[\"b\"])\n", + " if a and not b:\n", + " return row[\"b\"]\n", + " elif not a and b:\n", + " return row[\"a\"]\n", + " else:\n", + " return pd.NA" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
01<NA>1
1<NA>1.31.3
23<NA>3
3<NA><NA><NA>
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 1 1\n", + "1 1.3 1.3\n", + "2 3 3\n", + "3 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[\"c\"] = data.apply(use_or_not, axis=1)\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
01<NA>1
1<NA>1.31.3
23<NA>3
3<NA><NA><NA>
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 1 1\n", + "1 1.3 1.3\n", + "2 3 3\n", + "3 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# janitor code\n", + "import janitor\n", + "import pandas as pd\n", + "\n", + "data = pd.DataFrame(\n", + " {\n", + " \"a\": [1, pd.NA, 3, pd.NA],\n", + " \"b\": [pd.NA, 1.3, pd.NA, pd.NA],\n", + " \"c\": [3, pd.NA, 2, 3.1],\n", + " }\n", + ")\n", + "data.coalesce(\"a\", \"b\", target_column_name=\"c\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
01<NA>3
1<NA>1.3<NA>
23<NA>2
3<NA><NA>3.1
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 1 3\n", + "1 1.3 \n", + "2 3 2\n", + "3 3.1" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# DataFrame.bfill\n", + "import pandas as pd\n", + "\n", + "data = pd.DataFrame(\n", + " {\n", + " \"a\": [1, pd.NA, 3, pd.NA],\n", + " \"b\": [pd.NA, 1.3, pd.NA, pd.NA],\n", + " \"c\": [3, pd.NA, 2, 3.1],\n", + " }\n", + ")\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
01<NA>
11.31.3
23<NA>
3<NA><NA>
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 1 \n", + "1 1.3 1.3\n", + "2 3 \n", + "3 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.filter([\"a\", \"b\"]).bfill(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
01.03.03
11.31.3<NA>
23.02.02
33.13.13.1
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 1.0 3.0 3\n", + "1 1.3 1.3 \n", + "2 3.0 2.0 2\n", + "3 3.1 3.1 3.1" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.filter([\"a\", \"b\", \"c\"]).bfill(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1.0\n", + "1 1.3\n", + "2 3.0\n", + "3 3.1\n", + "Name: a, dtype: float64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.filter([\"a\", \"b\", \"c\"]).bfill(axis=1).iloc[:, 0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## case_when" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# native pandas code\n", + "import pandas as pd\n", + "\n", + "df = pd.DataFrame(\n", + " {\n", + " \"a\": [0, 0, 1, 2, \"hi\"],\n", + " \"b\": [0, 3, 4, 5, \"bye\"],\n", + " \"c\": [6, 7, 8, 9, \"wait\"],\n", + " }\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "def case_when(row):\n", + " if (row[\"a\"] == 0 and row[\"b\"] != 0) or row[\"c\"] == \"wait\":\n", + " return row[\"a\"]\n", + " elif row[\"a\"] == 0 and row[\"b\"] == 0:\n", + " return \"x\"\n", + " else:\n", + " return row[\"c\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcnew_col
0006x
10370
21488
32599
4hibyewaithi
\n", + "
" + ], + "text/plain": [ + " a b c new_col\n", + "0 0 0 6 x\n", + "1 0 3 7 0\n", + "2 1 4 8 8\n", + "3 2 5 9 9\n", + "4 hi bye wait hi" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.assign(new_col=df.apply(case_when, axis=1))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
0006
1037
2148
3259
4hibyewait
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 0 0 6\n", + "1 0 3 7\n", + "2 1 4 8\n", + "3 2 5 9\n", + "4 hi bye wait" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# janitor code\n", + "import janitor\n", + "import pandas as pd\n", + "\n", + "df = pd.DataFrame(\n", + " {\n", + " \"a\": [0, 0, 1, 2, \"hi\"],\n", + " \"b\": [0, 3, 4, 5, \"bye\"],\n", + " \"c\": [6, 7, 8, 9, \"wait\"],\n", + " }\n", + ")\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcnew_col
0006x
10370
21488
32599
4hibyewaithi
\n", + "
" + ], + "text/plain": [ + " a b c new_col\n", + "0 0 0 6 x\n", + "1 0 3 7 0\n", + "2 1 4 8 8\n", + "3 2 5 9 9\n", + "4 hi bye wait hi" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# fmt:off\n", + "df.case_when(\n", + " ((df.a == 0) & (df.b != 0)) | (df.c == \"wait\"), df.a,\n", + " (df.b == 0) & (df.a == 0), \"x\",\n", + " df.c,\n", + " column_name=\"new_col\",\n", + ")\n", + "# fmt:on" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## concatenate_columns & deconcatenate_column" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
no.prefixbase
01automatic
12decode
23embody
\n", + "
" + ], + "text/plain": [ + " no. prefix base\n", + "0 1 auto matic\n", + "1 2 de code\n", + "2 3 em body" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# native pandas code\n", + "import pandas as pd\n", + "\n", + "data = pd.DataFrame(\n", + " {\n", + " \"no.\": [1, 2, 3],\n", + " \"prefix\": [\"auto\", \"de\", \"em\"],\n", + " \"base\": [\"matic\", \"code\", \"body\"],\n", + " }\n", + ")\n", + "\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 auto-matic\n", + "1 de-code\n", + "2 em-body\n", + "Name: prefix, dtype: object" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[\"prefix\"].str.cat(data[\"base\"], sep=\"-\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 auto-matic\n", + "1 de-code\n", + "2 em-body\n", + "dtype: object" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[\"prefix\"] + \"-\" + data[\"base\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1-auto-matic\n", + "1 2-de-code\n", + "2 3-em-body\n", + "Name: no., dtype: object" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(\n", + " data[\"no.\"]\n", + " .astype(str)\n", + " .str.cat(data[\"prefix\"], sep=\"-\")\n", + " .str.cat(data[\"base\"], sep=\"-\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false, + "pycharm": { + 
"name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
no.prefixbaseword
01automatic1-auto-matic
12decode2-de-code
23embody3-em-body
\n", + "
" + ], + "text/plain": [ + " no. prefix base word\n", + "0 1 auto matic 1-auto-matic\n", + "1 2 de code 2-de-code\n", + "2 3 em body 3-em-body" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# janitor code\n", + "import janitor\n", + "import pandas as pd\n", + "\n", + "data = pd.DataFrame(\n", + " {\n", + " \"no.\": [1, 2, 3],\n", + " \"prefix\": [\"auto\", \"de\", \"em\"],\n", + " \"base\": [\"matic\", \"code\", \"body\"],\n", + " }\n", + ")\n", + "data.concatenate_columns(\n", + " column_names=[\"no.\", \"prefix\", \"base\"],\n", + " new_column_name=\"word\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateyearmonthdayyear_month
02022-01-0220220102202201
12022-01-0920220109202201
22022-01-1620220116202201
32022-01-2320220123202201
42022-01-3020220130202201
\n", + "
" + ], + "text/plain": [ + " date year month day year_month\n", + "0 2022-01-02 2022 01 02 202201\n", + "1 2022-01-09 2022 01 09 202201\n", + "2 2022-01-16 2022 01 16 202201\n", + "3 2022-01-23 2022 01 23 202201\n", + "4 2022-01-30 2022 01 30 202201" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import janitor\n", + "import pandas as pd\n", + "(\n", + " pd.DataFrame({\"date\": pd.date_range(\"20220101\", \"20220201\", freq=\"1W\")})\n", + " .astype(str)\n", + " .deconcatenate_column(\"date\", new_column_names=[\"year\", \"month\", \"day\"], sep=\"-\")\n", + " .assign(year_month=lambda df: df[\"year\"].str.cat(df[\"month\"], sep=\"\"))\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## join_apply" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
012
134
256
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 1 2\n", + "1 3 4\n", + "2 5 6" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# native pandas code\n", + "import pandas as pd\n", + "\n", + "data = pd.DataFrame(\n", + " {\n", + " \"a\": [1, 3, 5],\n", + " \"b\": [2, 4, 6],\n", + " }\n", + ")\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
0121.333333
1343.333333
2565.333333
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 1 2 1.333333\n", + "1 3 4 3.333333\n", + "2 5 6 5.333333" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.assign(c=lambda df: df.apply(lambda row: (row[\"a\"] * 2 + row[\"b\"]) / 3, axis=1))" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
0121.333333
1343.333333
2565.333333
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 1 2 1.333333\n", + "1 3 4 3.333333\n", + "2 5 6 5.333333" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# equal this:\n", + "data.assign(\n", + " c=(data[\"a\"]*2+data[\"b\"])/3\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
0121.333333
1343.333333
2565.333333
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 1 2 1.333333\n", + "1 3 4 3.333333\n", + "2 5 6 5.333333" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# janitor code\n", + "import janitor\n", + "import pandas as pd\n", + "\n", + "data = pd.DataFrame(\n", + " {\n", + " \"a\": [1, 3, 5],\n", + " \"b\": [2, 4, 6],\n", + " }\n", + ")\n", + "\n", + "data.join_apply(lambda row: (row[\"a\"] * 2 + row[\"b\"]) / 3, new_column_name=\"c\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.10.4 ('sspai-100-hours-series-python')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + }, + "vscode": { + "interpreter": { + "hash": "7a101baf08afe636412f97dd4a9fc2e65b6f84f0ec50413bf3e19b04a26b8ba6" + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/code/19/statsmodels.ipynb b/code/19/statsmodels.ipynb new file mode 100644 index 0000000..649b1ab --- /dev/null +++ b/code/19/statsmodels.ipynb @@ -0,0 +1,692 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "%config InlineBackend.figure_format = 'svg'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import statsmodels.api as sm\n", + "import matplotlib.pyplot as plt\n", + "\n", + "plt.rc(\"figure\", figsize=(16, 8))\n", + "plt.rc(\"font\", size=14)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { 
+ "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "data = sm.datasets.statecrime.load_pandas()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
violentmurderhs_gradpovertysinglewhiteurban
state
Alabama459.97.182.117.529.070.048.65
Alaska632.63.291.49.025.568.344.46
Arizona423.25.584.216.525.780.080.07
Arkansas530.36.382.418.826.378.439.54
California473.45.480.614.227.862.789.73
\n", + "
" + ], + "text/plain": [ + " violent murder hs_grad poverty single white urban\n", + "state \n", + "Alabama 459.9 7.1 82.1 17.5 29.0 70.0 48.65\n", + "Alaska 632.6 3.2 91.4 9.0 25.5 68.3 44.46\n", + "Arizona 423.2 5.5 84.2 16.5 25.7 80.0 80.07\n", + "Arkansas 530.3 6.3 82.4 18.8 26.3 78.4 39.54\n", + "California 473.4 5.4 80.6 14.2 27.8 62.7 89.73" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 51 entries, Alabama to Wyoming\n", + "Data columns (total 7 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 violent 51 non-null float64\n", + " 1 murder 51 non-null float64\n", + " 2 hs_grad 51 non-null float64\n", + " 3 poverty 51 non-null float64\n", + " 4 single 51 non-null float64\n", + " 5 white 51 non-null float64\n", + " 6 urban 51 non-null float64\n", + "dtypes: float64(7)\n", + "memory usage: 3.2+ KB\n" + ] + } + ], + "source": [ + "data.data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
urbanpovertyhs_gradsingle
state
Alabama48.6517.582.129.0
Alaska44.469.091.425.5
Arizona80.0716.584.225.7
Arkansas39.5418.882.426.3
California89.7314.280.627.8
\n", + "
" + ], + "text/plain": [ + " urban poverty hs_grad single\n", + "state \n", + "Alabama 48.65 17.5 82.1 29.0\n", + "Alaska 44.46 9.0 91.4 25.5\n", + "Arizona 80.07 16.5 84.2 25.7\n", + "Arkansas 39.54 18.8 82.4 26.3\n", + "California 89.73 14.2 80.6 27.8" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.exog.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "state\n", + "Alabama 7.1\n", + "Alaska 3.2\n", + "Arizona 5.5\n", + "Arkansas 6.3\n", + "California 5.4\n", + "Name: murder, dtype: float64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.endog.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
consturbanpovertyhs_gradsingle
state
Alabama1.048.6517.582.129.0
Alaska1.044.469.091.425.5
Arizona1.080.0716.584.225.7
Arkansas1.039.5418.882.426.3
California1.089.7314.280.627.8
\n", + "
" + ], + "text/plain": [ + " const urban poverty hs_grad single\n", + "state \n", + "Alabama 1.0 48.65 17.5 82.1 29.0\n", + "Alaska 1.0 44.46 9.0 91.4 25.5\n", + "Arizona 1.0 80.07 16.5 84.2 25.7\n", + "Arkansas 1.0 39.54 18.8 82.4 26.3\n", + "California 1.0 89.73 14.2 80.6 27.8" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X = data.exog\n", + "X = sm.add_constant(X)\n", + "X.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "Y = data.endog" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "crime_model = sm.OLS(Y, X).fit()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# equal to this:\n", + "data_copy = data.data.copy()\n", + "data_copy = sm.add_constant(data_copy)\n", + "crime_model = sm.formula.ols(\n", + " \"murder ~ urban + poverty + hs_grad + single + C(const)\",\n", + " data=data_copy\n", + ").fit()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " OLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: murder R-squared: 0.813\n", + "Model: OLS Adj. R-squared: 0.797\n", + "Method: Least Squares F-statistic: 50.08\n", + "Date: Thu, 30 Jun 2022 Prob (F-statistic): 3.42e-16\n", + "Time: 09:07:46 Log-Likelihood: -95.050\n", + "No. 
Observations: 51 AIC: 200.1\n", + "Df Residuals: 46 BIC: 209.8\n", + "Df Model: 4 \n", + "Covariance Type: nonrobust \n", + "==============================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "Intercept -44.1024 12.086 -3.649 0.001 -68.430 -19.774\n", + "urban 0.0109 0.015 0.707 0.483 -0.020 0.042\n", + "poverty 0.4121 0.140 2.939 0.005 0.130 0.694\n", + "hs_grad 0.3059 0.117 2.611 0.012 0.070 0.542\n", + "single 0.6374 0.070 9.065 0.000 0.496 0.779\n", + "==============================================================================\n", + "Omnibus: 1.618 Durbin-Watson: 2.507\n", + "Prob(Omnibus): 0.445 Jarque-Bera (JB): 0.831\n", + "Skew: -0.220 Prob(JB): 0.660\n", + "Kurtosis: 3.445 Cond. No. 5.80e+03\n", + "==============================================================================\n", + "\n", + "Notes:\n", + "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", + "[2] The condition number is large, 5.8e+03. 
This might indicate that there are\n", + "strong multicollinearity or other numerical problems.\n" + ] + } + ], + "source": [ + "print(crime_model.summary())" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.8132403052312949\n" + ] + } + ], + "source": [ + "print(crime_model.rsquared)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Intercept -44.102416\n", + "urban 0.010888\n", + "poverty 0.412150\n", + "hs_grad 0.305927\n", + "single 0.637375\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "print(crime_model.params)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "eval_env: 1\n", + "eval_env: 1\n", + "eval_env: 1\n", + "eval_env: 1\n", + "eval_env: 1\n" + ] + }, + { + "data": { + "image/svg+xml": "\n\n\n \n \n \n \n 2022-06-30T09:07:46.636984\n image/svg+xml\n \n \n Matplotlib v3.5.2, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig = sm.graphics.plot_partregress_grid(crime_model)\n", + "fig.tight_layout(pad=1.0)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "image/svg+xml": "\n\n\n \n \n \n \n 2022-06-30T09:07:46.860175\n image/svg+xml\n \n \n Matplotlib v3.5.2, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig = sm.graphics.plot_fit(crime_model, \"single\")\n", + "fig.tight_layout(pad=1.0)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.10.4 ('sspai-100-hours-series-python')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + }, + "vscode": { + "interpreter": { + "hash": "7a101baf08afe636412f97dd4a9fc2e65b6f84f0ec50413bf3e19b04a26b8ba6" + } + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}