{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "%config InlineBackend.figure_format = 'svg'" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "import statsmodels.api as sm\n", "import matplotlib.pyplot as plt\n", "\n", "plt.rc(\"figure\", figsize=(16, 8))\n", "plt.rc(\"font\", size=14)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "data = sm.datasets.statecrime.load_pandas()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
violentmurderhs_gradpovertysinglewhiteurban
state
Alabama459.97.182.117.529.070.048.65
Alaska632.63.291.49.025.568.344.46
Arizona423.25.584.216.525.780.080.07
Arkansas530.36.382.418.826.378.439.54
California473.45.480.614.227.862.789.73
\n", "
" ], "text/plain": [ " violent murder hs_grad poverty single white urban\n", "state \n", "Alabama 459.9 7.1 82.1 17.5 29.0 70.0 48.65\n", "Alaska 632.6 3.2 91.4 9.0 25.5 68.3 44.46\n", "Arizona 423.2 5.5 84.2 16.5 25.7 80.0 80.07\n", "Arkansas 530.3 6.3 82.4 18.8 26.3 78.4 39.54\n", "California 473.4 5.4 80.6 14.2 27.8 62.7 89.73" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.data.head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 51 entries, Alabama to Wyoming\n", "Data columns (total 7 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 violent 51 non-null float64\n", " 1 murder 51 non-null float64\n", " 2 hs_grad 51 non-null float64\n", " 3 poverty 51 non-null float64\n", " 4 single 51 non-null float64\n", " 5 white 51 non-null float64\n", " 6 urban 51 non-null float64\n", "dtypes: float64(7)\n", "memory usage: 3.2+ KB\n" ] } ], "source": [ "data.data.info()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
urbanpovertyhs_gradsingle
state
Alabama48.6517.582.129.0
Alaska44.469.091.425.5
Arizona80.0716.584.225.7
Arkansas39.5418.882.426.3
California89.7314.280.627.8
\n", "
" ], "text/plain": [ " urban poverty hs_grad single\n", "state \n", "Alabama 48.65 17.5 82.1 29.0\n", "Alaska 44.46 9.0 91.4 25.5\n", "Arizona 80.07 16.5 84.2 25.7\n", "Arkansas 39.54 18.8 82.4 26.3\n", "California 89.73 14.2 80.6 27.8" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.exog.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "state\n", "Alabama 7.1\n", "Alaska 3.2\n", "Arizona 5.5\n", "Arkansas 6.3\n", "California 5.4\n", "Name: murder, dtype: float64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.endog.head()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
consturbanpovertyhs_gradsingle
state
Alabama1.048.6517.582.129.0
Alaska1.044.469.091.425.5
Arizona1.080.0716.584.225.7
Arkansas1.039.5418.882.426.3
California1.089.7314.280.627.8
\n", "
" ], "text/plain": [ " const urban poverty hs_grad single\n", "state \n", "Alabama 1.0 48.65 17.5 82.1 29.0\n", "Alaska 1.0 44.46 9.0 91.4 25.5\n", "Arizona 1.0 80.07 16.5 84.2 25.7\n", "Arkansas 1.0 39.54 18.8 82.4 26.3\n", "California 1.0 89.73 14.2 80.6 27.8" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X = data.exog\n", "X = sm.add_constant(X)\n", "X.head()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "Y = data.endog" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "crime_model = sm.OLS(Y, X).fit()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "# equal to this:\n", "data_copy = data.data.copy()\n", "data_copy = sm.add_constant(data_copy)\n", "crime_model = sm.formula.ols(\n", " \"murder ~ urban + poverty + hs_grad + single + C(const)\",\n", " data=data_copy\n", ").fit()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: murder R-squared: 0.813\n", "Model: OLS Adj. R-squared: 0.797\n", "Method: Least Squares F-statistic: 50.08\n", "Date: Thu, 30 Jun 2022 Prob (F-statistic): 3.42e-16\n", "Time: 09:07:46 Log-Likelihood: -95.050\n", "No. Observations: 51 AIC: 200.1\n", "Df Residuals: 46 BIC: 209.8\n", "Df Model: 4 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "Intercept -44.1024 12.086 -3.649 0.001 -68.430 -19.774\n", "urban 0.0109 0.015 0.707 0.483 -0.020 0.042\n", "poverty 0.4121 0.140 2.939 0.005 0.130 0.694\n", "hs_grad 0.3059 0.117 2.611 0.012 0.070 0.542\n", "single 0.6374 0.070 9.065 0.000 0.496 0.779\n", "==============================================================================\n", "Omnibus: 1.618 Durbin-Watson: 2.507\n", "Prob(Omnibus): 0.445 Jarque-Bera (JB): 0.831\n", "Skew: -0.220 Prob(JB): 0.660\n", "Kurtosis: 3.445 Cond. No. 5.80e+03\n", "==============================================================================\n", "\n", "Notes:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "[2] The condition number is large, 5.8e+03. This might indicate that there are\n", "strong multicollinearity or other numerical problems.\n" ] } ], "source": [ "print(crime_model.summary())" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.8132403052312949\n" ] } ], "source": [ "print(crime_model.rsquared)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Intercept -44.102416\n", "urban 0.010888\n", "poverty 0.412150\n", "hs_grad 0.305927\n", "single 0.637375\n", "dtype: float64\n" ] } ], "source": [ "print(crime_model.params)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "eval_env: 1\n", "eval_env: 1\n", "eval_env: 1\n", "eval_env: 1\n", "eval_env: 1\n" ] }, { "data": { "image/svg+xml": "\n\n\n \n \n \n \n 2022-06-30T09:07:46.636984\n image/svg+xml\n \n \n Matplotlib v3.5.2, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig = sm.graphics.plot_partregress_grid(crime_model)\n", "fig.tight_layout(pad=1.0)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "image/svg+xml": "\n\n\n \n \n \n \n 2022-06-30T09:07:46.860175\n image/svg+xml\n \n \n Matplotlib v3.5.2, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig = sm.graphics.plot_fit(crime_model, \"single\")\n", "fig.tight_layout(pad=1.0)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.10.4 ('sspai-100-hours-series-python')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.4" }, "vscode": { "interpreter": { "hash": "7a101baf08afe636412f97dd4a9fc2e65b6f84f0ec50413bf3e19b04a26b8ba6" } } }, "nbformat": 4, "nbformat_minor": 1 }