{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"%config InlineBackend.figure_format = 'svg'"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"import statsmodels.api as sm\n",
"import matplotlib.pyplot as plt\n",
"\n",
"plt.rc(\"figure\", figsize=(16, 8))\n",
"plt.rc(\"font\", size=14)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"data = sm.datasets.statecrime.load_pandas()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" violent | \n",
" murder | \n",
" hs_grad | \n",
" poverty | \n",
" single | \n",
" white | \n",
" urban | \n",
"
\n",
" \n",
" | state | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | Alabama | \n",
" 459.9 | \n",
" 7.1 | \n",
" 82.1 | \n",
" 17.5 | \n",
" 29.0 | \n",
" 70.0 | \n",
" 48.65 | \n",
"
\n",
" \n",
" | Alaska | \n",
" 632.6 | \n",
" 3.2 | \n",
" 91.4 | \n",
" 9.0 | \n",
" 25.5 | \n",
" 68.3 | \n",
" 44.46 | \n",
"
\n",
" \n",
" | Arizona | \n",
" 423.2 | \n",
" 5.5 | \n",
" 84.2 | \n",
" 16.5 | \n",
" 25.7 | \n",
" 80.0 | \n",
" 80.07 | \n",
"
\n",
" \n",
" | Arkansas | \n",
" 530.3 | \n",
" 6.3 | \n",
" 82.4 | \n",
" 18.8 | \n",
" 26.3 | \n",
" 78.4 | \n",
" 39.54 | \n",
"
\n",
" \n",
" | California | \n",
" 473.4 | \n",
" 5.4 | \n",
" 80.6 | \n",
" 14.2 | \n",
" 27.8 | \n",
" 62.7 | \n",
" 89.73 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" violent murder hs_grad poverty single white urban\n",
"state \n",
"Alabama 459.9 7.1 82.1 17.5 29.0 70.0 48.65\n",
"Alaska 632.6 3.2 91.4 9.0 25.5 68.3 44.46\n",
"Arizona 423.2 5.5 84.2 16.5 25.7 80.0 80.07\n",
"Arkansas 530.3 6.3 82.4 18.8 26.3 78.4 39.54\n",
"California 473.4 5.4 80.6 14.2 27.8 62.7 89.73"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.data.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Index: 51 entries, Alabama to Wyoming\n",
"Data columns (total 7 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 violent 51 non-null float64\n",
" 1 murder 51 non-null float64\n",
" 2 hs_grad 51 non-null float64\n",
" 3 poverty 51 non-null float64\n",
" 4 single 51 non-null float64\n",
" 5 white 51 non-null float64\n",
" 6 urban 51 non-null float64\n",
"dtypes: float64(7)\n",
"memory usage: 3.2+ KB\n"
]
}
],
"source": [
"data.data.info()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" urban | \n",
" poverty | \n",
" hs_grad | \n",
" single | \n",
"
\n",
" \n",
" | state | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | Alabama | \n",
" 48.65 | \n",
" 17.5 | \n",
" 82.1 | \n",
" 29.0 | \n",
"
\n",
" \n",
" | Alaska | \n",
" 44.46 | \n",
" 9.0 | \n",
" 91.4 | \n",
" 25.5 | \n",
"
\n",
" \n",
" | Arizona | \n",
" 80.07 | \n",
" 16.5 | \n",
" 84.2 | \n",
" 25.7 | \n",
"
\n",
" \n",
" | Arkansas | \n",
" 39.54 | \n",
" 18.8 | \n",
" 82.4 | \n",
" 26.3 | \n",
"
\n",
" \n",
" | California | \n",
" 89.73 | \n",
" 14.2 | \n",
" 80.6 | \n",
" 27.8 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" urban poverty hs_grad single\n",
"state \n",
"Alabama 48.65 17.5 82.1 29.0\n",
"Alaska 44.46 9.0 91.4 25.5\n",
"Arizona 80.07 16.5 84.2 25.7\n",
"Arkansas 39.54 18.8 82.4 26.3\n",
"California 89.73 14.2 80.6 27.8"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.exog.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"state\n",
"Alabama 7.1\n",
"Alaska 3.2\n",
"Arizona 5.5\n",
"Arkansas 6.3\n",
"California 5.4\n",
"Name: murder, dtype: float64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.endog.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" const | \n",
" urban | \n",
" poverty | \n",
" hs_grad | \n",
" single | \n",
"
\n",
" \n",
" | state | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | Alabama | \n",
" 1.0 | \n",
" 48.65 | \n",
" 17.5 | \n",
" 82.1 | \n",
" 29.0 | \n",
"
\n",
" \n",
" | Alaska | \n",
" 1.0 | \n",
" 44.46 | \n",
" 9.0 | \n",
" 91.4 | \n",
" 25.5 | \n",
"
\n",
" \n",
" | Arizona | \n",
" 1.0 | \n",
" 80.07 | \n",
" 16.5 | \n",
" 84.2 | \n",
" 25.7 | \n",
"
\n",
" \n",
" | Arkansas | \n",
" 1.0 | \n",
" 39.54 | \n",
" 18.8 | \n",
" 82.4 | \n",
" 26.3 | \n",
"
\n",
" \n",
" | California | \n",
" 1.0 | \n",
" 89.73 | \n",
" 14.2 | \n",
" 80.6 | \n",
" 27.8 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" const urban poverty hs_grad single\n",
"state \n",
"Alabama 1.0 48.65 17.5 82.1 29.0\n",
"Alaska 1.0 44.46 9.0 91.4 25.5\n",
"Arizona 1.0 80.07 16.5 84.2 25.7\n",
"Arkansas 1.0 39.54 18.8 82.4 26.3\n",
"California 1.0 89.73 14.2 80.6 27.8"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build the design matrix: the regressors with a leading column of ones for the intercept.\n",
"X = sm.add_constant(data.exog)\n",
"X.head()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"Y = data.endog"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"crime_model = sm.OLS(Y, X).fit()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# Equivalent fit through the formula interface.\n",
"# The formula API adds the intercept term on its own (reported as `Intercept`),\n",
"# so neither `sm.add_constant` nor an explicit `C(const)` term is needed here;\n",
"# a constant categorical has a single level and contributes no extra column.\n",
"crime_model = sm.formula.ols(\n",
"    \"murder ~ urban + poverty + hs_grad + single\",\n",
"    data=data.data,\n",
").fit()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: murder R-squared: 0.813\n",
"Model: OLS Adj. R-squared: 0.797\n",
"Method: Least Squares F-statistic: 50.08\n",
"Date: Thu, 30 Jun 2022 Prob (F-statistic): 3.42e-16\n",
"Time: 09:07:46 Log-Likelihood: -95.050\n",
"No. Observations: 51 AIC: 200.1\n",
"Df Residuals: 46 BIC: 209.8\n",
"Df Model: 4 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept -44.1024 12.086 -3.649 0.001 -68.430 -19.774\n",
"urban 0.0109 0.015 0.707 0.483 -0.020 0.042\n",
"poverty 0.4121 0.140 2.939 0.005 0.130 0.694\n",
"hs_grad 0.3059 0.117 2.611 0.012 0.070 0.542\n",
"single 0.6374 0.070 9.065 0.000 0.496 0.779\n",
"==============================================================================\n",
"Omnibus: 1.618 Durbin-Watson: 2.507\n",
"Prob(Omnibus): 0.445 Jarque-Bera (JB): 0.831\n",
"Skew: -0.220 Prob(JB): 0.660\n",
"Kurtosis: 3.445 Cond. No. 5.80e+03\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"[2] The condition number is large, 5.8e+03. This might indicate that there are\n",
"strong multicollinearity or other numerical problems.\n"
]
}
],
"source": [
"print(crime_model.summary())"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.8132403052312949\n"
]
}
],
"source": [
"print(crime_model.rsquared)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Intercept -44.102416\n",
"urban 0.010888\n",
"poverty 0.412150\n",
"hs_grad 0.305927\n",
"single 0.637375\n",
"dtype: float64\n"
]
}
],
"source": [
"print(crime_model.params)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"eval_env: 1\n",
"eval_env: 1\n",
"eval_env: 1\n",
"eval_env: 1\n",
"eval_env: 1\n"
]
},
{
"data": {
"image/svg+xml": "\n\n\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Partial-regression plots for the regressors of the fitted model, drawn on an explicit figure.\n",
"fig = plt.figure()\n",
"sm.graphics.plot_partregress_grid(crime_model, fig=fig)\n",
"fig.tight_layout(pad=1.0)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"image/svg+xml": "\n\n\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Observed and fitted values of the response plotted against the `single` regressor.\n",
"fig, ax = plt.subplots()\n",
"sm.graphics.plot_fit(crime_model, \"single\", ax=ax)\n",
"fig.tight_layout(pad=1.0)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.4 ('sspai-100-hours-series-python')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
},
"vscode": {
"interpreter": {
"hash": "7a101baf08afe636412f97dd4a9fc2e65b6f84f0ec50413bf3e19b04a26b8ba6"
}
}
},
"nbformat": 4,
"nbformat_minor": 1
}