{ "cells": [ { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.preprocessing import LabelEncoder" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
appearancepatternspot_rangeechois_good
0brokenclearbigdepressing0
1roundclearbigpleasant1
2ovalbrightmiddleclear1
3roundbluredsmalldepressing0
\n", "
" ], "text/plain": [ " appearance pattern spot_range echo is_good\n", "0 broken clear big depressing 0\n", "1 round clear big pleasant 1\n", "2 oval bright middle clear 1\n", "3 round blured small depressing 0" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Labelled training samples: four categorical features plus the binary `is_good` target.\n", "data = [\n", "    {\n", "        \"appearance\": \"broken\",\n", "        \"pattern\": \"clear\",\n", "        \"spot_range\": \"big\",\n", "        \"echo\": \"depressing\",\n", "        \"is_good\": 0,\n", "    },\n", "    {\n", "        \"appearance\": \"round\",\n", "        \"pattern\": \"clear\",\n", "        \"spot_range\": \"big\",\n", "        \"echo\": \"pleasant\",\n", "        \"is_good\": 1,\n", "    },\n", "    {\n", "        \"appearance\": \"oval\",\n", "        \"pattern\": \"bright\",\n", "        \"spot_range\": \"middle\",\n", "        \"echo\": \"clear\",\n", "        \"is_good\": 1,\n", "    },\n", "    {\n", "        \"appearance\": \"round\",\n", "        \"pattern\": \"blured\",\n", "        \"spot_range\": \"small\",\n", "        \"echo\": \"depressing\",\n", "        \"is_good\": 0,\n", "    },\n", "]\n", "df = pd.DataFrame(data)\n", "df.head()\n" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'C': 1.0,\n", " 'class_weight': None,\n", " 'dual': False,\n", " 'fit_intercept': True,\n", " 'intercept_scaling': 1,\n", " 'l1_ratio': None,\n", " 'max_iter': 100,\n", " 'multi_class': 'auto',\n", " 'n_jobs': None,\n", " 'penalty': 'l2',\n", " 'random_state': 0,\n", " 'solver': 'lbfgs',\n", " 'tol': 0.0001,\n", " 'verbose': 0,\n", " 'warm_start': False}" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Integer-encode the categorical feature columns and fit the classifier on them.\n", "encoder = LabelEncoder()\n", "model = LogisticRegression(random_state=0)\n", "\n", "# DataFrame.apply calls encoder.fit_transform once per feature column, so this\n", "# single encoder object is re-fitted on every column in turn.\n", "X = df.drop(columns=[\"is_good\"]).apply(encoder.fit_transform)\n", "y = df[\"is_good\"].to_numpy()\n", "\n", "# fit() returns the estimator it was called on; show its hyper-parameters.\n", "estimator = model.fit(X, y)\n", "estimator.get_params()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
appearancepatternspot_rangeecho
00020
11111
22101
\n", "
" ], "text/plain": [ " appearance pattern spot_range echo\n", "0 0 0 2 0\n", "1 1 1 1 1\n", "2 2 1 0 1" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Unlabelled samples to classify; same four feature columns as the training frame.\n", "unknown_data = [\n", " dict(\n", " appearance=\"broken\",\n", " pattern=\"blured\",\n", " spot_range=\"small\",\n", " echo=\"clear\",\n", " ),\n", " dict(\n", " appearance=\"oval\",\n", " pattern=\"clear\",\n", " spot_range=\"middle\",\n", " echo=\"depressing\",\n", " ),\n", " dict(\n", " appearance=\"round\",\n", " pattern=\"clear\",\n", " spot_range=\"big\",\n", " echo=\"depressing\",\n", " ),\n", "]\n", "records = pd.DataFrame(unknown_data)\n", "\n", "# Encode each column with the category -> code mapping learned from the matching\n", "# column of the training frame `df`. Re-fitting the encoder on `records` (the\n", "# previous behaviour) derived the codes from whichever values happen to appear\n", "# here, e.g. pattern \"clear\" became 1 instead of the 2 used during training, so the\n", "# model was scored on wrong feature values. transform() raises ValueError for a\n", "# category that never occurred in the training column.\n", "target = records.apply(\n", "    lambda column: LabelEncoder().fit(df[column.name]).transform(column)\n", ")\n", "target.head()\n" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/Bobot/pyenvs/pandas-startup/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n", " warnings.warn(\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
appearancepatternspot_rangeechois_good
0brokenbluredsmallclear0
1ovalclearmiddledepressing0
2roundclearbigdepressing1
\n", "
" ], "text/plain": [ " appearance pattern spot_range echo is_good\n", "0 broken blured small clear 0\n", "1 oval clear middle depressing 0\n", "2 round clear big depressing 1" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Predict a label for every unknown sample and attach it to the readable frame.\n", "# Pass the DataFrame itself rather than `target.values`: the model was fitted on a\n", "# DataFrame, so stripping the column names makes scikit-learn warn that X has no\n", "# valid feature names and stops it from checking the columns against training.\n", "records[\"is_good\"] = estimator.predict(target)\n", "records.head()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/Bobot/pyenvs/pandas-startup/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n", " warnings.warn(\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
appearancepatternspot_rangeechois_good
0brokenbluredsmallclear0
1ovalclearmiddledepressing0
2roundclearbigdepressing1
\n", "
" ], "text/plain": [ " appearance pattern spot_range echo is_good\n", "0 broken blured small clear 0\n", "1 oval clear middle depressing 0\n", "2 round clear big depressing 1" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Same prediction step as the previous cell (re-running it overwrites `is_good`).\n", "# The encoded DataFrame is passed directly so its column names are kept and\n", "# scikit-learn does not warn about missing feature names.\n", "records[\"is_good\"] = estimator.predict(target)\n", "records.head()" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/Bobot/pyenvs/pandas-startup/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n", " warnings.warn(\n" ] }, { "data": { "text/plain": [ "array([[0.74707445, 0.25292555],\n", " [0.56146879, 0.43853121],\n", " [0.38539327, 0.61460673]])" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Per-sample class probabilities; columns follow the order of estimator.classes_.\n", "# The DataFrame (not `target.values`) is passed so the feature names are kept.\n", "estimator.predict_proba(target)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "interpreter": { "hash": "13977d4cc82dee5f9d9535ceb495bd0ab12a43c33c664e5f0d53c24cf634b67f" }, "kernelspec": { "display_name": "Python 3.9.0 ('pandas-startup')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.0" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }