sspai-100-hours-series-python/projects/machine-learning/mock_ml_model_for_choosing_watermelon.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.preprocessing import LabelEncoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>appearance</th>\n",
       "      <th>pattern</th>\n",
       "      <th>spot_range</th>\n",
       "      <th>echo</th>\n",
       "      <th>is_good</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>broken</td>\n",
       "      <td>clear</td>\n",
       "      <td>big</td>\n",
       "      <td>depressing</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>round</td>\n",
       "      <td>clear</td>\n",
       "      <td>big</td>\n",
       "      <td>pleasant</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>oval</td>\n",
       "      <td>bright</td>\n",
       "      <td>middle</td>\n",
       "      <td>clear</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>round</td>\n",
       "      <td>blured</td>\n",
       "      <td>small</td>\n",
       "      <td>depressing</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  appearance pattern spot_range        echo  is_good\n",
       "0     broken   clear        big  depressing        0\n",
       "1      round   clear        big    pleasant        1\n",
       "2       oval  bright     middle       clear        1\n",
       "3      round  blured      small  depressing        0"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = [\n",
    "    dict(\n",
    "        appearance=\"broken\",\n",
    "        pattern=\"clear\",\n",
    "        spot_range=\"big\",\n",
    "        echo=\"depressing\",\n",
    "        is_good=0,\n",
    "    ),\n",
    "    dict(\n",
    "        appearance=\"round\",\n",
    "        pattern=\"clear\",\n",
    "        spot_range=\"big\",\n",
    "        echo=\"pleasant\",\n",
    "        is_good=1,\n",
    "    ),\n",
    "    dict(\n",
    "        appearance=\"oval\",\n",
    "        pattern=\"bright\",\n",
    "        spot_range=\"middle\",\n",
    "        echo=\"clear\",\n",
    "        is_good=1,\n",
    "    ),\n",
    "    dict(\n",
    "        appearance=\"round\",\n",
    "        pattern=\"blured\",\n",
    "        spot_range=\"small\",\n",
    "        echo=\"depressing\",\n",
    "        is_good=0,\n",
    "    ),\n",
    "]\n",
    "df = pd.DataFrame(data)\n",
    "df.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'C': 1.0,\n",
       " 'class_weight': None,\n",
       " 'dual': False,\n",
       " 'fit_intercept': True,\n",
       " 'intercept_scaling': 1,\n",
       " 'l1_ratio': None,\n",
       " 'max_iter': 100,\n",
       " 'multi_class': 'auto',\n",
       " 'n_jobs': None,\n",
       " 'penalty': 'l2',\n",
       " 'random_state': 0,\n",
       " 'solver': 'lbfgs',\n",
       " 'tol': 0.0001,\n",
       " 'verbose': 0,\n",
       " 'warm_start': False}"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "encoder = LabelEncoder()\n",
    "model = LogisticRegression(random_state=0)\n",
    "\n",
    "X = df.drop([\"is_good\"], axis=1).apply(encoder.fit_transform)\n",
    "y = df[\"is_good\"].values\n",
    "\n",
    "estimator = model.fit(X, y)\n",
    "estimator.get_params()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>appearance</th>\n",
       "      <th>pattern</th>\n",
       "      <th>spot_range</th>\n",
       "      <th>echo</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   appearance  pattern  spot_range  echo\n",
       "0           0        0           2     0\n",
       "1           1        1           1     1\n",
       "2           2        1           0     1"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "unknown_data = [\n",
    "    dict(\n",
    "        appearance=\"broken\",\n",
    "        pattern=\"blured\",\n",
    "        spot_range=\"small\",\n",
    "        echo=\"clear\",\n",
    "    ),\n",
    "    dict(\n",
    "        appearance=\"oval\",\n",
    "        pattern=\"clear\",\n",
    "        spot_range=\"middle\",\n",
    "        echo=\"depressing\",\n",
    "    ),\n",
    "    dict(\n",
    "        appearance=\"round\",\n",
    "        pattern=\"clear\",\n",
    "        spot_range=\"big\",\n",
    "        echo=\"depressing\",\n",
    "    ),\n",
    "]\n",
    "records = pd.DataFrame(unknown_data)\n",
    "target = records.apply(encoder.fit_transform)\n",
    "target.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/Bobot/pyenvs/pandas-startup/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>appearance</th>\n",
       "      <th>pattern</th>\n",
       "      <th>spot_range</th>\n",
       "      <th>echo</th>\n",
       "      <th>is_good</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>broken</td>\n",
       "      <td>blured</td>\n",
       "      <td>small</td>\n",
       "      <td>clear</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>oval</td>\n",
       "      <td>clear</td>\n",
       "      <td>middle</td>\n",
       "      <td>depressing</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>round</td>\n",
       "      <td>clear</td>\n",
       "      <td>big</td>\n",
       "      <td>depressing</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  appearance pattern spot_range        echo  is_good\n",
       "0     broken  blured      small       clear        0\n",
       "1       oval   clear     middle  depressing        0\n",
       "2      round   clear        big  depressing        1"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "records[\"is_good\"] = estimator.predict(target.values)\n",
    "records.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/Bobot/pyenvs/pandas-startup/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>appearance</th>\n",
       "      <th>pattern</th>\n",
       "      <th>spot_range</th>\n",
       "      <th>echo</th>\n",
       "      <th>is_good</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>broken</td>\n",
       "      <td>blured</td>\n",
       "      <td>small</td>\n",
       "      <td>clear</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>oval</td>\n",
       "      <td>clear</td>\n",
       "      <td>middle</td>\n",
       "      <td>depressing</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>round</td>\n",
       "      <td>clear</td>\n",
       "      <td>big</td>\n",
       "      <td>depressing</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  appearance pattern spot_range        echo  is_good\n",
       "0     broken  blured      small       clear        0\n",
       "1       oval   clear     middle  depressing        0\n",
       "2      round   clear        big  depressing        1"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "records[\"is_good\"] = estimator.predict(target.values)\n",
    "records.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/Bobot/pyenvs/pandas-startup/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([[0.74707445, 0.25292555],\n",
       "       [0.56146879, 0.43853121],\n",
       "       [0.38539327, 0.61460673]])"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "estimator.predict_proba(target.values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "13977d4cc82dee5f9d9535ceb495bd0ab12a43c33c664e5f0d53c24cf634b67f"
  },
  "kernelspec": {
   "display_name": "Python 3.9.0 ('pandas-startup')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}