diff --git a/code/17/groupby.ipynb b/code/17/groupby.ipynb
new file mode 100644
index 0000000..5a2739c
--- /dev/null
+++ b/code/17/groupby.ipynb
@@ -0,0 +1,2032 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "%matplotlib inline\n",
+ "%config InlineBackend.figure_formats = ['svg']\n",
+ "\n",
+ "import random\n",
+ "\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# GroupBy"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "size\n",
+ "M 97\n",
+ "XL 90\n",
+ "XL 32\n",
+ "M 77\n",
+ "XL 39\n",
+ "Name: number, dtype: int64"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "random.seed(233)  # fixed seed so the sample data is reproducible\n",
+ "\n",
+ "number = [random.randint(10, 100) for _ in range(10)]\n",
+ "# random.choices already returns a flat list of k labels; wrapping it in another\n",
+ "# list makes pandas build a one-level MultiIndex instead of a plain Index.\n",
+ "size = random.choices([\"M\", \"L\", \"XL\"], k=10)\n",
+ "\n",
+ "s = pd.Series(number, index=size, name=\"number\").rename_axis(\"size\")\n",
+ "s.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "size\n",
+ "L 23\n",
+ "M 230\n",
+ "XL 312\n",
+ "Name: number, dtype: int64"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Sum the values that share the same \"size\" index label\n",
+ "s.groupby(level=\"size\").sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " size | \n",
+ " number | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " M | \n",
+ " 97 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " XL | \n",
+ " 90 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " XL | \n",
+ " 32 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " M | \n",
+ " 77 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " XL | \n",
+ " 39 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " size number\n",
+ "0 M 97\n",
+ "1 XL 90\n",
+ "2 XL 32\n",
+ "3 M 77\n",
+ "4 XL 39"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Move the \"size\" index into an ordinary column next to \"number\"\n",
+ "df = s.reset_index()\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "size\n",
+ "L 23\n",
+ "M 230\n",
+ "XL 312\n",
+ "Name: number, dtype: int64"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Group on the \"size\" column and total \"number\" per label\n",
+ "df.groupby(\"size\")[\"number\"].agg(\"sum\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "### GroupBy 的一些使用场景"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "iris = pd.read_csv(\"../../data/iris.csv\")\n",
+ "# \".\" is meant literally here; regex=False keeps it from ever being read as the\n",
+ "# regex wildcard and avoids the FutureWarning about the changing default.\n",
+ "iris.columns = iris.columns.str.replace(\".\", \"_\", regex=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 150 entries, 0 to 149\n",
+ "Data columns (total 5 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 sepal_length 150 non-null float64\n",
+ " 1 sepal_width 150 non-null float64\n",
+ " 2 petal_length 150 non-null float64\n",
+ " 3 petal_width 150 non-null float64\n",
+ " 4 variety 150 non-null object \n",
+ "dtypes: float64(4), object(1)\n",
+ "memory usage: 6.0+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print column dtypes and non-null counts of the loaded frame\n",
+ "iris.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sepal_length | \n",
+ " sepal_width | \n",
+ " petal_length | \n",
+ " petal_width | \n",
+ " variety | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 5.1 | \n",
+ " 3.5 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " Setosa | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 4.9 | \n",
+ " 3.0 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " Setosa | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 4.7 | \n",
+ " 3.2 | \n",
+ " 1.3 | \n",
+ " 0.2 | \n",
+ " Setosa | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4.6 | \n",
+ " 3.1 | \n",
+ " 1.5 | \n",
+ " 0.2 | \n",
+ " Setosa | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5.0 | \n",
+ " 3.6 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " Setosa | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sepal_length sepal_width petal_length petal_width variety\n",
+ "0 5.1 3.5 1.4 0.2 Setosa\n",
+ "1 4.9 3.0 1.4 0.2 Setosa\n",
+ "2 4.7 3.2 1.3 0.2 Setosa\n",
+ "3 4.6 3.1 1.5 0.2 Setosa\n",
+ "4 5.0 3.6 1.4 0.2 Setosa"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Preview the first five rows\n",
+ "iris.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "#### 自定义函数"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " variety | \n",
+ " weight | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Setosa | \n",
+ " 32.5 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Setosa | \n",
+ " 7.0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Setosa | \n",
+ " 15.5 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Setosa | \n",
+ " 8.5 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Setosa | \n",
+ " 36.0 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 145 | \n",
+ " Virginica | \n",
+ " 26.0 | \n",
+ "
\n",
+ " \n",
+ " | 146 | \n",
+ " Virginica | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " | 147 | \n",
+ " Virginica | \n",
+ " 23.0 | \n",
+ "
\n",
+ " \n",
+ " | 148 | \n",
+ " Virginica | \n",
+ " 43.0 | \n",
+ "
\n",
+ " \n",
+ " | 149 | \n",
+ " Virginica | \n",
+ " 18.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
150 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " variety weight\n",
+ "0 Setosa 32.5\n",
+ "1 Setosa 7.0\n",
+ "2 Setosa 15.5\n",
+ "3 Setosa 8.5\n",
+ "4 Setosa 36.0\n",
+ ".. ... ...\n",
+ "145 Virginica 26.0\n",
+ "146 Virginica 4.0\n",
+ "147 Virginica 23.0\n",
+ "148 Virginica 43.0\n",
+ "149 Virginica 18.0\n",
+ "\n",
+ "[150 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "def rank_by_sepal(group):\n",
+ "    \"\"\"Rank the rows of one group by a weighted sepal score.\n",
+ "\n",
+ "    The score is 0.8 * sepal_width + 0.2 * sepal_length; the returned Series is\n",
+ "    named \"weight\" and holds each row's rank of that score within ``group``.\n",
+ "    \"\"\"\n",
+ "    score = 0.8 * group[\"sepal_width\"] + 0.2 * group[\"sepal_length\"]\n",
+ "    return score.rename(\"weight\").rank()\n",
+ "\n",
+ "\n",
+ "# Rank within each variety, then turn the variety index level back into a column\n",
+ "iris.groupby(\"variety\").apply(rank_by_sepal).reset_index(level=\"variety\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " variety | \n",
+ " weight | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Setosa | \n",
+ " 32.5 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Setosa | \n",
+ " 7.0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Setosa | \n",
+ " 15.5 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Setosa | \n",
+ " 8.5 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Setosa | \n",
+ " 36.0 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 145 | \n",
+ " Virginica | \n",
+ " 26.0 | \n",
+ "
\n",
+ " \n",
+ " | 146 | \n",
+ " Virginica | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " | 147 | \n",
+ " Virginica | \n",
+ " 23.0 | \n",
+ "
\n",
+ " \n",
+ " | 148 | \n",
+ " Virginica | \n",
+ " 43.0 | \n",
+ "
\n",
+ " \n",
+ " | 149 | \n",
+ " Virginica | \n",
+ " 18.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
150 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " variety weight\n",
+ "0 Setosa 32.5\n",
+ "1 Setosa 7.0\n",
+ "2 Setosa 15.5\n",
+ "3 Setosa 8.5\n",
+ "4 Setosa 36.0\n",
+ ".. ... ...\n",
+ "145 Virginica 26.0\n",
+ "146 Virginica 4.0\n",
+ "147 Virginica 23.0\n",
+ "148 Virginica 43.0\n",
+ "149 Virginica 18.0\n",
+ "\n",
+ "[150 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Same call as before, with the function wrapped in a lambda\n",
+ "iris.groupby(\"variety\").apply(lambda grp: rank_by_sepal(grp)).reset_index(level=\"variety\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "#### 分组排序取 Top N"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " | \n",
+ " sepal_length | \n",
+ " sepal_width | \n",
+ " petal_length | \n",
+ " petal_width | \n",
+ " variety | \n",
+ "
\n",
+ " \n",
+ " | variety | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Setosa | \n",
+ " 14 | \n",
+ " 5.8 | \n",
+ " 4.0 | \n",
+ " 1.2 | \n",
+ " 0.2 | \n",
+ " Setosa | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " 5.7 | \n",
+ " 4.4 | \n",
+ " 1.5 | \n",
+ " 0.4 | \n",
+ " Setosa | \n",
+ "
\n",
+ " \n",
+ " | Versicolor | \n",
+ " 50 | \n",
+ " 7.0 | \n",
+ " 3.2 | \n",
+ " 4.7 | \n",
+ " 1.4 | \n",
+ " Versicolor | \n",
+ "
\n",
+ " \n",
+ " | 52 | \n",
+ " 6.9 | \n",
+ " 3.1 | \n",
+ " 4.9 | \n",
+ " 1.5 | \n",
+ " Versicolor | \n",
+ "
\n",
+ " \n",
+ " | Virginica | \n",
+ " 131 | \n",
+ " 7.9 | \n",
+ " 3.8 | \n",
+ " 6.4 | \n",
+ " 2.0 | \n",
+ " Virginica | \n",
+ "
\n",
+ " \n",
+ " | 117 | \n",
+ " 7.7 | \n",
+ " 3.8 | \n",
+ " 6.7 | \n",
+ " 2.2 | \n",
+ " Virginica | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sepal_length sepal_width petal_length petal_width \\\n",
+ "variety \n",
+ "Setosa 14 5.8 4.0 1.2 0.2 \n",
+ " 15 5.7 4.4 1.5 0.4 \n",
+ "Versicolor 50 7.0 3.2 4.7 1.4 \n",
+ " 52 6.9 3.1 4.9 1.5 \n",
+ "Virginica 131 7.9 3.8 6.4 2.0 \n",
+ " 117 7.7 3.8 6.7 2.2 \n",
+ "\n",
+ " variety \n",
+ "variety \n",
+ "Setosa 14 Setosa \n",
+ " 15 Setosa \n",
+ "Versicolor 50 Versicolor \n",
+ " 52 Versicolor \n",
+ "Virginica 131 Virginica \n",
+ " 117 Virginica "
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "def top_two_by_sepal(grp):\n",
+ "    \"\"\"Return the two rows of ``grp`` with the largest sepal_length, then sepal_width.\"\"\"\n",
+ "    ordered = grp.sort_values([\"sepal_length\", \"sepal_width\"], ascending=False)\n",
+ "    return ordered.head(2)\n",
+ "\n",
+ "\n",
+ "iris.groupby(\"variety\").apply(top_two_by_sepal)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sepal_length | \n",
+ " sepal_width | \n",
+ " petal_length | \n",
+ " petal_width | \n",
+ " variety | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 5.1 | \n",
+ " 3.5 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " Setosa | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 4.9 | \n",
+ " 3.0 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " Setosa | \n",
+ "
\n",
+ " \n",
+ " | 50 | \n",
+ " 7.0 | \n",
+ " 3.2 | \n",
+ " 4.7 | \n",
+ " 1.4 | \n",
+ " Versicolor | \n",
+ "
\n",
+ " \n",
+ " | 51 | \n",
+ " 6.4 | \n",
+ " 3.2 | \n",
+ " 4.5 | \n",
+ " 1.5 | \n",
+ " Versicolor | \n",
+ "
\n",
+ " \n",
+ " | 100 | \n",
+ " 6.3 | \n",
+ " 3.3 | \n",
+ " 6.0 | \n",
+ " 2.5 | \n",
+ " Virginica | \n",
+ "
\n",
+ " \n",
+ " | 101 | \n",
+ " 5.8 | \n",
+ " 2.7 | \n",
+ " 5.1 | \n",
+ " 1.9 | \n",
+ " Virginica | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sepal_length sepal_width petal_length petal_width variety\n",
+ "0 5.1 3.5 1.4 0.2 Setosa\n",
+ "1 4.9 3.0 1.4 0.2 Setosa\n",
+ "50 7.0 3.2 4.7 1.4 Versicolor\n",
+ "51 6.4 3.2 4.5 1.5 Versicolor\n",
+ "100 6.3 3.3 6.0 2.5 Virginica\n",
+ "101 5.8 2.7 5.1 1.9 Virginica"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# First two rows of every variety, taken in the frame's existing row order\n",
+ "iris.groupby(\"variety\").head(n=2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sepal_length | \n",
+ " sepal_width | \n",
+ " petal_length | \n",
+ " petal_width | \n",
+ " variety | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 49 | \n",
+ " 5.0 | \n",
+ " 3.3 | \n",
+ " 1.4 | \n",
+ " 0.2 | \n",
+ " Setosa | \n",
+ "
\n",
+ " \n",
+ " | 99 | \n",
+ " 5.7 | \n",
+ " 2.8 | \n",
+ " 4.1 | \n",
+ " 1.3 | \n",
+ " Versicolor | \n",
+ "
\n",
+ " \n",
+ " | 149 | \n",
+ " 5.9 | \n",
+ " 3.0 | \n",
+ " 5.1 | \n",
+ " 1.8 | \n",
+ " Virginica | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sepal_length sepal_width petal_length petal_width variety\n",
+ "49 5.0 3.3 1.4 0.2 Setosa\n",
+ "99 5.7 2.8 4.1 1.3 Versicolor\n",
+ "149 5.9 3.0 5.1 1.8 Virginica"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Last row of every variety\n",
+ "iris.groupby(\"variety\").tail(n=1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "#### 分组聚合"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sepal_length | \n",
+ " sepal_width | \n",
+ " petal_length | \n",
+ " petal_width | \n",
+ "
\n",
+ " \n",
+ " | variety | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Setosa | \n",
+ " 0.266674 | \n",
+ " 0.189941 | \n",
+ " -0.357011 | \n",
+ " -0.436492 | \n",
+ "
\n",
+ " \n",
+ " | Setosa | \n",
+ " -0.300718 | \n",
+ " -1.129096 | \n",
+ " -0.357011 | \n",
+ " -0.436492 | \n",
+ "
\n",
+ " \n",
+ " | Setosa | \n",
+ " -0.868111 | \n",
+ " -0.601481 | \n",
+ " -0.932836 | \n",
+ " -0.436492 | \n",
+ "
\n",
+ " \n",
+ " | Setosa | \n",
+ " -1.151807 | \n",
+ " -0.865288 | \n",
+ " 0.218813 | \n",
+ " -0.436492 | \n",
+ "
\n",
+ " \n",
+ " | Setosa | \n",
+ " -0.017022 | \n",
+ " 0.453749 | \n",
+ " -0.357011 | \n",
+ " -0.436492 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | Virginica | \n",
+ " 0.176134 | \n",
+ " 0.080621 | \n",
+ " -0.637803 | \n",
+ " 0.997633 | \n",
+ "
\n",
+ " \n",
+ " | Virginica | \n",
+ " -0.452916 | \n",
+ " -1.469783 | \n",
+ " -1.000191 | \n",
+ " -0.458766 | \n",
+ "
\n",
+ " \n",
+ " | Virginica | \n",
+ " -0.138391 | \n",
+ " 0.080621 | \n",
+ " -0.637803 | \n",
+ " -0.094666 | \n",
+ "
\n",
+ " \n",
+ " | Virginica | \n",
+ " -0.610178 | \n",
+ " 1.320944 | \n",
+ " -0.275415 | \n",
+ " 0.997633 | \n",
+ "
\n",
+ " \n",
+ " | Virginica | \n",
+ " -1.081966 | \n",
+ " 0.080621 | \n",
+ " -0.818997 | \n",
+ " -0.822865 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
150 rows × 4 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sepal_length sepal_width petal_length petal_width\n",
+ "variety \n",
+ "Setosa 0.266674 0.189941 -0.357011 -0.436492\n",
+ "Setosa -0.300718 -1.129096 -0.357011 -0.436492\n",
+ "Setosa -0.868111 -0.601481 -0.932836 -0.436492\n",
+ "Setosa -1.151807 -0.865288 0.218813 -0.436492\n",
+ "Setosa -0.017022 0.453749 -0.357011 -0.436492\n",
+ "... ... ... ... ...\n",
+ "Virginica 0.176134 0.080621 -0.637803 0.997633\n",
+ "Virginica -0.452916 -1.469783 -1.000191 -0.458766\n",
+ "Virginica -0.138391 0.080621 -0.637803 -0.094666\n",
+ "Virginica -0.610178 1.320944 -0.275415 0.997633\n",
+ "Virginica -1.081966 0.080621 -0.818997 -0.822865\n",
+ "\n",
+ "[150 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "def normalize(x):\n",
+ "    \"\"\"Standardize ``x``: subtract its mean and divide by its standard deviation.\"\"\"\n",
+ "    return (x - x.mean()) / x.std()\n",
+ "\n",
+ "\n",
+ "# Standardize every remaining column within its own variety\n",
+ "iris.set_index(\"variety\").groupby(level=\"variety\").transform(normalize)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " count | \n",
+ " min | \n",
+ " std | \n",
+ " max | \n",
+ "
\n",
+ " \n",
+ " | variety | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Setosa | \n",
+ " 50 | \n",
+ " 4.3 | \n",
+ " 0.352490 | \n",
+ " 5.8 | \n",
+ "
\n",
+ " \n",
+ " | Versicolor | \n",
+ " 50 | \n",
+ " 4.9 | \n",
+ " 0.516171 | \n",
+ " 7.0 | \n",
+ "
\n",
+ " \n",
+ " | Virginica | \n",
+ " 50 | \n",
+ " 4.9 | \n",
+ " 0.635880 | \n",
+ " 7.9 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " count min std max\n",
+ "variety \n",
+ "Setosa 50 4.3 0.352490 5.8\n",
+ "Versicolor 50 4.9 0.516171 7.0\n",
+ "Virginica 50 4.9 0.635880 7.9"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Named aggregation: keyword = (column, aggregation). String names select pandas'\n",
+ "# own groupby reductions rather than handing over the Python builtins min/max.\n",
+ "iris.groupby(\"variety\").agg(\n",
+ "    count=(\"variety\", \"count\"),\n",
+ "    min=(\"sepal_length\", \"min\"),\n",
+ "    std=(\"sepal_length\", \"std\"),\n",
+ "    max=(\"sepal_length\", \"max\"),\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " count | \n",
+ " min | \n",
+ " std | \n",
+ " max | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Setosa | \n",
+ " 50 | \n",
+ " 4.3 | \n",
+ " 0.352490 | \n",
+ " 5.8 | \n",
+ "
\n",
+ " \n",
+ " | Versicolor | \n",
+ " 50 | \n",
+ " 4.9 | \n",
+ " 0.516171 | \n",
+ " 7.0 | \n",
+ "
\n",
+ " \n",
+ " | Virginica | \n",
+ " 50 | \n",
+ " 4.9 | \n",
+ " 0.635880 | \n",
+ " 7.9 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " count min std max\n",
+ "Setosa 50 4.3 0.352490 5.8\n",
+ "Versicolor 50 4.9 0.516171 7.0\n",
+ "Virginica 50 4.9 0.635880 7.9"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# The same statistics as the named aggregation, spelled out by hand.\n",
+ "# Local names are chosen so the notebook-level `df` is not overwritten, and the\n",
+ "# result is built once from a list of records instead of concatenating one-row frames.\n",
+ "\n",
+ "groups = iris[\"variety\"].unique()\n",
+ "records = []\n",
+ "for group in groups:\n",
+ "    sepal_length = iris.loc[iris[\"variety\"] == group, \"sepal_length\"]\n",
+ "    records.append(\n",
+ "        dict(\n",
+ "            count=sepal_length.shape[0],  # number of rows in this group\n",
+ "            min=sepal_length.min(),\n",
+ "            std=sepal_length.std(),\n",
+ "            max=sepal_length.max(),\n",
+ "        )\n",
+ "    )\n",
+ "\n",
+ "pd.DataFrame(records, index=groups)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "#### 分组绘制可视化图形"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "variety\n",
+ "Setosa AxesSubplot(0.125,0.125;0.775x0.755)\n",
+ "Versicolor AxesSubplot(0.125,0.125;0.775x0.755)\n",
+ "Virginica AxesSubplot(0.125,0.125;0.775x0.755)\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/svg+xml": "\n\n\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/svg+xml": "\n\n\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/svg+xml": "\n\n\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Draw a separate KDE figure for each variety\n",
+ "iris.groupby(\"variety\").plot.kde(figsize=(16, 2), legend=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "image/svg+xml": "\n\n\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "fig = plt.figure(figsize=(16, 10))\n",
+ "# 2x2 grid; subplot positions are 1-based, so start the enumeration at 1\n",
+ "for position, (klass, grp) in enumerate(iris.groupby(\"variety\"), start=1):\n",
+ "    axes = fig.add_subplot(2, 2, position)\n",
+ "\n",
+ "    grp.drop(\"variety\", axis=1).plot.box(ax=axes)\n",
+ "    axes.set_title(f\"variety={klass}\")\n",
+ "\n",
+ "# plt.show() works with the inline backend; fig.show() needs a GUI backend and\n",
+ "# only produces a UserWarning here.\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%% md\n"
+ }
+ },
+ "source": [
+ "# Resampler"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "DatetimeIndex: 732 entries, 2020-01-01 to 2022-01-01\n",
+ "Freq: D\n",
+ "Data columns (total 1 columns):\n",
+ " # Column Non-Null Count Dtype\n",
+ "--- ------ -------------- -----\n",
+ " 0 sales 732 non-null int64\n",
+ "dtypes: int64(1)\n",
+ "memory usage: 11.4 KB\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sales | \n",
+ "
\n",
+ " \n",
+ " | datetime | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 2020-01-01 | \n",
+ " 2956 | \n",
+ "
\n",
+ " \n",
+ " | 2020-01-02 | \n",
+ " 8704 | \n",
+ "
\n",
+ " \n",
+ " | 2020-01-03 | \n",
+ " 3881 | \n",
+ "
\n",
+ " \n",
+ " | 2020-01-04 | \n",
+ " 8031 | \n",
+ "
\n",
+ " \n",
+ " | 2020-01-05 | \n",
+ " 9124 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sales\n",
+ "datetime \n",
+ "2020-01-01 2956\n",
+ "2020-01-02 8704\n",
+ "2020-01-03 3881\n",
+ "2020-01-04 8031\n",
+ "2020-01-05 9124"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import random\n",
+ "\n",
+ "import pandas as pd\n",
+ "\n",
+ "random.seed(233)\n",
+ "\n",
+ "# Daily timestamps from 2020-01-01 through 2022-01-01, used as the frame's index\n",
+ "dt = pd.date_range(\"20200101\", \"20220101\", freq=\"D\", name=\"datetime\")\n",
+ "# One random sales figure per day\n",
+ "daily_sales = [random.randrange(100, 10000) for _ in range(len(dt))]\n",
+ "data = pd.DataFrame({\"sales\": daily_sales}, index=dt)\n",
+ "\n",
+ "data.info()\n",
+ "data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sales | \n",
+ "
\n",
+ " \n",
+ " | datetime | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 2020-12-31 | \n",
+ " 1850914 | \n",
+ "
\n",
+ " \n",
+ " | 2021-12-31 | \n",
+ " 1901049 | \n",
+ "
\n",
+ " \n",
+ " | 2022-12-31 | \n",
+ " 2581 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sales\n",
+ "datetime \n",
+ "2020-12-31 1850914\n",
+ "2021-12-31 1901049\n",
+ "2022-12-31 2581"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Yearly totals; each row is labelled with the last day of its year\n",
+ "data.resample(rule=\"Y\").sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# No aggregation is called here, so the cell just shows the resampler object\n",
+ "data.resample(rule=\"2W\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# resample is also available on a Series built from the DatetimeIndex\n",
+ "dt.to_series().resample(rule=\"3M\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# ...and on a Series whose index was converted to periods\n",
+ "dt.to_period(\"2W\").to_series().resample(rule=\"3d\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Left commented out on purpose: the author notes that this call raises a TypeError.\n",
+ "# NOTE(review): presumably because reset_index() replaces the datetime index with a\n",
+ "# default integer index, which resample() cannot use - confirm.\n",
+ "# data.reset_index().resample(\"4d\") # raise type error here."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " datetime | \n",
+ " sales | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2020-01-01 | \n",
+ " 2956 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2020-01-02 | \n",
+ " 8704 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2020-01-03 | \n",
+ " 3881 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 2020-01-04 | \n",
+ " 8031 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 2020-01-05 | \n",
+ " 9124 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " datetime sales\n",
+ "0 2020-01-01 2956\n",
+ "1 2020-01-02 8704\n",
+ "2 2020-01-03 3881\n",
+ "3 2020-01-04 8031\n",
+ "4 2020-01-05 9124"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Turn the \"datetime\" index into a regular column. The guard keeps the cell\n",
+ "# idempotent: running it a second time would otherwise add a stray \"index\" column.\n",
+ "if \"datetime\" not in data.columns:\n",
+ "    data = data.reset_index()\n",
+ "data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sales | \n",
+ "
\n",
+ " \n",
+ " | datetime | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 2020-12-31 | \n",
+ " 1850914 | \n",
+ "
\n",
+ " \n",
+ " | 2021-12-31 | \n",
+ " 1901049 | \n",
+ "
\n",
+ " \n",
+ " | 2022-12-31 | \n",
+ " 2581 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sales\n",
+ "datetime \n",
+ "2020-12-31 1850914\n",
+ "2021-12-31 1901049\n",
+ "2022-12-31 2581"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# pd.Grouper bins the \"datetime\" column by year so groupby can total each bin\n",
+ "yearly = pd.Grouper(key=\"datetime\", freq=\"Y\")\n",
+ "data.groupby(yearly).sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "image/svg+xml": "\n\n\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "# Reseed so the synthetic overseas figures are the same on every run\n",
+ "random.seed(233)\n",
+ "overseas = [random.randrange(1000, 5000) for _ in range(len(dt))]\n",
+ "\n",
+ "ax = plt.subplot(111)\n",
+ "\n",
+ "# Total both sales columns over 3-week bins using the datetime index\n",
+ "sales_3w = (\n",
+ "    data.set_index(\"datetime\")\n",
+ "    .assign(overseas_sales=overseas)\n",
+ "    .resample(\"3W\")\n",
+ "    .sum()\n",
+ ")\n",
+ "sales_3w.plot.line(ax=ax, figsize=(10, 5), xlabel=\"\", ylabel=\"sales\")\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "image/svg+xml": "\n\n\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "# Reseed so the synthetic overseas figures are the same on every run\n",
+ "random.seed(233)\n",
+ "overseas = [random.randrange(1000, 5000) for _ in range(len(dt))]\n",
+ "\n",
+ "ax = plt.subplot(111)\n",
+ "\n",
+ "# Total both sales columns over 3-week bins, grouping on the \"datetime\" column\n",
+ "every_3w = pd.Grouper(key=\"datetime\", freq=\"3W\")\n",
+ "totals_3w = data.assign(overseas_sales=overseas).groupby(every_3w).sum()\n",
+ "totals_3w.plot.line(ax=ax, figsize=(10, 5), xlabel=\"\", ylabel=\"sales\")\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.10.4 ('sspai-100-hours-series-python')",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.4"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "7a101baf08afe636412f97dd4a9fc2e65b6f84f0ec50413bf3e19b04a26b8ba6"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/data/iris.csv b/data/iris.csv
new file mode 100644
index 0000000..1b9d029
--- /dev/null
+++ b/data/iris.csv
@@ -0,0 +1,151 @@
+"sepal.length","sepal.width","petal.length","petal.width","variety"
+5.1,3.5,1.4,.2,"Setosa"
+4.9,3,1.4,.2,"Setosa"
+4.7,3.2,1.3,.2,"Setosa"
+4.6,3.1,1.5,.2,"Setosa"
+5,3.6,1.4,.2,"Setosa"
+5.4,3.9,1.7,.4,"Setosa"
+4.6,3.4,1.4,.3,"Setosa"
+5,3.4,1.5,.2,"Setosa"
+4.4,2.9,1.4,.2,"Setosa"
+4.9,3.1,1.5,.1,"Setosa"
+5.4,3.7,1.5,.2,"Setosa"
+4.8,3.4,1.6,.2,"Setosa"
+4.8,3,1.4,.1,"Setosa"
+4.3,3,1.1,.1,"Setosa"
+5.8,4,1.2,.2,"Setosa"
+5.7,4.4,1.5,.4,"Setosa"
+5.4,3.9,1.3,.4,"Setosa"
+5.1,3.5,1.4,.3,"Setosa"
+5.7,3.8,1.7,.3,"Setosa"
+5.1,3.8,1.5,.3,"Setosa"
+5.4,3.4,1.7,.2,"Setosa"
+5.1,3.7,1.5,.4,"Setosa"
+4.6,3.6,1,.2,"Setosa"
+5.1,3.3,1.7,.5,"Setosa"
+4.8,3.4,1.9,.2,"Setosa"
+5,3,1.6,.2,"Setosa"
+5,3.4,1.6,.4,"Setosa"
+5.2,3.5,1.5,.2,"Setosa"
+5.2,3.4,1.4,.2,"Setosa"
+4.7,3.2,1.6,.2,"Setosa"
+4.8,3.1,1.6,.2,"Setosa"
+5.4,3.4,1.5,.4,"Setosa"
+5.2,4.1,1.5,.1,"Setosa"
+5.5,4.2,1.4,.2,"Setosa"
+4.9,3.1,1.5,.2,"Setosa"
+5,3.2,1.2,.2,"Setosa"
+5.5,3.5,1.3,.2,"Setosa"
+4.9,3.6,1.4,.1,"Setosa"
+4.4,3,1.3,.2,"Setosa"
+5.1,3.4,1.5,.2,"Setosa"
+5,3.5,1.3,.3,"Setosa"
+4.5,2.3,1.3,.3,"Setosa"
+4.4,3.2,1.3,.2,"Setosa"
+5,3.5,1.6,.6,"Setosa"
+5.1,3.8,1.9,.4,"Setosa"
+4.8,3,1.4,.3,"Setosa"
+5.1,3.8,1.6,.2,"Setosa"
+4.6,3.2,1.4,.2,"Setosa"
+5.3,3.7,1.5,.2,"Setosa"
+5,3.3,1.4,.2,"Setosa"
+7,3.2,4.7,1.4,"Versicolor"
+6.4,3.2,4.5,1.5,"Versicolor"
+6.9,3.1,4.9,1.5,"Versicolor"
+5.5,2.3,4,1.3,"Versicolor"
+6.5,2.8,4.6,1.5,"Versicolor"
+5.7,2.8,4.5,1.3,"Versicolor"
+6.3,3.3,4.7,1.6,"Versicolor"
+4.9,2.4,3.3,1,"Versicolor"
+6.6,2.9,4.6,1.3,"Versicolor"
+5.2,2.7,3.9,1.4,"Versicolor"
+5,2,3.5,1,"Versicolor"
+5.9,3,4.2,1.5,"Versicolor"
+6,2.2,4,1,"Versicolor"
+6.1,2.9,4.7,1.4,"Versicolor"
+5.6,2.9,3.6,1.3,"Versicolor"
+6.7,3.1,4.4,1.4,"Versicolor"
+5.6,3,4.5,1.5,"Versicolor"
+5.8,2.7,4.1,1,"Versicolor"
+6.2,2.2,4.5,1.5,"Versicolor"
+5.6,2.5,3.9,1.1,"Versicolor"
+5.9,3.2,4.8,1.8,"Versicolor"
+6.1,2.8,4,1.3,"Versicolor"
+6.3,2.5,4.9,1.5,"Versicolor"
+6.1,2.8,4.7,1.2,"Versicolor"
+6.4,2.9,4.3,1.3,"Versicolor"
+6.6,3,4.4,1.4,"Versicolor"
+6.8,2.8,4.8,1.4,"Versicolor"
+6.7,3,5,1.7,"Versicolor"
+6,2.9,4.5,1.5,"Versicolor"
+5.7,2.6,3.5,1,"Versicolor"
+5.5,2.4,3.8,1.1,"Versicolor"
+5.5,2.4,3.7,1,"Versicolor"
+5.8,2.7,3.9,1.2,"Versicolor"
+6,2.7,5.1,1.6,"Versicolor"
+5.4,3,4.5,1.5,"Versicolor"
+6,3.4,4.5,1.6,"Versicolor"
+6.7,3.1,4.7,1.5,"Versicolor"
+6.3,2.3,4.4,1.3,"Versicolor"
+5.6,3,4.1,1.3,"Versicolor"
+5.5,2.5,4,1.3,"Versicolor"
+5.5,2.6,4.4,1.2,"Versicolor"
+6.1,3,4.6,1.4,"Versicolor"
+5.8,2.6,4,1.2,"Versicolor"
+5,2.3,3.3,1,"Versicolor"
+5.6,2.7,4.2,1.3,"Versicolor"
+5.7,3,4.2,1.2,"Versicolor"
+5.7,2.9,4.2,1.3,"Versicolor"
+6.2,2.9,4.3,1.3,"Versicolor"
+5.1,2.5,3,1.1,"Versicolor"
+5.7,2.8,4.1,1.3,"Versicolor"
+6.3,3.3,6,2.5,"Virginica"
+5.8,2.7,5.1,1.9,"Virginica"
+7.1,3,5.9,2.1,"Virginica"
+6.3,2.9,5.6,1.8,"Virginica"
+6.5,3,5.8,2.2,"Virginica"
+7.6,3,6.6,2.1,"Virginica"
+4.9,2.5,4.5,1.7,"Virginica"
+7.3,2.9,6.3,1.8,"Virginica"
+6.7,2.5,5.8,1.8,"Virginica"
+7.2,3.6,6.1,2.5,"Virginica"
+6.5,3.2,5.1,2,"Virginica"
+6.4,2.7,5.3,1.9,"Virginica"
+6.8,3,5.5,2.1,"Virginica"
+5.7,2.5,5,2,"Virginica"
+5.8,2.8,5.1,2.4,"Virginica"
+6.4,3.2,5.3,2.3,"Virginica"
+6.5,3,5.5,1.8,"Virginica"
+7.7,3.8,6.7,2.2,"Virginica"
+7.7,2.6,6.9,2.3,"Virginica"
+6,2.2,5,1.5,"Virginica"
+6.9,3.2,5.7,2.3,"Virginica"
+5.6,2.8,4.9,2,"Virginica"
+7.7,2.8,6.7,2,"Virginica"
+6.3,2.7,4.9,1.8,"Virginica"
+6.7,3.3,5.7,2.1,"Virginica"
+7.2,3.2,6,1.8,"Virginica"
+6.2,2.8,4.8,1.8,"Virginica"
+6.1,3,4.9,1.8,"Virginica"
+6.4,2.8,5.6,2.1,"Virginica"
+7.2,3,5.8,1.6,"Virginica"
+7.4,2.8,6.1,1.9,"Virginica"
+7.9,3.8,6.4,2,"Virginica"
+6.4,2.8,5.6,2.2,"Virginica"
+6.3,2.8,5.1,1.5,"Virginica"
+6.1,2.6,5.6,1.4,"Virginica"
+7.7,3,6.1,2.3,"Virginica"
+6.3,3.4,5.6,2.4,"Virginica"
+6.4,3.1,5.5,1.8,"Virginica"
+6,3,4.8,1.8,"Virginica"
+6.9,3.1,5.4,2.1,"Virginica"
+6.7,3.1,5.6,2.4,"Virginica"
+6.9,3.1,5.1,2.3,"Virginica"
+5.8,2.7,5.1,1.9,"Virginica"
+6.8,3.2,5.9,2.3,"Virginica"
+6.7,3.3,5.7,2.5,"Virginica"
+6.7,3,5.2,2.3,"Virginica"
+6.3,2.5,5,1.9,"Virginica"
+6.5,3,5.2,2,"Virginica"
+6.2,3.4,5.4,2.3,"Virginica"
+5.9,3,5.1,1.8,"Virginica"
\ No newline at end of file