diff --git a/code/10/series.ipynb b/code/10/series.ipynb new file mode 100644 index 0000000..62ae025 --- /dev/null +++ b/code/10/series.ipynb @@ -0,0 +1,1050 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# 什么是 Series?" + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 1\n", + "1 2\n", + "2 3\n", + "3 4\n", + "4 5\n", + "dtype: int64\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "data = [1, 2, 3, 4, 5]\n", + "series = pd.Series(data)\n", + "print(series)" + ] + }, + { + "cell_type": "code", + "execution_count": 149, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RangeIndex(start=0, stop=5, step=1)\n" + ] + } + ], + "source": [ + "print(series.index)" + ] + }, + { + "cell_type": "code", + "execution_count": 150, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "count 5.000000\n", + "mean 3.000000\n", + "std 1.581139\n", + "min 1.000000\n", + "25% 2.000000\n", + "50% 3.000000\n", + "75% 4.000000\n", + "max 5.000000\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "print(series.describe())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# 常用的 Series 属性" + ] + }, + { + "cell_type": "code", + "execution_count": 151, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 44\n", + "1 41\n", + "2 12\n", + "3 34\n", + "4 15\n", + "Name: numbers, dtype: int64" + ] + }, + "execution_count": 151, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "import random\n", + "\n", + "import pandas as pd\n", + "\n", + "random.seed(233)\n", + "\n", + "data = [random.randrange(1, 50) for _ in range(100)]\n", + "series = pd.Series(data, name=\"numbers\")\n", + "series.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "RangeIndex(start=0, stop=100, step=1)" + ] + }, + "execution_count": 152, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.index" + ] + }, + { + "cell_type": "code", + "execution_count": 153, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]\n" + ] + } + ], + "source": [ + "print(list(series.index))" + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "dtype('int64')" + ] + }, + "execution_count": 154, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.dtype" + ] + }, + { + "cell_type": "code", + "execution_count": 155, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(100,)" + ] + }, + "execution_count": 155, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.shape" + ] + }, + { + "cell_type": 
"code", + "execution_count": 156, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "100" + ] + }, + "execution_count": 156, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.size" + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'numbers'" + ] + }, + "execution_count": 157, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.name" + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([44, 41, 12, 34, 15, 31, 36, 14, 6, 7, 18, 15, 45, 46, 1, 5, 44,\n", + " 16, 39, 46, 46, 15, 48, 19, 8, 31, 21, 14, 6, 19, 32, 34, 41, 10,\n", + " 40, 46, 22, 20, 7, 33, 29, 48, 18, 27, 3, 26, 48, 12, 4, 49, 5,\n", + " 49, 8, 14, 11, 23, 21, 48, 34, 34, 29, 9, 2, 30, 8, 45, 23, 46,\n", + " 43, 7, 45, 6, 37, 24, 4, 26, 20, 35, 3, 39, 33, 30, 26, 47, 1,\n", + " 5, 31, 23, 25, 9, 41, 29, 23, 20, 39, 48, 47, 9, 14, 32])" + ] + }, + "execution_count": 158, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.values" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# 常用的 Series 方法" + ] + }, + { + "cell_type": "code", + "execution_count": 159, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "arr = [random.randrange(1, 50) for _ in range(100)]\n", + "arr = pd.Series(arr)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 数学运算 / 统计" + ] + }, + { + "cell_type": "code", + "execution_count": 160, + "metadata": { + "collapsed": false, + "pycharm": { + 
"name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 79\n", + "1 85\n", + "2 21\n", + "3 60\n", + "4 60\n", + " ..\n", + "95 55\n", + "96 93\n", + "97 25\n", + "98 51\n", + "99 48\n", + "Length: 100, dtype: int64" + ] + }, + "execution_count": 160, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.add(arr)" + ] + }, + { + "cell_type": "code", + "execution_count": 161, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 9\n", + "1 -3\n", + "2 3\n", + "3 8\n", + "4 -30\n", + " ..\n", + "95 41\n", + "96 1\n", + "97 -7\n", + "98 -23\n", + "99 16\n", + "Length: 100, dtype: int64" + ] + }, + "execution_count": 161, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.sub(arr)" + ] + }, + { + "cell_type": "code", + "execution_count": 162, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1540\n", + "1 1804\n", + "2 108\n", + "3 884\n", + "4 675\n", + " ... \n", + "95 336\n", + "96 2162\n", + "97 144\n", + "98 518\n", + "99 512\n", + "Length: 100, dtype: int64" + ] + }, + "execution_count": 162, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.mul(arr)" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1.257143\n", + "1 0.931818\n", + "2 1.333333\n", + "3 1.307692\n", + "4 0.333333\n", + " ... 
\n", + "95 6.857143\n", + "96 1.021739\n", + "97 0.562500\n", + "98 0.378378\n", + "99 2.000000\n", + "Length: 100, dtype: float64" + ] + }, + "execution_count": 163, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.div(arr)" + ] + }, + { + "cell_type": "code", + "execution_count": 164, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "49" + ] + }, + "execution_count": 164, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 165, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 165, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.min()" + ] + }, + { + "cell_type": "code", + "execution_count": 166, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "25.5" + ] + }, + "execution_count": 166, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.median()" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "25.51" + ] + }, + "execution_count": 167, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "14.990566056926294" + ] + }, + "execution_count": 168, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.std()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 函数应用" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 True\n", + "1 False\n", + "2 True\n", + "3 True\n", + "4 False\n", + "Name: numbers, dtype: bool" + ] + }, + "execution_count": 169, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def mod2(x):\n", + "    \"\"\"Return True when x is even (x % 2 == 0), otherwise False.\"\"\"\n", + "    return x % 2 == 0\n", + "\n", + "\n", + "# Apply mod2 to every element to build a boolean mask of the even values.\n", + "idx = series.map(mod2)\n", + "idx.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 44\n", + "2 12\n", + "3 34\n", + "6 36\n", + "7 14\n", + "Name: numbers, dtype: int64" + ] + }, + "execution_count": 170, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series[idx].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 True\n", + "1 False\n", + "2 True\n", + "3 True\n", + "4 False\n", + "Name: numbers, dtype: bool" + ] + }, + "execution_count": 171, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "idx = series.apply(mod2)\n", + "idx.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 44\n", + "2 12\n", + "3 34\n", + "6 36\n", + "7 14\n", + "Name: numbers, dtype: int64" + ] + }, + "execution_count": 172, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series[idx].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## 特定类型接口" + ] + }, + { +
"cell_type": "code", + "execution_count": 173, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "str_series = pd.Series([\"apple\", \"orange\", \"banana\", \"grape\"], name=\"fruit\")" + ] + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 1\n", + "2 3\n", + "3 1\n", + "Name: fruit, dtype: int64" + ] + }, + "execution_count": 174, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "str_series.str.count(\"a\")" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 5\n", + "1 6\n", + "2 6\n", + "3 5\n", + "Name: fruit, dtype: int64" + ] + }, + "execution_count": 175, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "str_series.str.len()" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 aXXle\n", + "1 orange\n", + "2 banana\n", + "3 grape\n", + "Name: fruit, dtype: object" + ] + }, + "execution_count": 176, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "str_series.str.replace(\"[p]{2}\", \"XX\", regex=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "dt_series = pd.Series([20220101, 20220201, 20220301, 20220401], name=\"date\")\n", + "dt_series = pd.to_datetime(dt_series, format=\"%Y%m%d\")" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { +
"text/plain": [ + "dtype('