Files

1064 lines
18 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# 什么是 Series"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 1\n",
"1 2\n",
"2 3\n",
"3 4\n",
"4 5\n",
"dtype: int64\n"
]
}
],
"source": [
"import pandas as pd\n",
"data = [1, 2, 3, 4, 5]\n",
"series = pd.Series(data)\n",
"print(series)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RangeIndex(start=0, stop=5, step=1)\n"
]
}
],
"source": [
"print(series.index)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"count 5.000000\n",
"mean 3.000000\n",
"std 1.581139\n",
"min 1.000000\n",
"25% 2.000000\n",
"50% 3.000000\n",
"75% 4.000000\n",
"max 5.000000\n",
"dtype: float64\n"
]
}
],
"source": [
"print(series.describe())"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# 常用的 Series 属性"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0 44\n",
"1 41\n",
"2 12\n",
"3 34\n",
"4 15\n",
"Name: numbers, dtype: int64"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import random\n",
"\n",
"import pandas as pd\n",
"\n",
"random.seed(233)\n",
"\n",
"data = [random.randrange(1, 50) for _ in range(100)]\n",
"series = pd.Series(data, name=\"numbers\")\n",
"series.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"RangeIndex(start=0, stop=100, step=1)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series.index"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]\n"
]
}
],
"source": [
"print(list(series.index))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"dtype('int64')"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series.dtype"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"(100,)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series.shape"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"100"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series.size"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"'numbers'"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series.name"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"array([44, 41, 12, 34, 15, 31, 36, 14, 6, 7, 18, 15, 45, 46, 1, 5, 44,\n",
" 16, 39, 46, 46, 15, 48, 19, 8, 31, 21, 14, 6, 19, 32, 34, 41, 10,\n",
" 40, 46, 22, 20, 7, 33, 29, 48, 18, 27, 3, 26, 48, 12, 4, 49, 5,\n",
" 49, 8, 14, 11, 23, 21, 48, 34, 34, 29, 9, 2, 30, 8, 45, 23, 46,\n",
" 43, 7, 45, 6, 37, 24, 4, 26, 20, 35, 3, 39, 33, 30, 26, 47, 1,\n",
" 5, 31, 23, 25, 9, 41, 29, 23, 20, 39, 48, 47, 9, 14, 32])"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series.values"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# 常用的 Series 方法"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"arr = [random.randrange(1, 50) for _ in range(100)]\n",
"arr = pd.Series(arr)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 数学运算 / 统计"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0 79\n",
"1 85\n",
"2 21\n",
"3 60\n",
"4 60\n",
" ..\n",
"95 55\n",
"96 93\n",
"97 25\n",
"98 51\n",
"99 48\n",
"Length: 100, dtype: int64"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series.add(arr)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0 9\n",
"1 -3\n",
"2 3\n",
"3 8\n",
"4 -30\n",
" ..\n",
"95 41\n",
"96 1\n",
"97 -7\n",
"98 -23\n",
"99 16\n",
"Length: 100, dtype: int64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series.sub(arr)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0 1540\n",
"1 1804\n",
"2 108\n",
"3 884\n",
"4 675\n",
" ... \n",
"95 336\n",
"96 2162\n",
"97 144\n",
"98 518\n",
"99 512\n",
"Length: 100, dtype: int64"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series.mul(arr)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0 1.257143\n",
"1 0.931818\n",
"2 1.333333\n",
"3 1.307692\n",
"4 0.333333\n",
" ... \n",
"95 6.857143\n",
"96 1.021739\n",
"97 0.562500\n",
"98 0.378378\n",
"99 2.000000\n",
"Length: 100, dtype: float64"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series.div(arr)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"49"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series.max()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series.min()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"25.5"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series.median()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"25.51"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series.mean()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"14.990566056926294"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series.std()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 函数应用"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0 True\n",
"1 False\n",
"2 True\n",
"3 True\n",
"4 False\n",
"Name: numbers, dtype: bool"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def mod2(x):\n",
"    \"\"\"Return True when x is even, False otherwise.\"\"\"\n",
"    return x % 2 == 0\n",
"\n",
"\n",
"idx = series.map(mod2)\n",
"idx.head()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0 44\n",
"2 12\n",
"3 34\n",
"6 36\n",
"7 14\n",
"Name: numbers, dtype: int64"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series[idx].head()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0 True\n",
"1 False\n",
"2 True\n",
"3 True\n",
"4 False\n",
"Name: numbers, dtype: bool"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"idx = series.apply(mod2)\n",
"idx.head()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0 44\n",
"2 12\n",
"3 34\n",
"6 36\n",
"7 14\n",
"Name: numbers, dtype: int64"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series[idx].head()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## 特定类型接口"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"str_series = pd.Series([\"apple\", \"orange\", \"banana\", \"grape\"], name=\"fruit\")"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0 1\n",
"1 1\n",
"2 3\n",
"3 1\n",
"Name: fruit, dtype: int64"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"str_series.str.count(\"a\")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0 5\n",
"1 6\n",
"2 6\n",
"3 5\n",
"Name: fruit, dtype: int64"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"str_series.str.len()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/0t/s0c95rbs6ds7w_b0d471p0kc0000gn/T/ipykernel_83159/772644344.py:1: FutureWarning: The default value of regex will change from True to False in a future version.\n",
" str_series.str.replace(\"[p]{2}\", \"XX\")\n"
]
},
{
"data": {
"text/plain": [
"0 aXXle\n",
"1 orange\n",
"2 banana\n",
"3 grape\n",
"Name: fruit, dtype: object"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"str_series.str.replace(\"[p]{2}\", \"XX\", regex=True)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"dt_series = pd.Series([20220101, 20220201, 20220301, 20220401], name=\"date\").astype(\"string\")\n",
"dt_series = pd.to_datetime(dt_series, format=\"%Y%m%d\")"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"dtype('<M8[ns]')"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dt_series.dtype"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0 2022-01-01\n",
"1 2022-02-01\n",
"2 2022-03-01\n",
"3 2022-04-01\n",
"Name: date, dtype: datetime64[ns]"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dt_series.head()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0 1\n",
"1 2\n",
"2 3\n",
"3 4\n",
"Name: date, dtype: int64"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dt_series.dt.month"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0 1\n",
"1 32\n",
"2 60\n",
"3 91\n",
"Name: date, dtype: int64"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dt_series.dt.dayofyear"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0 5\n",
"1 1\n",
"2 1\n",
"3 4\n",
"Name: date, dtype: int64"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dt_series.dt.weekday"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.4 ('sspai-100-hours-series-python')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
},
"vscode": {
"interpreter": {
"hash": "7a101baf08afe636412f97dd4a9fc2e65b6f84f0ec50413bf3e19b04a26b8ba6"
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}