diff --git a/code/16/index.ipynb b/code/16/index.ipynb new file mode 100644 index 0000000..330b811 --- /dev/null +++ b/code/16/index.ipynb @@ -0,0 +1,2792 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# -*- coding:utf-8 -*-\n", + "\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
a
01
12
23
34
45
\n", + "
" + ], + "text/plain": [ + " a\n", + "0 1\n", + "1 2\n", + "2 3\n", + "3 4\n", + "4 5" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.DataFrame({\"a\": [1, 2, 3, 4, 5]})\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 2\n", + "2 3\n", + "3 4\n", + "4 5\n", + "Name: a, dtype: int64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[\"a\"].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RangeIndex(start=0, stop=5, step=1)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.index" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RangeIndex(start=0, stop=5, step=1)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[\"a\"].index" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
a
12
23
34
\n", + "
" + ], + "text/plain": [ + " a\n", + "1 2\n", + "2 3\n", + "3 4" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.loc[1:3, :]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.at[3, \"a\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "data[\"b\"] = list(\"abcde\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "data.set_index(\"b\", inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['a', 'b', 'c', 'd', 'e'], dtype='object', name='b')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.index" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['a', 'b', 'c', 'd', 'e'], dtype='object', name='b')" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[\"a\"].index" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
a
b
a1
b2
c3
d4
e5
\n", + "
" + ], + "text/plain": [ + " a\n", + "b \n", + "a 1\n", + "b 2\n", + "c 3\n", + "d 4\n", + "e 5" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
a
b
a1
b2
c3
\n", + "
" + ], + "text/plain": [ + " a\n", + "b \n", + "a 1\n", + "b 2\n", + "c 3" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.loc[\"a\":\"c\", :]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.at[\"a\", \"a\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ba
0a1
1b2
2c3
3d4
4e5
\n", + "
" + ], + "text/plain": [ + " b a\n", + "0 a 1\n", + "1 b 2\n", + "2 c 3\n", + "3 d 4\n", + "4 e 5" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
a
b
a1
b2
c3
d4
e5
\n", + "
" + ], + "text/plain": [ + " a\n", + "b \n", + "a 1\n", + "b 2\n", + "c 3\n", + "d 4\n", + "e 5" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ba
c
1a1
1b2
\n", + "
" + ], + "text/plain": [ + " b a\n", + "c \n", + "1 a 1\n", + "1 b 2" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = data.reset_index()\n", + "\n", + "data[\"c\"] = [1, 1, 2, 2, 3]\n", + "data = data.set_index(\"c\")\n", + "data.loc[1, :]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexcategoryintervaldatetimetimedeltaperiod
0aA(0.0, 1.5]2022-01-011 days 00:00:002021Q1
1bA(1.5, 3.0]2022-01-021 days 03:00:002021Q2
2cB(3.0, 4.5]2022-01-031 days 06:00:002021Q3
3dB(4.5, 6.0]2022-01-041 days 09:00:002021Q4
4eB(6.0, 7.5]2022-01-051 days 12:00:002022Q1
\n", + "
" + ], + "text/plain": [ + " index category interval datetime timedelta period\n", + "0 a A (0.0, 1.5] 2022-01-01 1 days 00:00:00 2021Q1\n", + "1 b A (1.5, 3.0] 2022-01-02 1 days 03:00:00 2021Q2\n", + "2 c B (3.0, 4.5] 2022-01-03 1 days 06:00:00 2021Q3\n", + "3 d B (4.5, 6.0] 2022-01-04 1 days 09:00:00 2021Q4\n", + "4 e B (6.0, 7.5] 2022-01-05 1 days 12:00:00 2022Q1" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.DataFrame(\n", + " {\n", + " \"index\": \"a b c d e\".split(),\n", + " \"category\": pd.Categorical([*[\"A\"]*2, *[\"B\"]*3]),\n", + " \"interval\": pd.interval_range(start=0, end=7.5, freq=1.5),\n", + " \"datetime\": pd.date_range(\"20220101\", periods=5, freq=\"D\"),\n", + " \"timedelta\": pd.timedelta_range(start=\"1 day\", periods=5, freq=\"3H\"),\n", + " \"period\": pd.period_range(\"20210101\", periods=5, freq=\"Q\"),\n", + " }\n", + ")\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 基础索引类型" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['a', 'b', 'c', 'd', 'e'], dtype='object', name='index')" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# base index\n", + "base = data.set_index(\"index\")\n", + "base.index" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categoryintervaldatetimetimedeltaperiod
index
aA(0.0, 1.5]2022-01-011 days 00:00:002021Q1
cB(3.0, 4.5]2022-01-031 days 06:00:002021Q3
\n", + "
" + ], + "text/plain": [ + " category interval datetime timedelta period\n", + "index \n", + "a A (0.0, 1.5] 2022-01-01 1 days 00:00:00 2021Q1\n", + "c B (3.0, 4.5] 2022-01-03 1 days 06:00:00 2021Q3" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "base.loc[[\"a\", \"c\"], :]" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['A', 'B', 'C', 'D', 'E'], dtype='object', name='index')" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "base.index.str.upper()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['A', 'b', 'C', 'd', 'e'], dtype='object', name='index')" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "base.index.map(lambda s: s.upper() if s in list(\"ac\") else s)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 范围索引类型" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "RangeIndex(start=0, stop=5, step=1)" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# range index\n", + "data.index" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.index.start" + ] + }, + { + "cell_type": "code", + 
"execution_count": 25, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "5" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.index.stop" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 区间索引类型" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexcategorydatetimetimedeltaperiod
interval
(0.0, 1.5]aA2022-01-011 days 00:00:002021Q1
(1.5, 3.0]bA2022-01-021 days 03:00:002021Q2
(3.0, 4.5]cB2022-01-031 days 06:00:002021Q3
(4.5, 6.0]dB2022-01-041 days 09:00:002021Q4
(6.0, 7.5]eB2022-01-051 days 12:00:002022Q1
\n", + "
" + ], + "text/plain": [ + " index category datetime timedelta period\n", + "interval \n", + "(0.0, 1.5] a A 2022-01-01 1 days 00:00:00 2021Q1\n", + "(1.5, 3.0] b A 2022-01-02 1 days 03:00:00 2021Q2\n", + "(3.0, 4.5] c B 2022-01-03 1 days 06:00:00 2021Q3\n", + "(4.5, 6.0] d B 2022-01-04 1 days 09:00:00 2021Q4\n", + "(6.0, 7.5] e B 2022-01-05 1 days 12:00:00 2022Q1" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "interval = data.set_index(\"interval\")\n", + "interval.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0], (6.0, 7.5]], dtype='interval[float64, right]', name='interval')" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "interval.index" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexcategorydatetimetimedeltaperiod
interval
(0.0, 1.5]aA2022-01-011 days 00:00:002021Q1
(3.0, 4.5]cB2022-01-031 days 06:00:002021Q3
\n", + "
" + ], + "text/plain": [ + " index category datetime timedelta period\n", + "interval \n", + "(0.0, 1.5] a A 2022-01-01 1 days 00:00:00 2021Q1\n", + "(3.0, 4.5] c B 2022-01-03 1 days 06:00:00 2021Q3" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "interval.loc[[1.5, 3.2]]" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexcategorydatetimetimedeltaperiod
interval
(0.0, 1.5]aA2022-01-011 days 00:00:002021Q1
(3.0, 4.5]cB2022-01-031 days 06:00:002021Q3
\n", + "
" + ], + "text/plain": [ + " index category datetime timedelta period\n", + "interval \n", + "(0.0, 1.5] a A 2022-01-01 1 days 00:00:00 2021Q1\n", + "(3.0, 4.5] c B 2022-01-03 1 days 06:00:00 2021Q3" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "interval.loc[[pd.Interval(0, 1.5), pd.Interval(3, 4.5)], :]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 类别索引类型" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexintervaldatetimetimedeltaperiod
category
Aa(0.0, 1.5]2022-01-011 days 00:00:002021Q1
Ab(1.5, 3.0]2022-01-021 days 03:00:002021Q2
Bc(3.0, 4.5]2022-01-031 days 06:00:002021Q3
Bd(4.5, 6.0]2022-01-041 days 09:00:002021Q4
Be(6.0, 7.5]2022-01-051 days 12:00:002022Q1
\n", + "
" + ], + "text/plain": [ + " index interval datetime timedelta period\n", + "category \n", + "A a (0.0, 1.5] 2022-01-01 1 days 00:00:00 2021Q1\n", + "A b (1.5, 3.0] 2022-01-02 1 days 03:00:00 2021Q2\n", + "B c (3.0, 4.5] 2022-01-03 1 days 06:00:00 2021Q3\n", + "B d (4.5, 6.0] 2022-01-04 1 days 09:00:00 2021Q4\n", + "B e (6.0, 7.5] 2022-01-05 1 days 12:00:00 2022Q1" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# categorical index\n", + "cat = data.set_index(\"category\")\n", + "cat.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "CategoricalIndex(['A', 'A', 'B', 'B', 'B'], categories=['A', 'B'], ordered=False, dtype='category', name='category')" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cat.index" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexintervaldatetimetimedeltaperiod
category
Aa(0.0, 1.5]2022-01-011 days 00:00:002021Q1
Ab(1.5, 3.0]2022-01-021 days 03:00:002021Q2
\n", + "
" + ], + "text/plain": [ + " index interval datetime timedelta period\n", + "category \n", + "A a (0.0, 1.5] 2022-01-01 1 days 00:00:00 2021Q1\n", + "A b (1.5, 3.0] 2022-01-02 1 days 03:00:00 2021Q2" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cat.loc[\"A\", :]" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['A', 'B'], dtype='object')" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cat.index.categories" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 1, 1, 1], dtype=int8)" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cat.index.codes" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "CategoricalIndex(['LEVEL-A', 'LEVEL-A', 'LEVEL-B', 'LEVEL-B', 'LEVEL-B'], categories=['LEVEL-A', 'LEVEL-B'], ordered=False, dtype='category', name='category')" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cat.index.rename_categories(lambda c: f\"LEVEL-{c}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 日期时间索引类型" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexcategoryintervaltimedeltaperiod
datetime
2022-01-01aA(0.0, 1.5]1 days 00:00:002021Q1
2022-01-02bA(1.5, 3.0]1 days 03:00:002021Q2
2022-01-03cB(3.0, 4.5]1 days 06:00:002021Q3
2022-01-04dB(4.5, 6.0]1 days 09:00:002021Q4
2022-01-05eB(6.0, 7.5]1 days 12:00:002022Q1
\n", + "
" + ], + "text/plain": [ + " index category interval timedelta period\n", + "datetime \n", + "2022-01-01 a A (0.0, 1.5] 1 days 00:00:00 2021Q1\n", + "2022-01-02 b A (1.5, 3.0] 1 days 03:00:00 2021Q2\n", + "2022-01-03 c B (3.0, 4.5] 1 days 06:00:00 2021Q3\n", + "2022-01-04 d B (4.5, 6.0] 1 days 09:00:00 2021Q4\n", + "2022-01-05 e B (6.0, 7.5] 1 days 12:00:00 2022Q1" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# datetime index\n", + "dt = data.set_index(\"datetime\")\n", + "dt.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04',\n", + " '2022-01-05'],\n", + " dtype='datetime64[ns]', name='datetime', freq=None)" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dt.index" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexcategoryintervaltimedeltaperiod
datetime
2022-01-01aA(0.0, 1.5]1 days 00:00:002021Q1
2022-01-02bA(1.5, 3.0]1 days 03:00:002021Q2
2022-01-03cB(3.0, 4.5]1 days 06:00:002021Q3
\n", + "
" + ], + "text/plain": [ + " index category interval timedelta period\n", + "datetime \n", + "2022-01-01 a A (0.0, 1.5] 1 days 00:00:00 2021Q1\n", + "2022-01-02 b A (1.5, 3.0] 1 days 03:00:00 2021Q2\n", + "2022-01-03 c B (3.0, 4.5] 1 days 06:00:00 2021Q3" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dt.loc[\"20220101\":\"20220103\", :]" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexcategoryintervaltimedeltaperiod
datetime
2022-01-01aA(0.0, 1.5]1 days 00:00:002021Q1
2022-01-02bA(1.5, 3.0]1 days 03:00:002021Q2
2022-01-03cB(3.0, 4.5]1 days 06:00:002021Q3
\n", + "
" + ], + "text/plain": [ + " index category interval timedelta period\n", + "datetime \n", + "2022-01-01 a A (0.0, 1.5] 1 days 00:00:00 2021Q1\n", + "2022-01-02 b A (1.5, 3.0] 1 days 03:00:00 2021Q2\n", + "2022-01-03 c B (3.0, 4.5] 1 days 06:00:00 2021Q3" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dt.loc[\"2022-01-01\":\"2022-01-03\", :]" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([1, 2, 3, 4, 5], dtype='int64', name='datetime')" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dt.index.day" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['2022/01/01', '2022/01/02', '2022/01/03', '2022/01/04', '2022/01/05'], dtype='object', name='datetime')" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dt.index.strftime(\"%Y/%m/%d\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 时间差索引类型" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexcategoryintervaldatetimeperiod
timedelta
1 days 00:00:00aA(0.0, 1.5]2022-01-012021Q1
1 days 03:00:00bA(1.5, 3.0]2022-01-022021Q2
1 days 06:00:00cB(3.0, 4.5]2022-01-032021Q3
1 days 09:00:00dB(4.5, 6.0]2022-01-042021Q4
1 days 12:00:00eB(6.0, 7.5]2022-01-052022Q1
\n", + "
" + ], + "text/plain": [ + " index category interval datetime period\n", + "timedelta \n", + "1 days 00:00:00 a A (0.0, 1.5] 2022-01-01 2021Q1\n", + "1 days 03:00:00 b A (1.5, 3.0] 2022-01-02 2021Q2\n", + "1 days 06:00:00 c B (3.0, 4.5] 2022-01-03 2021Q3\n", + "1 days 09:00:00 d B (4.5, 6.0] 2022-01-04 2021Q4\n", + "1 days 12:00:00 e B (6.0, 7.5] 2022-01-05 2022Q1" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# timedelta index\n", + "delta = data.set_index(\"timedelta\")\n", + "delta.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "TimedeltaIndex(['1 days 00:00:00', '1 days 03:00:00', '1 days 06:00:00',\n", + " '1 days 09:00:00', '1 days 12:00:00'],\n", + " dtype='timedelta64[ns]', name='timedelta', freq=None)" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "delta.index" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2021-12-31 00:00:00', '2021-12-31 21:00:00',\n", + " '2022-01-01 18:00:00', '2022-01-02 15:00:00',\n", + " '2022-01-03 12:00:00'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dt.index - delta.index" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([0, 10800, 21600, 32400, 43200], dtype='int64', name='timedelta')" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "delta.index.seconds" + ] + }, + 
{ + "cell_type": "code", + "execution_count": 46, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dayshoursminutessecondsmillisecondsmicrosecondsnanoseconds
01000000
11300000
21600000
31900000
411200000
\n", + "
" + ], + "text/plain": [ + " days hours minutes seconds milliseconds microseconds nanoseconds\n", + "0 1 0 0 0 0 0 0\n", + "1 1 3 0 0 0 0 0\n", + "2 1 6 0 0 0 0 0\n", + "3 1 9 0 0 0 0 0\n", + "4 1 12 0 0 0 0 0" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "delta.index.components" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexcategoryintervaldatetimeperiod
timedelta
1 days 00:00:00aA(0.0, 1.5]2022-01-012021Q1
1 days 09:00:00dB(4.5, 6.0]2022-01-042021Q4
\n", + "
" + ], + "text/plain": [ + " index category interval datetime period\n", + "timedelta \n", + "1 days 00:00:00 a A (0.0, 1.5] 2022-01-01 2021Q1\n", + "1 days 09:00:00 d B (4.5, 6.0] 2022-01-04 2021Q4" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "delta.loc[[pd.Timedelta(\"1d\"), pd.Timedelta(\"1d 9h\")], :]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 周期索引类型" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexcategoryintervaldatetimetimedelta
period
2021Q1aA(0.0, 1.5]2022-01-011 days 00:00:00
2021Q2bA(1.5, 3.0]2022-01-021 days 03:00:00
2021Q3cB(3.0, 4.5]2022-01-031 days 06:00:00
2021Q4dB(4.5, 6.0]2022-01-041 days 09:00:00
2022Q1eB(6.0, 7.5]2022-01-051 days 12:00:00
\n", + "
" + ], + "text/plain": [ + " index category interval datetime timedelta\n", + "period \n", + "2021Q1 a A (0.0, 1.5] 2022-01-01 1 days 00:00:00\n", + "2021Q2 b A (1.5, 3.0] 2022-01-02 1 days 03:00:00\n", + "2021Q3 c B (3.0, 4.5] 2022-01-03 1 days 06:00:00\n", + "2021Q4 d B (4.5, 6.0] 2022-01-04 1 days 09:00:00\n", + "2022Q1 e B (6.0, 7.5] 2022-01-05 1 days 12:00:00" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# period index\n", + "period = data.set_index(\"period\")\n", + "period.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "PeriodIndex(['2021Q1', '2021Q2', '2021Q3', '2021Q4', '2022Q1'], dtype='period[Q-DEC]', name='period')" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "period.index" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexcategoryintervaldatetimetimedelta
period
2021Q4dB(4.5, 6.0]2022-01-041 days 09:00:00
2021Q1aA(0.0, 1.5]2022-01-011 days 00:00:00
\n", + "
" + ], + "text/plain": [ + " index category interval datetime timedelta\n", + "period \n", + "2021Q4 d B (4.5, 6.0] 2022-01-04 1 days 09:00:00\n", + "2021Q1 a A (0.0, 1.5] 2022-01-01 1 days 00:00:00" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "period.loc[[\"2021Q4\", \"2021Q1\"], :]" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2021-01-01', '2021-04-01', '2021-07-01', '2021-10-01',\n", + " '2022-01-01'],\n", + " dtype='datetime64[ns]', name='period', freq='QS-OCT')" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "period.index.start_time" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([1, 2, 3, 4, 1], dtype='int64', name='period')" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "period.index.quarter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "interpreter": { + "hash": "13977d4cc82dee5f9d9535ceb495bd0ab12a43c33c664e5f0d53c24cf634b67f" + }, + "kernelspec": { + "display_name": "Python 3.9.0 ('pandas-startup')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}