Files
2022-06-23 10:13:32 +08:00

2793 lines
67 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# -*- coding:utf-8 -*-\n",
"\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a\n",
"0 1\n",
"1 2\n",
"2 3\n",
"3 4\n",
"4 5"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = pd.DataFrame({\"a\": [1, 2, 3, 4, 5]})\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0 1\n",
"1 2\n",
"2 3\n",
"3 4\n",
"4 5\n",
"Name: a, dtype: int64"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[\"a\"].head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RangeIndex(start=0, stop=5, step=1)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.index"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RangeIndex(start=0, stop=5, step=1)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[\"a\"].index"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a\n",
"1 2\n",
"2 3\n",
"3 4"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.loc[1:3, :]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.at[3, \"a\"]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"data[\"b\"] = list(\"abcde\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# Promote column \"b\" to the row index. Rebind instead of mutating in place,\n",
"# matching the reassignment style of the later reset_index/set_index cells.\n",
"data = data.set_index(\"b\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['a', 'b', 'c', 'd', 'e'], dtype='object', name='b')"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.index"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['a', 'b', 'c', 'd', 'e'], dtype='object', name='b')"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[\"a\"].index"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>d</th>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>e</th>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a\n",
"b \n",
"a 1\n",
"b 2\n",
"c 3\n",
"d 4\n",
"e 5"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a\n",
"b \n",
"a 1\n",
"b 2\n",
"c 3"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.loc[\"a\":\"c\", :]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.at[\"a\", \"a\"]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>b</th>\n",
" <th>a</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>a</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>b</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>c</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>d</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>e</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" b a\n",
"0 a 1\n",
"1 b 2\n",
"2 c 3\n",
"3 d 4\n",
"4 e 5"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>d</th>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>e</th>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a\n",
"b \n",
"a 1\n",
"b 2\n",
"c 3\n",
"d 4\n",
"e 5"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>b</th>\n",
" <th>a</th>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>a</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>b</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" b a\n",
"c \n",
"1 a 1\n",
"1 b 2"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = data.reset_index()\n",
"\n",
"data[\"c\"] = [1, 1, 2, 2, 3]\n",
"data = data.set_index(\"c\")\n",
"data.loc[1, :]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>category</th>\n",
" <th>interval</th>\n",
" <th>datetime</th>\n",
" <th>timedelta</th>\n",
" <th>period</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>a</td>\n",
" <td>A</td>\n",
" <td>(0.0, 1.5]</td>\n",
" <td>2022-01-01</td>\n",
" <td>1 days 00:00:00</td>\n",
" <td>2021Q1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>b</td>\n",
" <td>A</td>\n",
" <td>(1.5, 3.0]</td>\n",
" <td>2022-01-02</td>\n",
" <td>1 days 03:00:00</td>\n",
" <td>2021Q2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>c</td>\n",
" <td>B</td>\n",
" <td>(3.0, 4.5]</td>\n",
" <td>2022-01-03</td>\n",
" <td>1 days 06:00:00</td>\n",
" <td>2021Q3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>d</td>\n",
" <td>B</td>\n",
" <td>(4.5, 6.0]</td>\n",
" <td>2022-01-04</td>\n",
" <td>1 days 09:00:00</td>\n",
" <td>2021Q4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>e</td>\n",
" <td>B</td>\n",
" <td>(6.0, 7.5]</td>\n",
" <td>2022-01-05</td>\n",
" <td>1 days 12:00:00</td>\n",
" <td>2022Q1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index category interval datetime timedelta period\n",
"0 a A (0.0, 1.5] 2022-01-01 1 days 00:00:00 2021Q1\n",
"1 b A (1.5, 3.0] 2022-01-02 1 days 03:00:00 2021Q2\n",
"2 c B (3.0, 4.5] 2022-01-03 1 days 06:00:00 2021Q3\n",
"3 d B (4.5, 6.0] 2022-01-04 1 days 09:00:00 2021Q4\n",
"4 e B (6.0, 7.5] 2022-01-05 1 days 12:00:00 2022Q1"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Five-row sample frame; each column holds a different pandas dtype\n",
"# (str, categorical, interval, datetime, timedelta, period).\n",
"# The hour alias is written lowercase (\"3h\"): uppercase \"H\" is deprecated\n",
"# since pandas 2.2, while older releases accept the lowercase spelling too.\n",
"data = pd.DataFrame(\n",
"    {\n",
"        \"index\": \"a b c d e\".split(),\n",
"        \"category\": pd.Categorical([*[\"A\"]*2, *[\"B\"]*3]),\n",
"        \"interval\": pd.interval_range(start=0, end=7.5, freq=1.5),\n",
"        \"datetime\": pd.date_range(\"20220101\", periods=5, freq=\"D\"),\n",
"        \"timedelta\": pd.timedelta_range(start=\"1 day\", periods=5, freq=\"3h\"),\n",
"        \"period\": pd.period_range(\"20210101\", periods=5, freq=\"Q\"),\n",
"    }\n",
")\n",
"data.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 基础索引类型"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['a', 'b', 'c', 'd', 'e'], dtype='object', name='index')"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# base index\n",
"base = data.set_index(\"index\")\n",
"base.index"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>category</th>\n",
" <th>interval</th>\n",
" <th>datetime</th>\n",
" <th>timedelta</th>\n",
" <th>period</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a</th>\n",
" <td>A</td>\n",
" <td>(0.0, 1.5]</td>\n",
" <td>2022-01-01</td>\n",
" <td>1 days 00:00:00</td>\n",
" <td>2021Q1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <td>B</td>\n",
" <td>(3.0, 4.5]</td>\n",
" <td>2022-01-03</td>\n",
" <td>1 days 06:00:00</td>\n",
" <td>2021Q3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" category interval datetime timedelta period\n",
"index \n",
"a A (0.0, 1.5] 2022-01-01 1 days 00:00:00 2021Q1\n",
"c B (3.0, 4.5] 2022-01-03 1 days 06:00:00 2021Q3"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"base.loc[[\"a\", \"c\"], :]"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['A', 'B', 'C', 'D', 'E'], dtype='object', name='index')"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"base.index.str.upper()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['A', 'b', 'C', 'd', 'e'], dtype='object', name='index')"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"base.index.map(lambda s: s.upper() if s in list(\"ac\") else s)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 范围索引类型"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"RangeIndex(start=0, stop=5, step=1)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# range index\n",
"data.index"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.index.start"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"5"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.index.stop"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 区间索引类型"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>category</th>\n",
" <th>datetime</th>\n",
" <th>timedelta</th>\n",
" <th>period</th>\n",
" </tr>\n",
" <tr>\n",
" <th>interval</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>(0.0, 1.5]</th>\n",
" <td>a</td>\n",
" <td>A</td>\n",
" <td>2022-01-01</td>\n",
" <td>1 days 00:00:00</td>\n",
" <td>2021Q1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>(1.5, 3.0]</th>\n",
" <td>b</td>\n",
" <td>A</td>\n",
" <td>2022-01-02</td>\n",
" <td>1 days 03:00:00</td>\n",
" <td>2021Q2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>(3.0, 4.5]</th>\n",
" <td>c</td>\n",
" <td>B</td>\n",
" <td>2022-01-03</td>\n",
" <td>1 days 06:00:00</td>\n",
" <td>2021Q3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>(4.5, 6.0]</th>\n",
" <td>d</td>\n",
" <td>B</td>\n",
" <td>2022-01-04</td>\n",
" <td>1 days 09:00:00</td>\n",
" <td>2021Q4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>(6.0, 7.5]</th>\n",
" <td>e</td>\n",
" <td>B</td>\n",
" <td>2022-01-05</td>\n",
" <td>1 days 12:00:00</td>\n",
" <td>2022Q1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index category datetime timedelta period\n",
"interval \n",
"(0.0, 1.5] a A 2022-01-01 1 days 00:00:00 2021Q1\n",
"(1.5, 3.0] b A 2022-01-02 1 days 03:00:00 2021Q2\n",
"(3.0, 4.5] c B 2022-01-03 1 days 06:00:00 2021Q3\n",
"(4.5, 6.0] d B 2022-01-04 1 days 09:00:00 2021Q4\n",
"(6.0, 7.5] e B 2022-01-05 1 days 12:00:00 2022Q1"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"interval = data.set_index(\"interval\")\n",
"interval.head()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0], (6.0, 7.5]], dtype='interval[float64, right]', name='interval')"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"interval.index"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>category</th>\n",
" <th>datetime</th>\n",
" <th>timedelta</th>\n",
" <th>period</th>\n",
" </tr>\n",
" <tr>\n",
" <th>interval</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>(0.0, 1.5]</th>\n",
" <td>a</td>\n",
" <td>A</td>\n",
" <td>2022-01-01</td>\n",
" <td>1 days 00:00:00</td>\n",
" <td>2021Q1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>(3.0, 4.5]</th>\n",
" <td>c</td>\n",
" <td>B</td>\n",
" <td>2022-01-03</td>\n",
" <td>1 days 06:00:00</td>\n",
" <td>2021Q3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index category datetime timedelta period\n",
"interval \n",
"(0.0, 1.5] a A 2022-01-01 1 days 00:00:00 2021Q1\n",
"(3.0, 4.5] c B 2022-01-03 1 days 06:00:00 2021Q3"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"interval.loc[[1.5, 3.2]]"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>category</th>\n",
" <th>datetime</th>\n",
" <th>timedelta</th>\n",
" <th>period</th>\n",
" </tr>\n",
" <tr>\n",
" <th>interval</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>(0.0, 1.5]</th>\n",
" <td>a</td>\n",
" <td>A</td>\n",
" <td>2022-01-01</td>\n",
" <td>1 days 00:00:00</td>\n",
" <td>2021Q1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>(3.0, 4.5]</th>\n",
" <td>c</td>\n",
" <td>B</td>\n",
" <td>2022-01-03</td>\n",
" <td>1 days 06:00:00</td>\n",
" <td>2021Q3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index category datetime timedelta period\n",
"interval \n",
"(0.0, 1.5] a A 2022-01-01 1 days 00:00:00 2021Q1\n",
"(3.0, 4.5] c B 2022-01-03 1 days 06:00:00 2021Q3"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"interval.loc[[pd.Interval(0, 1.5), pd.Interval(3, 4.5)], :]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 类别索引类型"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>interval</th>\n",
" <th>datetime</th>\n",
" <th>timedelta</th>\n",
" <th>period</th>\n",
" </tr>\n",
" <tr>\n",
" <th>category</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>a</td>\n",
" <td>(0.0, 1.5]</td>\n",
" <td>2022-01-01</td>\n",
" <td>1 days 00:00:00</td>\n",
" <td>2021Q1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>b</td>\n",
" <td>(1.5, 3.0]</td>\n",
" <td>2022-01-02</td>\n",
" <td>1 days 03:00:00</td>\n",
" <td>2021Q2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>c</td>\n",
" <td>(3.0, 4.5]</td>\n",
" <td>2022-01-03</td>\n",
" <td>1 days 06:00:00</td>\n",
" <td>2021Q3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>d</td>\n",
" <td>(4.5, 6.0]</td>\n",
" <td>2022-01-04</td>\n",
" <td>1 days 09:00:00</td>\n",
" <td>2021Q4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>e</td>\n",
" <td>(6.0, 7.5]</td>\n",
" <td>2022-01-05</td>\n",
" <td>1 days 12:00:00</td>\n",
" <td>2022Q1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index interval datetime timedelta period\n",
"category \n",
"A a (0.0, 1.5] 2022-01-01 1 days 00:00:00 2021Q1\n",
"A b (1.5, 3.0] 2022-01-02 1 days 03:00:00 2021Q2\n",
"B c (3.0, 4.5] 2022-01-03 1 days 06:00:00 2021Q3\n",
"B d (4.5, 6.0] 2022-01-04 1 days 09:00:00 2021Q4\n",
"B e (6.0, 7.5] 2022-01-05 1 days 12:00:00 2022Q1"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# categorical index\n",
"cat = data.set_index(\"category\")\n",
"cat.head()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"CategoricalIndex(['A', 'A', 'B', 'B', 'B'], categories=['A', 'B'], ordered=False, dtype='category', name='category')"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cat.index"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>interval</th>\n",
" <th>datetime</th>\n",
" <th>timedelta</th>\n",
" <th>period</th>\n",
" </tr>\n",
" <tr>\n",
" <th>category</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>a</td>\n",
" <td>(0.0, 1.5]</td>\n",
" <td>2022-01-01</td>\n",
" <td>1 days 00:00:00</td>\n",
" <td>2021Q1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>b</td>\n",
" <td>(1.5, 3.0]</td>\n",
" <td>2022-01-02</td>\n",
" <td>1 days 03:00:00</td>\n",
" <td>2021Q2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index interval datetime timedelta period\n",
"category \n",
"A a (0.0, 1.5] 2022-01-01 1 days 00:00:00 2021Q1\n",
"A b (1.5, 3.0] 2022-01-02 1 days 03:00:00 2021Q2"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cat.loc[\"A\", :]"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['A', 'B'], dtype='object')"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cat.index.categories"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"array([0, 0, 1, 1, 1], dtype=int8)"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cat.index.codes"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"CategoricalIndex(['LEVEL-A', 'LEVEL-A', 'LEVEL-B', 'LEVEL-B', 'LEVEL-B'], categories=['LEVEL-A', 'LEVEL-B'], ordered=False, dtype='category', name='category')"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cat.index.rename_categories(lambda c: f\"LEVEL-{c}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 日期时间索引类型"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>category</th>\n",
" <th>interval</th>\n",
" <th>timedelta</th>\n",
" <th>period</th>\n",
" </tr>\n",
" <tr>\n",
" <th>datetime</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2022-01-01</th>\n",
" <td>a</td>\n",
" <td>A</td>\n",
" <td>(0.0, 1.5]</td>\n",
" <td>1 days 00:00:00</td>\n",
" <td>2021Q1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-01-02</th>\n",
" <td>b</td>\n",
" <td>A</td>\n",
" <td>(1.5, 3.0]</td>\n",
" <td>1 days 03:00:00</td>\n",
" <td>2021Q2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-01-03</th>\n",
" <td>c</td>\n",
" <td>B</td>\n",
" <td>(3.0, 4.5]</td>\n",
" <td>1 days 06:00:00</td>\n",
" <td>2021Q3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-01-04</th>\n",
" <td>d</td>\n",
" <td>B</td>\n",
" <td>(4.5, 6.0]</td>\n",
" <td>1 days 09:00:00</td>\n",
" <td>2021Q4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-01-05</th>\n",
" <td>e</td>\n",
" <td>B</td>\n",
" <td>(6.0, 7.5]</td>\n",
" <td>1 days 12:00:00</td>\n",
" <td>2022Q1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index category interval timedelta period\n",
"datetime \n",
"2022-01-01 a A (0.0, 1.5] 1 days 00:00:00 2021Q1\n",
"2022-01-02 b A (1.5, 3.0] 1 days 03:00:00 2021Q2\n",
"2022-01-03 c B (3.0, 4.5] 1 days 06:00:00 2021Q3\n",
"2022-01-04 d B (4.5, 6.0] 1 days 09:00:00 2021Q4\n",
"2022-01-05 e B (6.0, 7.5] 1 days 12:00:00 2022Q1"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# datetime index\n",
"dt = data.set_index(\"datetime\")\n",
"dt.head()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04',\n",
" '2022-01-05'],\n",
" dtype='datetime64[ns]', name='datetime', freq=None)"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dt.index"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>category</th>\n",
" <th>interval</th>\n",
" <th>timedelta</th>\n",
" <th>period</th>\n",
" </tr>\n",
" <tr>\n",
" <th>datetime</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2022-01-01</th>\n",
" <td>a</td>\n",
" <td>A</td>\n",
" <td>(0.0, 1.5]</td>\n",
" <td>1 days 00:00:00</td>\n",
" <td>2021Q1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-01-02</th>\n",
" <td>b</td>\n",
" <td>A</td>\n",
" <td>(1.5, 3.0]</td>\n",
" <td>1 days 03:00:00</td>\n",
" <td>2021Q2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-01-03</th>\n",
" <td>c</td>\n",
" <td>B</td>\n",
" <td>(3.0, 4.5]</td>\n",
" <td>1 days 06:00:00</td>\n",
" <td>2021Q3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index category interval timedelta period\n",
"datetime \n",
"2022-01-01 a A (0.0, 1.5] 1 days 00:00:00 2021Q1\n",
"2022-01-02 b A (1.5, 3.0] 1 days 03:00:00 2021Q2\n",
"2022-01-03 c B (3.0, 4.5] 1 days 06:00:00 2021Q3"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dt.loc[\"20220101\":\"20220103\", :]"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>category</th>\n",
" <th>interval</th>\n",
" <th>timedelta</th>\n",
" <th>period</th>\n",
" </tr>\n",
" <tr>\n",
" <th>datetime</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2022-01-01</th>\n",
" <td>a</td>\n",
" <td>A</td>\n",
" <td>(0.0, 1.5]</td>\n",
" <td>1 days 00:00:00</td>\n",
" <td>2021Q1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-01-02</th>\n",
" <td>b</td>\n",
" <td>A</td>\n",
" <td>(1.5, 3.0]</td>\n",
" <td>1 days 03:00:00</td>\n",
" <td>2021Q2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2022-01-03</th>\n",
" <td>c</td>\n",
" <td>B</td>\n",
" <td>(3.0, 4.5]</td>\n",
" <td>1 days 06:00:00</td>\n",
" <td>2021Q3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index category interval timedelta period\n",
"datetime \n",
"2022-01-01 a A (0.0, 1.5] 1 days 00:00:00 2021Q1\n",
"2022-01-02 b A (1.5, 3.0] 1 days 03:00:00 2021Q2\n",
"2022-01-03 c B (3.0, 4.5] 1 days 06:00:00 2021Q3"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dt.loc[\"2022-01-01\":\"2022-01-03\", :]"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Int64Index([1, 2, 3, 4, 5], dtype='int64', name='datetime')"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dt.index.day"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['2022/01/01', '2022/01/02', '2022/01/03', '2022/01/04', '2022/01/05'], dtype='object', name='datetime')"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dt.index.strftime(\"%Y/%m%/%d\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 时间差索引类型"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>category</th>\n",
" <th>interval</th>\n",
" <th>datetime</th>\n",
" <th>period</th>\n",
" </tr>\n",
" <tr>\n",
" <th>timedelta</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1 days 00:00:00</th>\n",
" <td>a</td>\n",
" <td>A</td>\n",
" <td>(0.0, 1.5]</td>\n",
" <td>2022-01-01</td>\n",
" <td>2021Q1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1 days 03:00:00</th>\n",
" <td>b</td>\n",
" <td>A</td>\n",
" <td>(1.5, 3.0]</td>\n",
" <td>2022-01-02</td>\n",
" <td>2021Q2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1 days 06:00:00</th>\n",
" <td>c</td>\n",
" <td>B</td>\n",
" <td>(3.0, 4.5]</td>\n",
" <td>2022-01-03</td>\n",
" <td>2021Q3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1 days 09:00:00</th>\n",
" <td>d</td>\n",
" <td>B</td>\n",
" <td>(4.5, 6.0]</td>\n",
" <td>2022-01-04</td>\n",
" <td>2021Q4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1 days 12:00:00</th>\n",
" <td>e</td>\n",
" <td>B</td>\n",
" <td>(6.0, 7.5]</td>\n",
" <td>2022-01-05</td>\n",
" <td>2022Q1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index category interval datetime period\n",
"timedelta \n",
"1 days 00:00:00 a A (0.0, 1.5] 2022-01-01 2021Q1\n",
"1 days 03:00:00 b A (1.5, 3.0] 2022-01-02 2021Q2\n",
"1 days 06:00:00 c B (3.0, 4.5] 2022-01-03 2021Q3\n",
"1 days 09:00:00 d B (4.5, 6.0] 2022-01-04 2021Q4\n",
"1 days 12:00:00 e B (6.0, 7.5] 2022-01-05 2022Q1"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# timedelta index\n",
"delta = data.set_index(\"timedelta\")\n",
"delta.head()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"TimedeltaIndex(['1 days 00:00:00', '1 days 03:00:00', '1 days 06:00:00',\n",
" '1 days 09:00:00', '1 days 12:00:00'],\n",
" dtype='timedelta64[ns]', name='timedelta', freq=None)"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"delta.index"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2021-12-31 00:00:00', '2021-12-31 21:00:00',\n",
" '2022-01-01 18:00:00', '2022-01-02 15:00:00',\n",
" '2022-01-03 12:00:00'],\n",
" dtype='datetime64[ns]', freq=None)"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dt.index - delta.index"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Int64Index([0, 10800, 21600, 32400, 43200], dtype='int64', name='timedelta')"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"delta.index.seconds"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>days</th>\n",
" <th>hours</th>\n",
" <th>minutes</th>\n",
" <th>seconds</th>\n",
" <th>milliseconds</th>\n",
" <th>microseconds</th>\n",
" <th>nanoseconds</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>12</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" days hours minutes seconds milliseconds microseconds nanoseconds\n",
"0 1 0 0 0 0 0 0\n",
"1 1 3 0 0 0 0 0\n",
"2 1 6 0 0 0 0 0\n",
"3 1 9 0 0 0 0 0\n",
"4 1 12 0 0 0 0 0"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"delta.index.components"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>category</th>\n",
" <th>interval</th>\n",
" <th>datetime</th>\n",
" <th>period</th>\n",
" </tr>\n",
" <tr>\n",
" <th>timedelta</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1 days 00:00:00</th>\n",
" <td>a</td>\n",
" <td>A</td>\n",
" <td>(0.0, 1.5]</td>\n",
" <td>2022-01-01</td>\n",
" <td>2021Q1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1 days 09:00:00</th>\n",
" <td>d</td>\n",
" <td>B</td>\n",
" <td>(4.5, 6.0]</td>\n",
" <td>2022-01-04</td>\n",
" <td>2021Q4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index category interval datetime period\n",
"timedelta \n",
"1 days 00:00:00 a A (0.0, 1.5] 2022-01-01 2021Q1\n",
"1 days 09:00:00 d B (4.5, 6.0] 2022-01-04 2021Q4"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"delta.loc[[pd.Timedelta(\"1d\"), pd.Timedelta(\"1d 9h\")], :]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 周期索引类型"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>category</th>\n",
" <th>interval</th>\n",
" <th>datetime</th>\n",
" <th>timedelta</th>\n",
" </tr>\n",
" <tr>\n",
" <th>period</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2021Q1</th>\n",
" <td>a</td>\n",
" <td>A</td>\n",
" <td>(0.0, 1.5]</td>\n",
" <td>2022-01-01</td>\n",
" <td>1 days 00:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2021Q2</th>\n",
" <td>b</td>\n",
" <td>A</td>\n",
" <td>(1.5, 3.0]</td>\n",
" <td>2022-01-02</td>\n",
" <td>1 days 03:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2021Q3</th>\n",
" <td>c</td>\n",
" <td>B</td>\n",
" <td>(3.0, 4.5]</td>\n",
" <td>2022-01-03</td>\n",
" <td>1 days 06:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2021Q4</th>\n",
" <td>d</td>\n",
" <td>B</td>\n",
" <td>(4.5, 6.0]</td>\n",
" <td>2022-01-04</td>\n",
" <td>1 days 09:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2022Q1</th>\n",
" <td>e</td>\n",
" <td>B</td>\n",
" <td>(6.0, 7.5]</td>\n",
" <td>2022-01-05</td>\n",
" <td>1 days 12:00:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index category interval datetime timedelta\n",
"period \n",
"2021Q1 a A (0.0, 1.5] 2022-01-01 1 days 00:00:00\n",
"2021Q2 b A (1.5, 3.0] 2022-01-02 1 days 03:00:00\n",
"2021Q3 c B (3.0, 4.5] 2022-01-03 1 days 06:00:00\n",
"2021Q4 d B (4.5, 6.0] 2022-01-04 1 days 09:00:00\n",
"2022Q1 e B (6.0, 7.5] 2022-01-05 1 days 12:00:00"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# period index\n",
"period = data.set_index(\"period\")\n",
"period.head()"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"PeriodIndex(['2021Q1', '2021Q2', '2021Q3', '2021Q4', '2022Q1'], dtype='period[Q-DEC]', name='period')"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"period.index"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>category</th>\n",
" <th>interval</th>\n",
" <th>datetime</th>\n",
" <th>timedelta</th>\n",
" </tr>\n",
" <tr>\n",
" <th>period</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2021Q4</th>\n",
" <td>d</td>\n",
" <td>B</td>\n",
" <td>(4.5, 6.0]</td>\n",
" <td>2022-01-04</td>\n",
" <td>1 days 09:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2021Q1</th>\n",
" <td>a</td>\n",
" <td>A</td>\n",
" <td>(0.0, 1.5]</td>\n",
" <td>2022-01-01</td>\n",
" <td>1 days 00:00:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index category interval datetime timedelta\n",
"period \n",
"2021Q4 d B (4.5, 6.0] 2022-01-04 1 days 09:00:00\n",
"2021Q1 a A (0.0, 1.5] 2022-01-01 1 days 00:00:00"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"period.loc[[\"2021Q4\", \"2021Q1\"], :]"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2021-01-01', '2021-04-01', '2021-07-01', '2021-10-01',\n",
" '2022-01-01'],\n",
" dtype='datetime64[ns]', name='period', freq='QS-OCT')"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"period.index.start_time"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Int64Index([1, 2, 3, 4, 1], dtype='int64', name='period')"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"period.index.quarter"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"interpreter": {
"hash": "13977d4cc82dee5f9d9535ceb495bd0ab12a43c33c664e5f0d53c24cf634b67f"
},
"kernelspec": {
"display_name": "Python 3.9.0 ('pandas-startup')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0"
}
},
"nbformat": 4,
"nbformat_minor": 0
}