In [1]:
# -*- coding:utf-8 -*-

import pandas as pd

In [2]:
# Build a five-row frame with a single integer column "a". No index is passed,
# so pandas assigns the default one.
data = pd.DataFrame({"a": [1, 2, 3, 4, 5]})
data.head()

Unnamed: 0,a
0,1
1,2
2,3
3,4
4,5


In [3]:
# Selecting one column yields a Series (named "a") that keeps the frame's row labels.
data["a"].head()

0    1
1    2
2    3
3    4
4    5
Name: a, dtype: int64

In [4]:
# Row index of the frame: a RangeIndex, since none was specified at construction.
data.index

RangeIndex(start=0, stop=5, step=1)

In [5]:
# The column Series reports the same index as its parent frame.
data["a"].index

RangeIndex(start=0, stop=5, step=1)

In [6]:
# .loc slices by label and includes both endpoints: rows 1, 2 and 3 are returned.
data.loc[1:3, :]

Unnamed: 0,a
1,2
2,3
3,4


In [7]:
# .at fetches a single scalar addressed by (row label, column label).
data.at[3, "a"]

4

In [8]:
# Add a column "b" with one letter per row; it is promoted to the index next.
data["b"] = list("abcde")

In [9]:
# Promote column "b" to the row index. Rebinding the returned frame (rather
# than passing inplace=True) keeps the step explicit and chainable.
data = data.set_index("b")

In [10]:
# The row index is now an object-dtype Index named "b" holding the letters.
data.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object', name='b')

In [11]:
# A column pulled from the re-indexed frame carries the same "b" index.
data["a"].index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object', name='b')

In [12]:
# Display the frame with "b" acting as the row index instead of a regular column.
data.head()

Unnamed: 0_level_0,a
b,Unnamed: 1_level_1
a,1
b,2
c,3
d,4
e,5


In [13]:
# Label slicing also works with string labels; both "a" and "c" are included.
data.loc["a":"c", :]

Unnamed: 0_level_0,a
b,Unnamed: 1_level_1
a,1
b,2
c,3


In [14]:
# Scalar lookup: the first "a" is the row label, the second "a" is the column name.
data.at["a", "a"]

1

In [15]:
# reset_index() returns a new frame with "b" back as a column and a fresh
# integer index. The result is not assigned, so `data` itself is untouched.
data.reset_index()

Unnamed: 0,b,a
0,a,1
1,b,2
2,c,3
3,d,4
4,e,5


In [16]:
# Confirm that `data` is still indexed by "b" after the unassigned reset above.
data.head()

Unnamed: 0_level_0,a
b,Unnamed: 1_level_1
a,1
b,2
c,3
d,4
e,5


In [17]:
# Swap the index for a column "c" that contains repeated labels, in one chain:
# restore the old index as a column, attach "c", then index by it.
data = (
    data.reset_index()
    .assign(c=[1, 1, 2, 2, 3])
    .set_index("c")
)
# With a non-unique index, a single label selects every row that carries it.
data.loc[1, :]

Unnamed: 0_level_0,b,a
c,Unnamed: 1_level_1,Unnamed: 2_level_1
1,a,1
1,b,2


In [18]:
# Build one frame holding a column for each index flavour shown in the
# following sections; each section promotes one column via set_index().
data = pd.DataFrame(
    {
        "index": "a b c d e".split(),
        "category": pd.Categorical(["A"] * 2 + ["B"] * 3),
        "interval": pd.interval_range(start=0, end=7.5, freq=1.5),
        "datetime": pd.date_range("20220101", periods=5, freq="D"),
        # Lower-case "h": the upper-case "H" hour alias is deprecated in
        # recent pandas releases.
        "timedelta": pd.timedelta_range(start="1 day", periods=5, freq="3h"),
        "period": pd.period_range("20210101", periods=5, freq="Q"),
    }
)
data.head()

Unnamed: 0,index,category,interval,datetime,timedelta,period
0,a,A,"(0.0, 1.5]",2022-01-01,1 days 00:00:00,2021Q1
1,b,A,"(1.5, 3.0]",2022-01-02,1 days 03:00:00,2021Q2
2,c,B,"(3.0, 4.5]",2022-01-03,1 days 06:00:00,2021Q3
3,d,B,"(4.5, 6.0]",2022-01-04,1 days 09:00:00,2021Q4
4,e,B,"(6.0, 7.5]",2022-01-05,1 days 12:00:00,2022Q1


## 基础索引类型

In [19]:
# Base Index: promote the string column "index" to the row index.
# The result is bound to a new name, so `data` stays available for later sections.
base = data.set_index("index")
base.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object', name='index')

In [20]:
# A list of labels selects exactly those rows, in the order requested.
base.loc[["a", "c"], :]

Unnamed: 0_level_0,category,interval,datetime,timedelta,period
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
a,A,"(0.0, 1.5]",2022-01-01,1 days 00:00:00,2021Q1
c,B,"(3.0, 4.5]",2022-01-03,1 days 06:00:00,2021Q3


In [21]:
# String methods are exposed on the index via .str; this returns a new,
# upper-cased Index and leaves `base` unchanged.
base.index.str.upper()

Index(['A', 'B', 'C', 'D', 'E'], dtype='object', name='index')

In [22]:
# Element-wise transform of the labels: upper-case only "a" and "c",
# pass every other label through unchanged.
base.index.map(
    lambda label: label.upper() if label in frozenset("ac") else label
)

Index(['A', 'b', 'C', 'd', 'e'], dtype='object', name='index')

## 范围索引类型

In [23]:
# RangeIndex: the default index `data` received, since none was set on it.
data.index

RangeIndex(start=0, stop=5, step=1)

In [24]:
# Lower bound of the range (the first row label).
data.index.start

0

In [25]:
# Upper bound of the range; it is exclusive, so the last row label is 4.
data.index.stop

5

## 区间索引类型

In [26]:
# IntervalIndex: promote the "interval" column to the row index.
interval = data.set_index("interval")
interval.head()

Unnamed: 0_level_0,index,category,datetime,timedelta,period
interval,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"(0.0, 1.5]",a,A,2022-01-01,1 days 00:00:00,2021Q1
"(1.5, 3.0]",b,A,2022-01-02,1 days 03:00:00,2021Q2
"(3.0, 4.5]",c,B,2022-01-03,1 days 06:00:00,2021Q3
"(4.5, 6.0]",d,B,2022-01-04,1 days 09:00:00,2021Q4
"(6.0, 7.5]",e,B,2022-01-05,1 days 12:00:00,2022Q1


In [27]:
# The index holds right-closed float intervals: (left, right].
interval.index

IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0], (6.0, 7.5]], dtype='interval[float64, right]', name='interval')

In [28]:
# Scalars are matched to the interval that contains them: 1.5 falls in
# (0.0, 1.5] because the right edge is closed, and 3.2 falls in (3.0, 4.5].
interval.loc[[1.5, 3.2]]

Unnamed: 0_level_0,index,category,datetime,timedelta,period
interval,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"(0.0, 1.5]",a,A,2022-01-01,1 days 00:00:00,2021Q1
"(3.0, 4.5]",c,B,2022-01-03,1 days 06:00:00,2021Q3


In [29]:
# Rows can also be addressed with Interval objects that equal the index
# entries exactly (same bounds, same closed side).
interval.loc[
    [
        pd.Interval(left=0, right=1.5, closed="right"),
        pd.Interval(left=3, right=4.5, closed="right"),
    ],
    :,
]

Unnamed: 0_level_0,index,category,datetime,timedelta,period
interval,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"(0.0, 1.5]",a,A,2022-01-01,1 days 00:00:00,2021Q1
"(3.0, 4.5]",c,B,2022-01-03,1 days 06:00:00,2021Q3


## 类别索引类型

In [30]:
# CategoricalIndex: promote the categorical column "category" to the row index.
# The labels repeat ("A" twice, "B" three times), so this index is not unique.
cat = data.set_index("category")
cat.head()

Unnamed: 0_level_0,index,interval,datetime,timedelta,period
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A,a,"(0.0, 1.5]",2022-01-01,1 days 00:00:00,2021Q1
A,b,"(1.5, 3.0]",2022-01-02,1 days 03:00:00,2021Q2
B,c,"(3.0, 4.5]",2022-01-03,1 days 06:00:00,2021Q3
B,d,"(4.5, 6.0]",2022-01-04,1 days 09:00:00,2021Q4
B,e,"(6.0, 7.5]",2022-01-05,1 days 12:00:00,2022Q1


In [31]:
# The index keeps the categorical dtype: per-row labels plus the category set.
cat.index

CategoricalIndex(['A', 'A', 'B', 'B', 'B'], categories=['A', 'B'], ordered=False, dtype='category', name='category')

In [32]:
# A single label on a non-unique index returns every row in that category.
cat.loc["A", :]

Unnamed: 0_level_0,index,interval,datetime,timedelta,period
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A,a,"(0.0, 1.5]",2022-01-01,1 days 00:00:00,2021Q1
A,b,"(1.5, 3.0]",2022-01-02,1 days 03:00:00,2021Q2


In [33]:
# The distinct categories backing the index.
cat.index.categories

Index(['A', 'B'], dtype='object')

In [34]:
# Integer code per row; each code is a position in `categories` (0 -> "A", 1 -> "B").
cat.index.codes

array([0, 0, 1, 1, 1], dtype=int8)

In [35]:
# Rename categories with a callable applied to each category value. This
# returns a new index; `cat` is not modified because nothing is assigned.
cat.index.rename_categories(lambda c: f"LEVEL-{c}")

CategoricalIndex(['LEVEL-A', 'LEVEL-A', 'LEVEL-B', 'LEVEL-B', 'LEVEL-B'], categories=['LEVEL-A', 'LEVEL-B'], ordered=False, dtype='category', name='category')

## 日期时间索引类型

In [36]:
# DatetimeIndex: promote the daily "datetime" column to the row index.
dt = data.set_index("datetime")
dt.head()

Unnamed: 0_level_0,index,category,interval,timedelta,period
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-01-01,a,A,"(0.0, 1.5]",1 days 00:00:00,2021Q1
2022-01-02,b,A,"(1.5, 3.0]",1 days 03:00:00,2021Q2
2022-01-03,c,B,"(3.0, 4.5]",1 days 06:00:00,2021Q3
2022-01-04,d,B,"(4.5, 6.0]",1 days 09:00:00,2021Q4
2022-01-05,e,B,"(6.0, 7.5]",1 days 12:00:00,2022Q1


In [37]:
# The index is a DatetimeIndex with nanosecond-resolution timestamps.
dt.index

DatetimeIndex(['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04',
               '2022-01-05'],
              dtype='datetime64[ns]', name='datetime', freq=None)

In [38]:
# Date strings are accepted as slice bounds; compact YYYYMMDD form here.
# Both endpoints are included in the result.
dt.loc["20220101":"20220103", :]

Unnamed: 0_level_0,index,category,interval,timedelta,period
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-01-01,a,A,"(0.0, 1.5]",1 days 00:00:00,2021Q1
2022-01-02,b,A,"(1.5, 3.0]",1 days 03:00:00,2021Q2
2022-01-03,c,B,"(3.0, 4.5]",1 days 06:00:00,2021Q3


In [39]:
# Same slice written with ISO-style dashes; it selects the same three rows.
dt.loc["2022-01-01":"2022-01-03", :]

Unnamed: 0_level_0,index,category,interval,timedelta,period
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-01-01,a,A,"(0.0, 1.5]",1 days 00:00:00,2021Q1
2022-01-02,b,A,"(1.5, 3.0]",1 days 03:00:00,2021Q2
2022-01-03,c,B,"(3.0, 4.5]",1 days 06:00:00,2021Q3


In [40]:
# Date components are available directly on the index; here the day of the month.
dt.index.day

Int64Index([1, 2, 3, 4, 5], dtype='int64', name='datetime')

In [41]:
# Format every timestamp as a "YYYY/MM/DD" string. The result is an
# object-dtype Index of strings, not a DatetimeIndex.
# The previous pattern contained a stray "%" ("%m%/%d"); "%/" is not a valid
# directive, and how it is rendered depends on the platform.
dt.index.strftime("%Y/%m/%d")

Index(['2022/01/01', '2022/01/02', '2022/01/03', '2022/01/04', '2022/01/05'], dtype='object', name='datetime')

## 时间差索引类型

In [42]:
# TimedeltaIndex: promote the "timedelta" column (durations spaced 3 hours
# apart, starting at 1 day) to the row index.
delta = data.set_index("timedelta")
delta.head()

Unnamed: 0_level_0,index,category,interval,datetime,period
timedelta,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1 days 00:00:00,a,A,"(0.0, 1.5]",2022-01-01,2021Q1
1 days 03:00:00,b,A,"(1.5, 3.0]",2022-01-02,2021Q2
1 days 06:00:00,c,B,"(3.0, 4.5]",2022-01-03,2021Q3
1 days 09:00:00,d,B,"(4.5, 6.0]",2022-01-04,2021Q4
1 days 12:00:00,e,B,"(6.0, 7.5]",2022-01-05,2022Q1


In [43]:
# The index is a TimedeltaIndex of nanosecond-resolution durations.
delta.index

TimedeltaIndex(['1 days 00:00:00', '1 days 03:00:00', '1 days 06:00:00',
                '1 days 09:00:00', '1 days 12:00:00'],
               dtype='timedelta64[ns]', name='timedelta', freq=None)

In [44]:
# Element-wise arithmetic between indexes: timestamp minus duration gives a
# timestamp. The two operands have different names, and the result carries none.
dt.index - delta.index

DatetimeIndex(['2021-12-31 00:00:00', '2021-12-31 21:00:00',
               '2022-01-01 18:00:00', '2022-01-02 15:00:00',
               '2022-01-03 12:00:00'],
              dtype='datetime64[ns]', freq=None)

In [45]:
# Seconds component of each duration, excluding whole days
# (e.g. "1 days 03:00:00" -> 10800).
delta.index.seconds

Int64Index([0, 10800, 21600, 32400, 43200], dtype='int64', name='timedelta')

In [46]:
# Break each duration into its parts; the result is a DataFrame with one
# column per unit, from days down to nanoseconds.
delta.index.components

Unnamed: 0,days,hours,minutes,seconds,milliseconds,microseconds,nanoseconds
0,1,0,0,0,0,0,0
1,1,3,0,0,0,0,0
2,1,6,0,0,0,0,0
3,1,9,0,0,0,0,0
4,1,12,0,0,0,0,0


In [47]:
# Select rows by exact duration; Timedelta objects parsed from strings are used as keys.
delta.loc[[pd.Timedelta("1d"), pd.Timedelta("1d 9h")], :]

Unnamed: 0_level_0,index,category,interval,datetime,period
timedelta,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1 days 00:00:00,a,A,"(0.0, 1.5]",2022-01-01,2021Q1
1 days 09:00:00,d,B,"(4.5, 6.0]",2022-01-04,2021Q4


## 周期索引类型

In [48]:
# PeriodIndex: promote the quarterly "period" column to the row index.
period = data.set_index("period")
period.head()

Unnamed: 0_level_0,index,category,interval,datetime,timedelta
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021Q1,a,A,"(0.0, 1.5]",2022-01-01,1 days 00:00:00
2021Q2,b,A,"(1.5, 3.0]",2022-01-02,1 days 03:00:00
2021Q3,c,B,"(3.0, 4.5]",2022-01-03,1 days 06:00:00
2021Q4,d,B,"(4.5, 6.0]",2022-01-04,1 days 09:00:00
2022Q1,e,B,"(6.0, 7.5]",2022-01-05,1 days 12:00:00


In [49]:
# The index is a PeriodIndex of quarters (dtype period[Q-DEC]).
period.index

PeriodIndex(['2021Q1', '2021Q2', '2021Q3', '2021Q4', '2022Q1'], dtype='period[Q-DEC]', name='period')

In [50]:
# Period labels can be given as strings; rows come back in the order requested.
period.loc[["2021Q4", "2021Q1"], :]

Unnamed: 0_level_0,index,category,interval,datetime,timedelta
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021Q4,d,B,"(4.5, 6.0]",2022-01-04,1 days 09:00:00
2021Q1,a,A,"(0.0, 1.5]",2022-01-01,1 days 00:00:00


In [51]:
# Timestamp at which each period begins (the first day of each quarter here).
period.index.start_time

DatetimeIndex(['2021-01-01', '2021-04-01', '2021-07-01', '2021-10-01',
               '2022-01-01'],
              dtype='datetime64[ns]', name='period', freq='QS-OCT')

In [52]:
# Quarter number (1-4) of each period.
period.index.quarter

Int64Index([1, 2, 3, 4, 1], dtype='int64', name='period')