# 什么是 Series？

In [148]:
import pandas as pd
data = [1, 2, 3, 4, 5]
series = pd.Series(data)
print(series)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [149]:
print(series.index)

RangeIndex(start=0, stop=5, step=1)


In [150]:
print(series.describe())

count    5.000000
mean     3.000000
std      1.581139
min      1.000000
25%      2.000000
50%      3.000000
75%      4.000000
max      5.000000
dtype: float64


# 常用的 Series 属性

In [151]:
import random

import pandas as pd

random.seed(233)

data = [random.randrange(1, 50) for _ in range(100)]
series = pd.Series(data, name="numbers")
series.head(5)

0    44
1    41
2    12
3    34
4    15
Name: numbers, dtype: int64

In [152]:
series.index

RangeIndex(start=0, stop=100, step=1)

In [153]:
print(list(series.index))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]


In [154]:
series.dtype

dtype('int64')

In [155]:
series.shape

(100,)

In [156]:
series.size

100

In [157]:
series.name

'numbers'

In [158]:
series.values

array([44, 41, 12, 34, 15, 31, 36, 14,  6,  7, 18, 15, 45, 46,  1,  5, 44,
       16, 39, 46, 46, 15, 48, 19,  8, 31, 21, 14,  6, 19, 32, 34, 41, 10,
       40, 46, 22, 20,  7, 33, 29, 48, 18, 27,  3, 26, 48, 12,  4, 49,  5,
       49,  8, 14, 11, 23, 21, 48, 34, 34, 29,  9,  2, 30,  8, 45, 23, 46,
       43,  7, 45,  6, 37, 24,  4, 26, 20, 35,  3, 39, 33, 30, 26, 47,  1,
        5, 31, 23, 25,  9, 41, 29, 23, 20, 39, 48, 47,  9, 14, 32])

# 常用的 Series 方法

In [159]:
arr = [random.randrange(1, 50) for _ in range(100)]
arr = pd.Series(arr)

## 数学运算 / 统计

In [160]:
series.add(arr)

0     79
1     85
2     21
3     60
4     60
      ..
95    55
96    93
97    25
98    51
99    48
Length: 100, dtype: int64

In [161]:
series.sub(arr)

0      9
1     -3
2      3
3      8
4    -30
      ..
95    41
96     1
97    -7
98   -23
99    16
Length: 100, dtype: int64

In [162]:
series.mul(arr)

0     1540
1     1804
2      108
3      884
4      675
      ... 
95     336
96    2162
97     144
98     518
99     512
Length: 100, dtype: int64

In [163]:
series.div(arr)

0     1.257143
1     0.931818
2     1.333333
3     1.307692
4     0.333333
        ...   
95    6.857143
96    1.021739
97    0.562500
98    0.378378
99    2.000000
Length: 100, dtype: float64

In [164]:
series.max()

49

In [165]:
series.min()

1

In [166]:
series.median()

25.5

In [167]:
series.mean()

25.51

In [168]:
series.std()

14.990566056926294

## 函数应用

In [169]:
def mod2(x):
    if x % 2 == 0:
        return True
    return False


idx = series.map(mod2)
idx.head()

0     True
1    False
2     True
3     True
4    False
Name: numbers, dtype: bool

In [170]:
series[idx].head()

0    44
2    12
3    34
6    36
7    14
Name: numbers, dtype: int64

In [171]:
idx = series.apply(mod2)
idx.head()

0     True
1    False
2     True
3     True
4    False
Name: numbers, dtype: bool

In [172]:
series[idx].head()

0    44
2    12
3    34
6    36
7    14
Name: numbers, dtype: int64

## 特定类型接口

In [173]:
str_series = pd.Series(["apple", "orange", "banana", "grape"], name="fruit")

In [174]:
str_series.str.count("a")

0    1
1    1
2    3
3    1
Name: fruit, dtype: int64

In [175]:
str_series.str.len()

0    5
1    6
2    6
3    5
Name: fruit, dtype: int64

In [176]:
str_series.str.replace("[p]{2}", "XX")

0     aXXle
1    orange
2    banana
3     grape
Name: fruit, dtype: object

In [177]:
dt_series = pd.Series([20220101, 20220201, 20220301, 20220401], name="date")
dt_series = pd.to_datetime(dt_series, format="%Y-%m-%d")

In [178]:
dt_series.dtype

dtype('<M8[ns]')

In [179]:
dt_series.head()

0   1970-01-01 00:00:00.020220101
1   1970-01-01 00:00:00.020220201
2   1970-01-01 00:00:00.020220301
3   1970-01-01 00:00:00.020220401
Name: date, dtype: datetime64[ns]

In [180]:
dt_series.dt.month

0    1
1    1
2    1
3    1
Name: date, dtype: int64

In [181]:
dt_series.dt.dayofyear

0    1
1    1
2    1
3    1
Name: date, dtype: int64

In [182]:
dt_series.dt.weekday

0    3
1    3
2    3
3    3
Name: date, dtype: int64