# applymap

In [1]:
import pandas as pd

# Two object columns whose cells are Python lists of differing lengths.
df = pd.DataFrame(
    dict(
        a=[[13, 3, 5, 6], [2, 4, 65, 6]],
        b=[[44, 5, 6, 88, 0.5, 1], ["22", 40, 100]],
    )
)
# DataFrame.applymap was deprecated in pandas 2.1 in favour of DataFrame.map.
# Pick whichever this pandas version provides so the cell runs without a
# deprecation warning on new releases and still works on older ones.
elementwise = df.map if hasattr(df, "map") else df.applymap
# Apply len to every cell: each list is replaced by its element count.
lengths = elementwise(len)
lengths

Unnamed: 0,a,b
0,4,6
1,4,3


# pipe

In [2]:
%%bash
# Feed the text to grep through a here-document, keep only the lines that
# contain "ed", then delete the newlines so the matches form one line.
grep ed <<'EOF' | tr -d "\n"

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
EOF

Complex is better than complicated.Flat is better than nested.

## 管道操作模拟

In [3]:
def echo():
    """Return six lines of the Zen of Python as one newline-separated string.

    Plays the role of the shell ``echo`` stage in the simulated pipeline.
    Leading and trailing whitespace is stripped, so the result starts with
    "Beautiful" and ends with "dense." (no stray closing quote).
    """
    poetry = """
Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
    """
    return poetry.strip()

In [4]:
def grep(content: str, pattern: str):
    """Return the lines of ``content`` that match ``pattern``.

    Args:
        content: Text to search; it is split into lines with ``str.splitlines``.
        pattern: Regular expression tested against each line with ``re.search``.

    Returns:
        The matching lines joined by ``"\\n"`` (an empty string when nothing
        matches). Keeping the separator preserves the line structure, so the
        next pipeline stage decides what happens to the newlines.
    """
    import re

    matcher = re.compile(pattern)
    return "\n".join(line for line in content.splitlines() if matcher.search(line))

In [5]:
def tr(content: str, delete: bool, char: str):
    """Delete ``char`` from ``content``, in the spirit of the shell's ``tr -d``.

    Args:
        content: Text to process.
        delete: When false, ``content`` is returned unchanged.
        char: Text to remove; every occurrence is dropped.

    Returns:
        A string in every case. Only ``char`` is removed: newlines survive
        unless ``char`` itself is the newline, and empty input yields ``""``.
    """
    if not delete:
        return content
    return content.replace(char, "")

In [6]:
# Pipeline, one stage per line: echo -> grep "ed" -> tr (delete newlines).
source_text = echo()
matching_text = grep(content=source_text, pattern="ed")
tr(content=matching_text, delete=True, char="\n")

'Complex is better than complicated.Flat is better than nested.'

## DataFrame.pipe 示例

In [7]:
import pandas as pd

# Single-column frame holding the letters a-e, one per row.
data = pd.DataFrame({"a": list("abcde")})
data

Unnamed: 0,a
0,a
1,b
2,c
3,d
4,e


In [8]:
def parse(df: pd.DataFrame):
    """Return a copy of ``df`` extended with the derived columns ``b`` and ``c``.

    Args:
        df: Frame with a column ``a`` of strings.

    Returns:
        A new frame; ``df`` itself is left untouched, so calling this
        repeatedly on the same input always gives the same result.
        ``b`` is a 1-based row counter. ``c`` is the upper-cased value of
        ``a`` when that value is "a", "b" or "c", and ``None`` otherwise.
    """
    length = df["a"].shape[0]
    # assign() works on a copy, which keeps this pipe stage free of side effects.
    return df.assign(
        b=list(range(1, length + 1)),
        c=[v.upper() if v in ("a", "b", "c") else None for v in df["a"]],
    )

In [9]:
def upper(df: pd.DataFrame, cols=None):
    """Return a frame whose cells are upper-cased strings.

    Args:
        df: Input frame; it is never modified.
        cols: Column label or list of labels to convert. When omitted (or
            empty) every column is converted.

    Returns:
        A new frame. Converted cells hold ``str(value).upper()``; columns
        not named in ``cols`` keep their original values.
    """

    def _upper_cells(frame):
        # Column-wise Series.map avoids DataFrame.applymap, which has been
        # deprecated since pandas 2.1.
        return frame.apply(lambda column: column.map(lambda value: str(value).upper()))

    if isinstance(cols, str):
        # A bare label means "just this column".
        cols = [cols]
    if cols is None or len(cols) == 0:
        return _upper_cells(df)

    selected = list(cols)
    result = df.copy()  # keep the caller's frame intact
    result[selected] = _upper_cells(df[selected])
    return result

In [10]:
# Chain the two stages: parse adds columns b and c, then upper (called without
# cols) upper-cases the string form of every cell.
(
    data
    .pipe(parse)
    .pipe(upper)
)

Unnamed: 0,a,b,c
0,A,1,A
1,B,2,B
2,C,3,C
3,D,4,NONE
4,E,5,NONE


In [11]:
# Extra arguments given to pipe() are forwarded to the stage function, so
# upper only converts column a here.
(
    data
    .pipe(parse)
    .pipe(upper, cols=["a"])
)

Unnamed: 0,a,b,c
0,A,1,A
1,B,2,B
2,C,3,C
3,D,4,
4,E,5,


# rename

In [12]:
import pandas as pd

# Base frame for the rename examples.
data = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
df = data.copy(deep=True)  # 1: work on a deep copy so data keeps its labels
df.columns

Index(['a', 'b'], dtype='object')

In [13]:
# Replace every label at once: the list must supply one name per column.
mapper = ["A", "B"]
df.columns = mapper
df.columns

Index(['A', 'B'], dtype='object')

In [14]:
# Start again from the original labels; copy() is deep by default.
df = data.copy()
df

Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6


In [15]:
cols = df.columns.tolist()  # 1: plain Python list of the current labels
position = cols.index("b")  # 2: find where the label to rename sits...
cols[position] = "B"  # ...and overwrite just that entry
cols  # 3: show the edited list

['a', 'B']

In [16]:
df.columns = cols  # 4: assign the edited label list back to the frame
df.columns

Index(['a', 'B'], dtype='object')

In [17]:
# Fresh copy with the original labels; copy() is deep by default.
df = data.copy()
df.rename(columns={"b": "B"})  # 1: the renamed frame is returned; df is not reassigned

Unnamed: 0,a,B
0,1,4
1,2,5
2,3,6


In [18]:
# The rename() result above was not assigned, so df still shows its original labels.
df.columns

Index(['a', 'b'], dtype='object')

In [19]:
# columns= also accepts a callable applied to each label; this one maps "b" to
# "B" and returns every other label unchanged.
df.rename(columns=lambda label: {"b": "B"}.get(label, label))

Unnamed: 0,a,B
0,1,4
1,2,5
2,3,6


# filter

In [20]:
import random
import string

import pandas as pd

# Seed the generator so the sampled letters are the same on every run.
random.seed(42)

# One column of five random ASCII letters per label. The labels mix prefixes
# (a-d) and numeric suffixes so the selection examples have something to match.
column_names = ["a1", "a2", "a3", "b1", "c1", "d1", "b2"]
data = pd.DataFrame(
    {name: random.choices(string.ascii_letters, k=5) for name in column_names}
)
data.head()

Unnamed: 0,a1,a2,a3,b1,c1,d1,b2
0,k,U,H,p,Z,p,G
1,z,N,A,c,B,W,B
2,X,R,B,z,K,I,B
3,Y,r,y,a,k,u,F
4,Y,H,Q,D,J,j,J


In [21]:
# Label-based selection: every row (:) and the two named columns.
data.loc[:, ["a1", "b1"]]

Unnamed: 0,a1,b1
0,k,p
1,z,c
2,X,z
3,Y,a
4,Y,D


In [22]:
# Shorthand: indexing the frame with a list of labels selects those columns.
data[["a1", "b1"]]

Unnamed: 0,a1,b1
0,k,p
1,z,c
2,X,z
3,Y,a
4,Y,D


In [23]:
# Pull the column labels out as a plain list so they can be filtered with ordinary Python.
cols = data.columns.tolist()
print(cols)

['a1', 'a2', 'a3', 'b1', 'c1', 'd1', 'b2']


In [24]:
# Keep only the labels whose name ends in "1".
cols = list(filter(lambda name: name.endswith("1"), cols))
print(cols)

['a1', 'b1', 'c1', 'd1']


In [25]:
# Select the columns whose labels are held in cols.
data[cols]

Unnamed: 0,a1,b1,c1,d1
0,k,p,Z,p
1,z,c,B,W
2,X,z,K,I
3,Y,a,k,u
4,Y,D,J,j


In [26]:
# filter(items=...) keeps the columns whose labels appear in the list.
data.filter(items=["a1", "b1"])

Unnamed: 0,a1,b1
0,k,p
1,z,c
2,X,z
3,Y,a
4,Y,D


In [27]:
# filter(regex=...) keeps the labels matching the pattern: one letter a-d followed by "1".
data.filter(regex=r"^[a-d]1$")

Unnamed: 0,a1,b1,c1,d1
0,k,p,Z,p
1,z,c,B,W
2,X,z,K,I
3,Y,a,k,u
4,Y,D,J,j


In [28]:
# Alternation plus a character class: a, b or c followed by 1 or 2.
data.filter(regex=r"^(a|b|c)[1-2]$")

Unnamed: 0,a1,a2,b1,c1,b2
0,k,U,p,Z,G
1,z,N,c,B,B
2,X,R,z,K,B
3,Y,r,a,k,F
4,Y,H,D,J,J


# assign

In [29]:
import pandas as pd

# Five-row frame with one letter of "hello" per row.
data = pd.DataFrame({"a": list("hello")})
print(data.head())

   a
0  h
1  e
2  l
3  l
4  o


In [30]:
# Column-by-column assignment: both statements modify data itself.
data["a"] = data["a"].str.upper()
data["b"] = [*range(1, 6)]
print(data.head())

   a  b
0  H  1
1  E  2
2  L  3
3  L  4
4  O  5


In [31]:
# Rebuild the starting frame, then derive both columns in one assign() call.
# assign() returns a new frame, so data keeps its single lower-case column.
data = pd.DataFrame({"a": list("hello")})
data.assign(
    a=lambda frame: frame["a"].str.upper(),
    b=[*range(1, 6)],
)

Unnamed: 0,a,b
0,H,1
1,E,2
2,L,3
3,L,4
4,O,5


# eval & query

In [32]:
import random
import string

import pandas as pd

# Fixed seed so the sampled pid letters are the same on every run.
random.seed(3.14)

uid_values = list(range(1, 11))
pid_values = random.choices(string.ascii_letters, k=10)
data = pd.DataFrame({"uid": uid_values, "pid": pid_values})
data.head()

Unnamed: 0,uid,pid
0,1,O
1,2,L
2,3,k
3,4,F
4,5,Z


In [33]:
# Boolean mask: keep the rows whose uid is even.
is_even_uid = data["uid"] % 2 == 0
data.loc[is_even_uid]

Unnamed: 0,uid,pid
1,2,L
3,4,F
5,6,u
7,8,V
9,10,m


In [34]:
# Three conditions combined with &: even uid, upper-case pid, and pid other than "V".
is_even_uid = data["uid"] % 2 == 0
is_upper_pid = data["pid"].str.isupper()
is_not_v = data["pid"] != "V"
data.loc[is_even_uid & is_upper_pid & is_not_v]

Unnamed: 0,uid,pid
1,2,L
3,4,F


In [35]:
# Build the same boolean mask from an expression string with DataFrame.eval.
# The trailing backslashes sit inside the string literal, so Python joins the
# three lines into a single expression before pandas parses it.
cond = data.eval(
    """
    uid % 2 == 0 and \
    pid.str.isupper() and \
    pid != 'V'
    """
)
data.loc[cond]

Unnamed: 0,uid,pid
1,2,L
3,4,F


In [36]:
# Multi-line eval: each `name = expression` line sets a column in the result.
# uid is converted to text first so it can be concatenated with pid into code.
data.eval(
    """
    uid = uid.astype('str')
    code = uid.str.cat(pid, sep="_")
    is_vip = pid.str.isupper()
    """
)

Unnamed: 0,uid,pid,code,is_vip
0,1,O,1_O,True
1,2,L,2_L,True
2,3,k,3_k,False
3,4,F,4_F,True
4,5,Z,5_Z,True
5,6,u,6_u,False
6,7,t,7_t,False
7,8,V,8_V,True
8,9,x,9_x,False
9,10,m,10_m,False


In [37]:
# query() takes the same expression and returns the matching rows directly,
# without building a mask and passing it to .loc.
data.query(
    """
    uid % 2 == 0 and \
    pid.str.isupper() and \
    pid != 'V'
    """
)

Unnamed: 0,uid,pid
1,2,L
3,4,F


In [38]:
# The same query written on a single line.
data.query("""uid % 2 == 0 and pid.str.isupper() and pid != 'V'""")

Unnamed: 0,uid,pid
1,2,L
3,4,F
