# 正则表达式基本语法

## 字符

In [None]:
# Exact match: a pattern without metacharacters matches itself literally.

import re

pattern = r"world"
text = "Hello, world"
print(re.findall(pattern, text))

In [None]:
# Wildcard match: "." matches any single character except a newline,
# so every character of the text comes back as its own match.

import re

pattern = r"."
text = "Hello, world"
print(re.findall(pattern, text))

## 量词

In [None]:
import re

# Bounded quantifiers: {n} = exactly n, {n,} = at least n, {n,m} = n to m.
# The labels are deliberately written without spaces: in Python's re module
# "a{1, 3}" (with a space) is not a quantifier and matches that literal text.
text = "fooraskdsororaskaaaaadsooo"
print("{n} pattern:", re.findall(r"a{3}", text))
print("{n,} pattern:", re.findall(r"a{1,}", text))
print("{n,m} pattern:", re.findall(r"a{1,3}", text))
# Keep the blank line that separates this cell's output from what follows.
print()

In [None]:
import re

# Shorthand quantifiers: "?" means zero or one, "*" zero or more, "+" one or more.
# "?" and "*" may match zero characters, so their results contain empty strings.
text = "fooraskdsororaskaaaaadsooo"
print("? pattern:", re.findall(r"a?", text))
print("* pattern:", re.findall(r"a*", text))
print("+ pattern:", re.findall(r"a+", text))
# Trailing blank line, as produced by the final newline of the cell's output.
print()

## 条件

In [None]:
# Anchors: with re.MULTILINE, "^" matches at the start of every line
# instead of only at the start of the whole string.
import re

poetry = (
    "The Zen of Python\n"
    "Beautiful is better than ugly.\n"
    "Explicit is better than implicit.\n"
    "Simple is better than complex.\n"
    "Complex is better than complicated.\n"
    "Flat is better than nested.\n"
    "Sparse is better than dense.\n"
)

print('Start with "S":', re.findall(r"^S.*", poetry, re.MULTILINE))

In [None]:
# Negated character class: [^\nA-Z] matches one character that is neither a
# newline nor an uppercase ASCII letter; ".*" then runs to the end of the line.
# re.MULTILINE has no effect here because the pattern has no "^" or "$" anchor.
print(
    "Anti Characters:",
    re.findall(r"[^\nA-Z].*", poetry, re.MULTILINE),
)

In [None]:
# With re.MULTILINE, "$" anchors at the end of each line. The final "\." is
# escaped so it matches a literal period; an unescaped "." would match any
# character, so a line ending in e.g. "eds" would be reported as well.
# The label and the matches are passed as separate print arguments because a
# backslash inside an f-string expression is a syntax error before Python 3.12.
print('End with "ed.":', re.findall(r".*ed\.$", poetry, re.MULTILINE))

In [None]:
# Alternation ("|"): match either URL scheme, followed by the rest of the line.

import re

text = """
http://www.google.com
https://www.google.com
"""

# The alternatives sit in a non-capturing group so that "|" applies only to
# the scheme and findall still returns whole matches. A bare
# "http.*|https.*" would never reach its second branch, because "http.*"
# already matches every https URL (and any other text starting with "http").
url_pattern = r"(?:http|https)://.*"
print(f"Match http or https url: {re.findall(url_pattern, text)}")

In [None]:
# Groups and substrings: each pair of parentheses captures part of the match,
# and findall returns one tuple of captured substrings per match.

import re

poetry = """The Zen of Python
Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
"""

# The trailing "\." is escaped so that it matches the sentence's period. An
# unescaped "." matches any character and would silently chop the last
# character off a line that does not end with a period. No flag is needed:
# the pattern has no "^"/"$" anchors and "." never crosses a newline.
better_than_pattern = r"([A-Z].*?) is better than (.*)\."
groups = re.findall(better_than_pattern, poetry)
for better, worse in groups:
    print(f"{better.lower()} -> {worse}")


# 在 Python 中使用正则表达式

In [None]:
import re

poetry = (
    "The Zen of Python\n"
    "Beautiful is better than ugly.\n"
    "Explicit is better than implicit.\n"
    "Simple is better than complex.\n"
    "Complex is better than complicated.\n"
    "Flat is better than nested.\n"
    "Sparse is better than dense.\n"
)

# re.match only tries the pattern at the very beginning of the string.
print("re.match: ", re.match(r"^[Tt]he.*", poetry, re.MULTILINE).group())

In [None]:
# re.search scans the string for the first location where the pattern
# matches; the named group "beautiful" captures the text before " is".
print(
    "re.search: ",
    re.search(r"(?P<beautiful>^B.+) is", poetry, re.MULTILINE).group("beautiful"),
)

In [None]:
# re.findall returns every match; because the pattern contains exactly one
# group, each result is that group's text rather than the whole match.
print(
    "re.findall: ",
    re.findall(r"(?P<Title>^[A-Z].+) is", poetry, re.MULTILINE),
)

In [None]:
# re.split cuts the string wherever the pattern matches: at the phrase
# " is better than ", at a newline, or at a period.
print(
    "re.split: ",
    re.split(r" is better than |[\n\.]", poetry),
)

In [None]:
# re.sub replaces every match of the pattern with the replacement string;
# the optional "\s?" on each side also swallows one adjacent whitespace char.
print(
    "re.sub: ",
    re.sub(r"\s?is better than\s?", " -> ", poetry),
)

In [None]:
# Pattern objects: compile the regular expression once, then call methods on
# the compiled object (here finditer) instead of the module-level functions.

import re

dates = """
1999/12/31
2000/1/1
2000/1/2
2000/1/3
2000/1/4
2000/1/5
"""

pattern = re.compile(
    r"(?P<year>\d{4})/(?P<month>\d{1,2})/(?P<day>\d{1,2})",
    re.MULTILINE,
)
# finditer yields one match object per date; groupdict() maps each named
# group (year, month, day) to the text it captured.
for date_match in pattern.finditer(dates):
    print(date_match.groupdict())