feat: 新增Vol07章节示例源码
This commit is contained in:
280
code/07/regexp.ipynb
Normal file
280
code/07/regexp.ipynb
Normal file
@@ -0,0 +1,280 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 正则表达式基本语法"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 字符"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 精确匹配\n",
|
||||
"\n",
|
||||
"import re\n",
|
||||
"\n",
|
||||
"text = \"Hello, world\"\n",
|
||||
"pattern = \"world\"\n",
|
||||
"print(re.findall(pattern, text))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 模糊匹配\n",
|
||||
"\n",
|
||||
"import re\n",
|
||||
"\n",
|
||||
"text = \"Hello, world\"\n",
|
||||
"pattern = \".\"\n",
|
||||
"print(re.findall(pattern, text))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 量词"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"\n",
|
||||
"text = \"fooraskdsororaskaaaaadsooo\"\n",
|
||||
"print(\n",
|
||||
" \"{n} pattern: \", re.findall(r\"a{3}\", text), \"\\n\",\n",
|
||||
" \"{n,} pattern: \", re.findall(r\"a{1,}\", text), \"\\n\",\n",
|
||||
" \"{n, m} pattern: \", re.findall(r\"a{1,3}\", text), \"\\n\",\n",
|
||||
" sep=\"\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"\n",
|
||||
"text = \"fooraskdsororaskaaaaadsooo\"\n",
|
||||
"print(\n",
|
||||
" \"? pattern: \", re.findall(r\"a?\", text), \"\\n\",\n",
|
||||
" \"* pattern: \", re.findall(r\"a*\", text), \"\\n\",\n",
|
||||
" \"+ pattern: \", re.findall(r\"a+\", text), \"\\n\",\n",
|
||||
" sep=\"\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 条件"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# anchor\n",
|
||||
"import re\n",
|
||||
"\n",
|
||||
"poetry = \"\"\"The Zen of Python\n",
|
||||
"Beautiful is better than ugly.\n",
|
||||
"Explicit is better than implicit.\n",
|
||||
"Simple is better than complex.\n",
|
||||
"Complex is better than complicated.\n",
|
||||
"Flat is better than nested.\n",
|
||||
"Sparse is better than dense.\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"print(f'Start with \"S\": {re.findall(r\"^S.*\", poetry, re.MULTILINE)}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print('Anti Characters:', re.findall(r\"[^\\nA-Z].*\", poetry, re.MULTILINE))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(f'End with \"ed.\": {re.findall(r\".*ed.$\", poetry, re.MULTILINE)}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# or\n",
|
||||
"\n",
|
||||
"import re\n",
|
||||
"\n",
|
||||
"text = \"\"\"\n",
|
||||
"http://www.google.com\n",
|
||||
"https://www.google.com\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"print(f\"Match http or https url: {re.findall(r'http.*|https.*', text)}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# group and sub string\n",
|
||||
"\n",
|
||||
"import re\n",
|
||||
"\n",
|
||||
"poetry = \"\"\"The Zen of Python\n",
|
||||
"Beautiful is better than ugly.\n",
|
||||
"Explicit is better than implicit.\n",
|
||||
"Simple is better than complex.\n",
|
||||
"Complex is better than complicated.\n",
|
||||
"Flat is better than nested.\n",
|
||||
"Sparse is better than dense.\n",
|
||||
"\"\"\"\n",
|
||||
"groups = re.findall(r\"([A-Z].*?) is better than (.*).\", poetry, re.MULTILINE)\n",
|
||||
"for group in groups:\n",
|
||||
" start = group[0].lower()\n",
|
||||
" end = group[1]\n",
|
||||
" print(f\"{start} -> {end}\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 在 Python 中使用正则表达式"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"\n",
|
||||
"poetry = \"\"\"The Zen of Python\n",
|
||||
"Beautiful is better than ugly.\n",
|
||||
"Explicit is better than implicit.\n",
|
||||
"Simple is better than complex.\n",
|
||||
"Complex is better than complicated.\n",
|
||||
"Flat is better than nested.\n",
|
||||
"Sparse is better than dense.\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"print(f're.match: ', re.match(r'^[Tt]he.*', poetry, re.MULTILINE).group())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(f're.search: ' , re.search(r'(?P<beautiful>^B.+) is', poetry, re.MULTILINE).group(\"beautiful\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(f're.findall: ', re.findall(r'(?P<Title>^[A-Z].+) is', poetry, re.MULTILINE))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(f're.split: ', re.split(r' is better than |[\\n\\.]', poetry))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(f're.sub: ', re.sub(r'\\s?is better than\\s?', ' -> ', poetry))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# pattern object\n",
|
||||
"\n",
|
||||
"import re\n",
|
||||
"\n",
|
||||
"dates = \"\"\"\n",
|
||||
"1999/12/31\n",
|
||||
"2000/1/1\n",
|
||||
"2000/1/2\n",
|
||||
"2000/1/3\n",
|
||||
"2000/1/4\n",
|
||||
"2000/1/5\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"pattern = re.compile(r'(?P<year>\\d{4})/(?P<month>\\d{1,2})/(?P<day>\\d{1,2})', re.MULTILINE)\n",
|
||||
"for group in pattern.finditer(dates):\n",
|
||||
" print(group.groupdict())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
Reference in New Issue
Block a user