From fead78da030450be542cfe81779c51a6d04ba52a Mon Sep 17 00:00:00 2001
From: 100gle <569590461@qq.com>
Date: Thu, 12 May 2022 11:18:31 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E6=96=B0=E5=A2=9EVol07=E7=AB=A0?=
 =?UTF-8?q?=E8=8A=82=E7=A4=BA=E4=BE=8B=E6=BA=90=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 code/07/regexp.ipynb | 280 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 280 insertions(+)
 create mode 100644 code/07/regexp.ipynb

diff --git a/code/07/regexp.ipynb b/code/07/regexp.ipynb
new file mode 100644
index 0000000..2956d1d
--- /dev/null
+++ b/code/07/regexp.ipynb
@@ -0,0 +1,280 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 正则表达式基本语法"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 字符"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 精确匹配\n",
+    "\n",
+    "import re\n",
+    "\n",
+    "text = \"Hello, world\"\n",
+    "pattern = \"world\"\n",
+    "print(re.findall(pattern, text))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 模糊匹配\n",
+    "\n",
+    "import re\n",
+    "\n",
+    "text = \"Hello, world\"\n",
+    "pattern = \".\"\n",
+    "print(re.findall(pattern, text))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 量词"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import re\n",
+    "\n",
+    "text = \"fooraskdsororaskaaaaadsooo\"\n",
+    "print(\n",
+    "    \"{n} pattern: \", re.findall(r\"a{3}\", text), \"\\n\",\n",
+    "    \"{n,} pattern: \", re.findall(r\"a{1,}\", text), \"\\n\",\n",
+    "    \"{n, m} pattern: \", re.findall(r\"a{1,3}\", text), \"\\n\",\n",
+    "    sep=\"\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import re\n",
+    "\n",
+    "text = \"fooraskdsororaskaaaaadsooo\"\n",
+    "print(\n",
+    "    \"? pattern: \", re.findall(r\"a?\", text), \"\\n\",\n",
+    "    \"* pattern: \", re.findall(r\"a*\", text), \"\\n\",\n",
+    "    \"+ pattern: \", re.findall(r\"a+\", text), \"\\n\",\n",
+    "    sep=\"\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 条件"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# anchor\n",
+    "import re\n",
+    "\n",
+    "poetry = \"\"\"The Zen of Python\n",
+    "Beautiful is better than ugly.\n",
+    "Explicit is better than implicit.\n",
+    "Simple is better than complex.\n",
+    "Complex is better than complicated.\n",
+    "Flat is better than nested.\n",
+    "Sparse is better than dense.\n",
+    "\"\"\"\n",
+    "\n",
+    "print(f'Start with \"S\": {re.findall(r\"^S.*\", poetry, re.MULTILINE)}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print('Anti Characters:', re.findall(r\"[^\\nA-Z].*\", poetry, re.MULTILINE))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f'End with \"ed.\": {re.findall(r\".*ed.$\", poetry, re.MULTILINE)}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# or\n",
+    "\n",
+    "import re\n",
+    "\n",
+    "text = \"\"\"\n",
+    "http://www.google.com\n",
+    "https://www.google.com\n",
+    "\"\"\"\n",
+    "\n",
+    "print(f\"Match http or https url: {re.findall(r'http.*|https.*', text)}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# group and sub string\n",
+    "\n",
+    "import re\n",
+    "\n",
+    "poetry = \"\"\"The Zen of Python\n",
+    "Beautiful is better than ugly.\n",
+    "Explicit is better than implicit.\n",
+    "Simple is better than complex.\n",
+    "Complex is better than complicated.\n",
+    "Flat is better than nested.\n",
+    "Sparse is better than dense.\n",
+    "\"\"\"\n",
+    "groups = re.findall(r\"([A-Z].*?) is better than (.*).\", poetry, re.MULTILINE)\n",
+    "for group in groups:\n",
+    "    start = group[0].lower()\n",
+    "    end = group[1]\n",
+    "    print(f\"{start} -> {end}\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 在 Python 中使用正则表达式"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import re\n",
+    "\n",
+    "poetry = \"\"\"The Zen of Python\n",
+    "Beautiful is better than ugly.\n",
+    "Explicit is better than implicit.\n",
+    "Simple is better than complex.\n",
+    "Complex is better than complicated.\n",
+    "Flat is better than nested.\n",
+    "Sparse is better than dense.\n",
+    "\"\"\"\n",
+    "\n",
+    "print(f're.match: ', re.match(r'^[Tt]he.*', poetry, re.MULTILINE).group())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f're.search: ' , re.search(r'(?P<beautiful>^B.+) is', poetry, re.MULTILINE).group(\"beautiful\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f're.findall: ', re.findall(r'(?P<Title>^[A-Z].+) is', poetry, re.MULTILINE))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f're.split: ', re.split(r' is better than |[\\n\\.]', poetry))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(f're.sub: ', re.sub(r'\\s?is better than\\s?', ' -> ', poetry))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# pattern object\n",
+    "\n",
+    "import re\n",
+    "\n",
+    "dates = \"\"\"\n",
+    "1999/12/31\n",
+    "2000/1/1\n",
+    "2000/1/2\n",
+    "2000/1/3\n",
+    "2000/1/4\n",
+    "2000/1/5\n",
+    "\"\"\"\n",
+    "\n",
+    "pattern = re.compile(r'(?P<year>\\d{4})/(?P<month>\\d{1,2})/(?P<day>\\d{1,2})', re.MULTILINE)\n",
+    "for group in pattern.finditer(dates):\n",
+    "    print(group.groupdict())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}