From 55f588cf9302544fa63b7e510591956ef71fc948 Mon Sep 17 00:00:00 2001 From: 100gle <569590461@qq.com> Date: Tue, 7 Feb 2023 11:31:59 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=96=B0=E5=A2=9E=E6=8A=80=E8=83=BD?= =?UTF-8?q?=E6=89=A9=E5=B1=95N13=E4=B8=80=E7=AB=A0=E7=9B=B8=E5=85=B3?= =?UTF-8?q?=E7=A4=BA=E4=BE=8B=E6=BA=90=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- code/newsletter/N13/faker.ipynb | 727 ++++++++++++++++++++++++ code/newsletter/N13/tests/conftest.py | 42 ++ code/newsletter/N13/tests/test_faker.py | 29 + 3 files changed, 798 insertions(+) create mode 100644 code/newsletter/N13/faker.ipynb create mode 100644 code/newsletter/N13/tests/conftest.py create mode 100644 code/newsletter/N13/tests/test_faker.py diff --git a/code/newsletter/N13/faker.ipynb b/code/newsletter/N13/faker.ipynb new file mode 100644 index 0000000..35b9ef4 --- /dev/null +++ b/code/newsletter/N13/faker.ipynb @@ -0,0 +1,727 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 快速生成假数据" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import faker\n", + "\n", + "fake = faker.Faker()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Paul Jones'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.name()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Hong Kong dollar'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.currency_name() " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Mosley, Stevens and Cuevas'" + ] + }, + "execution_count": 4, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "fake.company()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.providers" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Craig Daniels'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.providers[5].name()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 生成本土化数据" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import faker\n", + "fake = faker.Faker(locale=\"zh-CN\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'黄丽娟'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.name()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'13918155548'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.phone_number()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'佳禾网络有限公司'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.company()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + 
"data": { + "text/plain": [ + "'tao68@example.org'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.email()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 创建多个本土化版本" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "fake = faker.Faker(locale=[\"zh-CN\", \"ja-JP\", \"ko-KR\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'程畅'" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake[\"zh-CN\"].name()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'近藤 稔'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake[\"ja-JP\"].name()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'구현우'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake[\"ko-KR\"].name()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['zh_CN', 'ja_JP', 'ko_KR']" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.locales" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[,\n", + " ,\n", + " ]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.factories" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 使用社区生态中补充的 Provider" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + 
"metadata": {}, + "outputs": [], + "source": [ + "# uncomment below code to install third-party provider\n", + "# !pip install faker-biology" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "from faker import Faker\n", + "from faker_biology.bioseq import Bioseq\n", + "\n", + "fake = Faker()\n", + "fake.add_provider(Bioseq)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'TCGACCATCG'" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.dna(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'UAAGGUCGGC'" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.rna(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'MVGFAGGAENV'" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.protein(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 如何实现自己的 Provider" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "import faker\n", + "\n", + "from faker.providers import DynamicProvider\n", + "\n", + "fake = faker.Faker()\n", + "\n", + "locales = DynamicProvider(\n", + " provider_name=\"custom_locales\",\n", + " elements=[\"zh-CN\", \"en-US\", \"ja-JP\", \"ko-KR\"]\n", + ")\n", + "fake.add_provider(locales)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'ko-KR'" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.custom_locales()" + ] + }, 
+ { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "import faker\n", + "from faker.providers import BaseProvider\n", + "\n", + "\n", + "class MyProvider(BaseProvider):\n", + " def custom_locales(self):\n", + " choices = [\"zh-CN\", \"en-US\", \"ja-JP\", \"ko-KR\"]\n", + " return choices\n", + "\n", + " def custom_programming_languages(self):\n", + " langs = [\"Python\", \"Java\", \"Golang\", \"JavaScript\", \"Swift\"]\n", + " return langs\n", + "\n", + " def custom_numbers(self, n: int = 10):\n", + " if n < 0:\n", + " raise ValueError(\"n must be greater than or equal to 0.\")\n", + "\n", + " return list(range(n))\n", + "\n", + "\n", + "fake = faker.Faker()\n", + "fake.add_provider(MyProvider)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['zh-CN', 'en-US', 'ja-JP', 'ko-KR']" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.custom_locales()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Python', 'Java', 'Golang', 'JavaScript', 'Swift']" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.custom_programming_languages()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.custom_numbers()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0, 1, 2, 3, 4]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.custom_numbers(n=5)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "import faker\n", + "from faker.providers import BaseProvider\n", + "\n", + "\n", + "class MyProvider(BaseProvider):\n", + " def custom_locales(self):\n", + " choices = [\"zh-CN\", \"en-US\", \"ja-JP\", \"ko-KR\"]\n", + " return self.random_element(choices)\n", + "\n", + " def custom_programming_languages(self):\n", + " langs = [\"Python\", \"Java\", \"Golang\", \"JavaScript\", \"Swift\"]\n", + " return self.random_element(langs)\n", + "\n", + " def custom_numbers(self, n: int = 10):\n", + " if n < 0:\n", + " raise ValueError(\"n must be greater than or equal to 0.\")\n", + "\n", + " return self.random_element(list(range(n)))\n", + "\n", + "fake = faker.Faker()\n", + "fake.add_provider(MyProvider)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'ko-KR'" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.custom_locales()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Swift'" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.custom_programming_languages()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "9" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fake.custom_numbers(n=10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.0 ('pandas-startup')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.0"
+ },
+ "orig_nbformat": 4,
+ "vscode": {
+ "interpreter": {
+ "hash": "13977d4cc82dee5f9d9535ceb495bd0ab12a43c33c664e5f0d53c24cf634b67f"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/code/newsletter/N13/tests/conftest.py b/code/newsletter/N13/tests/conftest.py
new file mode 100644
index 0000000..3d8f448
--- /dev/null
+++ b/code/newsletter/N13/tests/conftest.py
@@ -0,0 +1,62 @@
+"""Shared pytest fixtures for the N13 Faker examples."""
+
+import pytest
+from faker import Faker
+from faker.providers import BaseProvider
+
+
+@pytest.fixture(scope="session", autouse=True)
+def faker_session_locale():
+    """Return the locale code requested for the session-scoped Faker."""
+    return "zh-CN"
+
+
+class MyProvider(BaseProvider):
+    """Custom provider whose methods each return one randomly chosen value."""
+
+    def custom_locales(self) -> str:
+        """Return one locale code picked at random."""
+        choices = ["zh-CN", "en-US", "ja-JP", "ko-KR"]
+        return self.random_element(choices)
+
+    def custom_programming_languages(self) -> str:
+        """Return one programming-language name picked at random."""
+        langs = ["Python", "Java", "Golang", "JavaScript", "Swift"]
+        return self.random_element(langs)
+
+    def custom_numbers(self, n: int = 10) -> int:
+        """Return one integer picked at random from ``range(n)``.
+
+        Raises:
+            ValueError: If ``n`` is not positive; ``range(0)`` is empty, so
+                there would be nothing to choose from.
+        """
+        if n <= 0:
+            raise ValueError("n must be greater than 0.")
+
+        return self.random_element(list(range(n)))
+
+
+@pytest.fixture()
+def faker(request):
+    """Return a seeded Faker instance with ``MyProvider`` registered.
+
+    Uses a fresh ``Faker`` built from the ``faker_locale`` fixture when the
+    test requests it, otherwise the ``_session_faker`` fixture. The instance
+    is seeded with ``faker_seed`` when requested (0 otherwise) and its
+    ``unique`` state is cleared before it is handed to the test.
+    """
+    if "faker_locale" in request.fixturenames:
+        locale = request.getfixturevalue("faker_locale")
+        fake = Faker(locale=locale)
+    else:
+        fake = request.getfixturevalue("_session_faker")
+
+    seed = 0
+    if "faker_seed" in request.fixturenames:
+        seed = request.getfixturevalue("faker_seed")
+    fake.seed_instance(seed=seed)
+    fake.unique.clear()
+
+    fake.add_provider(MyProvider)
+    return fake
diff --git a/code/newsletter/N13/tests/test_faker.py b/code/newsletter/N13/tests/test_faker.py
new file mode 100644
index 0000000..6d7b3a2
--- /dev/null
+++ b/code/newsletter/N13/tests/test_faker.py
@@ -0,0 +1,39 @@
+import re
+
+import pytest
+
+
+def test_faker_fixture(faker):
+    # A random boolean may legitimately be False, so check the type of the
+    # value rather than its truthiness.
+    assert isinstance(faker.unique.boolean(), bool)
+
+
+def test_faker_chinese_name(faker):
+    name = faker.name()
+    print(f"name is: {name}")
+    # CJK Unified Ideographs span U+4E00..U+9FFF; stopping at U+9EFF would
+    # reject names that start with a character such as U+9F99.
+    assert re.match(r"[\u4e00-\u9fff]", name)
+
+
+def test_faker_custom_locale(faker):
+    el = faker.custom_locales()
+    assert el in ["zh-CN", "en-US", "ja-JP", "ko-KR"]
+
+
+def test_faker_custom_programming_language(faker):
+    el = faker.custom_programming_languages()
+    assert el in ["Python", "Java", "Golang", "JavaScript", "Swift"]
+
+
+@pytest.mark.parametrize("n", argvalues=[1, 10, 100])
+def test_faker_custom_numbers(faker, n):
+    el = faker.custom_numbers(n=n)
+    assert el in range(n)
+
+
+@pytest.mark.parametrize("n", argvalues=[0, -1])
+def test_faker_custom_numbers_rejects_non_positive(faker, n):
+    with pytest.raises(ValueError):
+        faker.custom_numbers(n=n)