{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## 快速生成假数据" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import faker\n", "\n", "fake = faker.Faker()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Paul Jones'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake.name()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Hong Kong dollar'" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake.currency_name() " ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Mosley, Stevens and Cuevas'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake.company()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake.providers" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Craig Daniels'" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake.providers[5].name()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 生成本土化数据" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "import faker\n", "fake = faker.Faker(locale=\"zh-CN\")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'黄丽娟'" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], 
"source": [ "fake.name()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'13918155548'" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake.phone_number()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'佳禾网络有限公司'" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake.company()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'tao68@example.org'" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake.email()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 创建多个本土化版本" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "fake = faker.Faker(locale=[\"zh-CN\", \"ja-JP\", \"ko-KR\"])" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'程畅'" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake[\"zh-CN\"].name()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'近藤 稔'" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake[\"ja-JP\"].name()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'구현우'" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake[\"ko-KR\"].name()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['zh_CN', 'ja_JP', 'ko_KR']" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake.locales" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { 
"data": { "text/plain": [ "[,\n", " ,\n", " ]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake.factories" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 使用社区生态中补充的 Provider" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# uncomment below code to install third-party provider\n", "# !pip install faker-biology" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "from faker import Faker\n", "from faker_biology.bioseq import Bioseq\n", "\n", "fake = Faker()\n", "fake.add_provider(Bioseq)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'TCGACCATCG'" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake.dna(10)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'UAAGGUCGGC'" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake.rna(10)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'MVGFAGGAENV'" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake.protein(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 如何实现自己的 Provider" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "import faker\n", "\n", "from faker.providers import DynamicProvider\n", "\n", "fake = faker.Faker()\n", "\n", "locales = DynamicProvider(\n", " provider_name=\"custom_locales\",\n", " elements=[\"zh-CN\", \"en-US\", \"ja-JP\", \"ko-KR\"]\n", ")\n", "fake.add_provider(locales)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'ko-KR'" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } 
], "source": [ "fake.custom_locales()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "import faker\n", "from faker.providers import BaseProvider\n", "\n", "\n", "class MyProvider(BaseProvider):\n", "    \"\"\"Custom Faker provider whose methods return whole candidate lists.\"\"\"\n", "\n", "    def custom_locales(self) -> list:\n", "        \"\"\"Return the full list of locale codes.\"\"\"\n", "        return [\"zh-CN\", \"en-US\", \"ja-JP\", \"ko-KR\"]\n", "\n", "    def custom_programming_languages(self) -> list:\n", "        \"\"\"Return the full list of programming-language names.\"\"\"\n", "        return [\"Python\", \"Java\", \"Golang\", \"JavaScript\", \"Swift\"]\n", "\n", "    def custom_numbers(self, n: int = 10) -> list:\n", "        \"\"\"Return the integers 0 .. n-1 as a list (empty when n is 0).\n", "\n", "        Raises:\n", "            ValueError: if n is negative.\n", "        \"\"\"\n", "        if n < 0:\n", "            raise ValueError(\"n must be greater than or equal to 0.\")\n", "\n", "        return list(range(n))\n", "\n", "\n", "fake = faker.Faker()\n", "fake.add_provider(MyProvider)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['zh-CN', 'en-US', 'ja-JP', 'ko-KR']" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake.custom_locales()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Python', 'Java', 'Golang', 'JavaScript', 'Swift']" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake.custom_programming_languages()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake.custom_numbers()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 1, 2, 3, 4]" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake.custom_numbers(n=5)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "import faker\n", "from faker.providers import BaseProvider\n", "\n", "\n", "class 
MyProvider(BaseProvider):\n", "    \"\"\"Custom Faker provider whose methods return one randomly chosen value.\"\"\"\n", "\n", "    def custom_locales(self) -> str:\n", "        \"\"\"Return one locale code picked at random.\"\"\"\n", "        choices = [\"zh-CN\", \"en-US\", \"ja-JP\", \"ko-KR\"]\n", "        return self.random_element(choices)\n", "\n", "    def custom_programming_languages(self) -> str:\n", "        \"\"\"Return one programming-language name picked at random.\"\"\"\n", "        langs = [\"Python\", \"Java\", \"Golang\", \"JavaScript\", \"Swift\"]\n", "        return self.random_element(langs)\n", "\n", "    def custom_numbers(self, n: int = 10) -> int:\n", "        \"\"\"Return one integer picked at random from 0 .. n-1.\n", "\n", "        Raises:\n", "            ValueError: if n is less than 1 (there is nothing to pick from).\n", "        \"\"\"\n", "        # n == 0 would hand random_element an empty sequence, so reject it\n", "        # up front with the same ValueError that negative values get.\n", "        if n <= 0:\n", "            raise ValueError(\"n must be greater than 0.\")\n", "\n", "        return self.random_element(list(range(n)))\n", "\n", "\n", "fake = faker.Faker()\n", "fake.add_provider(MyProvider)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'ko-KR'" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake.custom_locales()" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Swift'" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake.custom_programming_languages()" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "9" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fake.custom_numbers(n=10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.9.0 ('pandas-startup')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.0" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "13977d4cc82dee5f9d9535ceb495bd0ab12a43c33c664e5f0d53c24cf634b67f" } } }, "nbformat": 4, "nbformat_minor": 2 }