Files
2023-08-08 16:08:44 +08:00

708 lines
14 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 快速生成假数据"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import faker\n",
"\n",
"fake = faker.Faker()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Paul Jones'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.name()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Hong Kong dollar'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.currency_name() "
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Mosley, Stevens and Cuevas'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.company()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[<faker.providers.user_agent.Provider at 0x7f88d87d6130>,\n",
" <faker.providers.ssn.en_US.Provider at 0x7f88d87d6160>,\n",
" <faker.providers.python.Provider at 0x7f88d87bfe20>,\n",
" <faker.providers.profile.Provider at 0x7f88d87bffd0>,\n",
" <faker.providers.phone_number.en_US.Provider at 0x7f88d87bfee0>,\n",
" <faker.providers.person.en_US.Provider at 0x7f88d87bfbe0>,\n",
" <faker.providers.misc.en_US.Provider at 0x7f88d87bfca0>,\n",
" <faker.providers.lorem.en_US.Provider at 0x7f88d87bfaf0>,\n",
" <faker.providers.job.en_US.Provider at 0x7f88d87bf940>,\n",
" <faker.providers.isbn.Provider at 0x7f88d87bf640>,\n",
" <faker.providers.internet.en_US.Provider at 0x7f88d87bf850>,\n",
" <faker.providers.geo.en_US.Provider at 0x7f88d87bf580>,\n",
" <faker.providers.file.Provider at 0x7f88d87bf790>,\n",
" <faker.providers.date_time.en_US.Provider at 0x7f88d87bf4c0>,\n",
" <faker.providers.currency.en_US.Provider at 0x7f88d87bf5e0>,\n",
" <faker.providers.credit_card.en_US.Provider at 0x7f88d87bf400>,\n",
" <faker.providers.company.en_US.Provider at 0x7f88d879ce20>,\n",
" <faker.providers.color.en_US.Provider at 0x7f88d879cb20>,\n",
" <faker.providers.barcode.en_US.Provider at 0x7f88d879c550>,\n",
" <faker.providers.bank.en_GB.Provider at 0x7f89282f9b50>,\n",
" <faker.providers.automotive.en_US.Provider at 0x7f88c87f8ee0>,\n",
" <faker.providers.address.en_US.Provider at 0x7f88d8695b80>]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.providers"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 生成本土化数据"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"import faker\n",
"fake = faker.Faker(locale=\"zh-CN\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'黄丽娟'"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.name()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'13918155548'"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.phone_number()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'佳禾网络有限公司'"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.company()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'tao68@example.org'"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.email()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 创建多个本土化版本"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"fake = faker.Faker(locale=[\"zh-CN\", \"ja-JP\", \"ko-KR\"])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'程畅'"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake[\"zh-CN\"].name()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'近藤 稔'"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake[\"ja-JP\"].name()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'구현우'"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake[\"ko-KR\"].name()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['zh_CN', 'ja_JP', 'ko_KR']"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.locales"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[<faker.generator.Generator at 0x7f88d88ff580>,\n",
" <faker.generator.Generator at 0x7f88d88ff550>,\n",
" <faker.generator.Generator at 0x7f88d88ffa90>]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.factories"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 使用社区生态中补充的 Provider"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Uncomment the line below to install the third-party provider used in this section.\n",
"# %pip install faker-biology"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"from faker import Faker\n",
"from faker_biology.bioseq import Bioseq\n",
"\n",
"fake = Faker()\n",
"fake.add_provider(Bioseq)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'TCGACCATCG'"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.dna(10)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'UAAGGUCGGC'"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.rna(10)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'MVGFAGGAENV'"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.protein(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 如何实现自己的 Provider"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"import faker\n",
"from faker.providers import DynamicProvider\n",
"\n",
"# The provider is registered under the method name given by `provider_name`,\n",
"# so after add_provider() it is called as fake.custom_locales(); each call\n",
"# yields one of the `elements`.\n",
"locales = DynamicProvider(\n",
"    provider_name=\"custom_locales\",\n",
"    elements=[\"zh-CN\", \"en-US\", \"ja-JP\", \"ko-KR\"],\n",
")\n",
"\n",
"fake = faker.Faker()\n",
"fake.add_provider(locales)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'ko-KR'"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.custom_locales()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"import faker\n",
"from faker.providers import BaseProvider\n",
"\n",
"\n",
"class MyProvider(BaseProvider):\n",
"    \"\"\"First-draft provider: every method returns its complete candidate list.\"\"\"\n",
"\n",
"    def custom_locales(self):\n",
"        \"\"\"Return all locale codes known to this provider.\"\"\"\n",
"        return [\"zh-CN\", \"en-US\", \"ja-JP\", \"ko-KR\"]\n",
"\n",
"    def custom_programming_languages(self):\n",
"        \"\"\"Return all programming language names known to this provider.\"\"\"\n",
"        return [\"Python\", \"Java\", \"Golang\", \"JavaScript\", \"Swift\"]\n",
"\n",
"    def custom_numbers(self, n: int = 10):\n",
"        \"\"\"Return the integers 0 .. n-1 as a list.\n",
"\n",
"        Raises:\n",
"            ValueError: if ``n`` is negative.\n",
"        \"\"\"\n",
"        if n < 0:\n",
"            raise ValueError(\"n must be greater than or equal to 0.\")\n",
"\n",
"        return [*range(n)]\n",
"\n",
"\n",
"fake = faker.Faker()\n",
"fake.add_provider(MyProvider)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['zh-CN', 'en-US', 'ja-JP', 'ko-KR']"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.custom_locales()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Python', 'Java', 'Golang', 'JavaScript', 'Swift']"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.custom_programming_languages()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.custom_numbers()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[0, 1, 2, 3, 4]"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.custom_numbers(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"import faker\n",
"from faker.providers import BaseProvider\n",
"\n",
"\n",
"class MyProvider(BaseProvider):\n",
"    \"\"\"Provider whose methods each return one item picked with ``random_element``.\"\"\"\n",
"\n",
"    def custom_locales(self):\n",
"        \"\"\"Return one locale code picked from a fixed list.\"\"\"\n",
"        choices = [\"zh-CN\", \"en-US\", \"ja-JP\", \"ko-KR\"]\n",
"        return self.random_element(choices)\n",
"\n",
"    def custom_programming_languages(self):\n",
"        \"\"\"Return one programming language name picked from a fixed list.\"\"\"\n",
"        langs = [\"Python\", \"Java\", \"Golang\", \"JavaScript\", \"Swift\"]\n",
"        return self.random_element(langs)\n",
"\n",
"    def custom_numbers(self, n: int = 10):\n",
"        \"\"\"Return one integer picked from 0 .. n-1.\n",
"\n",
"        Raises:\n",
"            ValueError: if ``n`` is less than 1, because there would be\n",
"                nothing to pick from.\n",
"        \"\"\"\n",
"        # n == 0 has to be rejected as well: range(0) is empty, so there is\n",
"        # no element for random_element to return.\n",
"        if n < 1:\n",
"            raise ValueError(\"n must be greater than 0.\")\n",
"\n",
"        return self.random_element(list(range(n)))\n",
"\n",
"\n",
"fake = faker.Faker()\n",
"fake.add_provider(MyProvider)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'ko-KR'"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.custom_locales()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Swift'"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.custom_programming_languages()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"9"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fake.custom_numbers(n=10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.0 ('pandas-startup')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "13977d4cc82dee5f9d9535ceb495bd0ab12a43c33c664e5f0d53c24cf634b67f"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}