{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## 创建一个 Dataset" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "100gle|male|author\n" ] } ], "source": [ "import tablib\n", "\n", "data = tablib.Dataset([\"100gle\", \"male\", \"author\"])\n", "print(data)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 为 Dataset 设置字段名" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "name |gender|job \n", "------|------|------\n", "100gle|male |author\n" ] } ], "source": [ "data.headers = (\"name\", \"gender\", \"job\")\n", "print(data)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "a|b|c\n", "-|-|-\n", "1|2|3\n", "4|5|6\n" ] } ], "source": [ "data = tablib.Dataset(\n", " *[[1, 2, 3], [4, 5, 6]],\n", " headers=[\"a\", \"b\", \"c\"],\n", ")\n", "\n", "print(data)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 将数据添加进 Dataset 中" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "name |gender|job \n", "------|------|----------\n", "100gle|male |author \n", "100gle|male |programmer\n" ] } ], "source": [ "data = tablib.Dataset()\n", "data.headers = (\"name\", \"gender\", \"job\")\n", "data.append([\"100gle\", \"male\", \"author\"])\n", "data.append([\"100gle\", \"male\", \"programmer\"])\n", "\n", "print(data)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "name |gender|job \n", "------|------|----------\n", "Marry |female|artist \n", "100gle|male |author \n", "100gle|male |programmer\n" ] } ], "source": [ "data.lpush([\"Marry\", \"female\", \"artist\"])\n", "print(data)" ] }, { "cell_type": "code", 
"execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "name |gender|job |tag\n", "------|------|----------|---\n", "Marry |female|artist |1 \n", "100gle|male |author |2 \n", "100gle|male |programmer|3 \n" ] } ], "source": [ "data.append_col([1, 2, 3], header=\"tag\")\n", "print(data)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id|name |gender|job |tag\n", "--|------|------|----------|---\n", "a |Marry |female|artist |1 \n", "b |100gle|male |author |2 \n", "c |100gle|male |programmer|3 \n" ] } ], "source": [ "data.lpush_col([\"a\", \"b\", \"c\"], header=\"id\")\n", "print(data)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 索引与删除 Dataset 数据" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Marry', '100gle', '100gle']" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[\"name\"]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['artist', 'author', 'programmer']" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.get_col(3)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('a', 'Marry', 'female', 'artist', 1)" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[0]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('b', '100gle', 'male', 'author', 2),\n", " ('c', '100gle', 'male', 'programmer', 3)]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[1:]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('c', '100gle', 'male', 
'programmer', 3)" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[-1]" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id|name |gender|job |tag\n", "--|------|------|----------|---\n", "b |100gle|male |author |2 \n", "c |100gle|male |programmer|3 \n" ] } ], "source": [ "del data[0]\n", "print(data)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id|name |job |tag\n", "--|------|----------|---\n", "b |100gle|author |2 \n", "c |100gle|programmer|3 \n" ] } ], "source": [ "del data[\"gender\"]\n", "print(data)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "data.wipe()\n", "print(data)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 通过标签筛选数据" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id |title |content |create_at \n", "------------------------------------|-----------------------|------------------|----------\n", "f2097b2c-6d62-469a-a7ee-d44e53fc5458|URL Schema 的 1000 种使用方式|如何使用 URL Schema...|2022-09-06\n", "2cb43639-a76f-4e9f-8e7f-d7c5809b5489|Python 基础快速入门 |快速入门 Python... |2022-09-06\n", "9eca4106-6520-4b82-93fd-a273eaf0cb6f|使用 Tablib 来完成数据表格操作 |有关 Tablib 的使用技巧...|2022-10-01\n", "cbcf0041-5198-47c7-84e5-e9a88bd09ecc|使用 Django 从零打造一个博客系统 |Django 基础... 
|2022-09-30\n" ] } ], "source": [ "from uuid import uuid4\n", "\n", "import tablib\n", "\n", "blogs = tablib.Dataset(headers=[\"id\", \"title\", \"content\", \"create_at\"])\n", "records = [\n", " [\n", " (uuid4(), \"URL Schema 的 1000 种使用方式\", \"如何使用 URL Schema...\", \"2022-09-06\"),\n", " [\"iOS\", \"Automatic\"],\n", " ],\n", " [(uuid4(), \"Python 基础快速入门\", \"快速入门 Python...\", \"2022-09-06\"), [\"Python\", \"Basic\"]],\n", " [\n", " (uuid4(), \"使用 Tablib 来完成数据表格操作\", \"有关 Tablib 的使用技巧...\", \"2022-10-01\"),\n", " [\"Python\", \"Python Library\"],\n", " ],\n", " [\n", " (uuid4(), \"使用 Django 从零打造一个博客系统\", \"Django 基础...\", \"2022-09-30\"),\n", " [\"Python\", \"Web\", \"Django\", \"Python Library\"],\n", " ],\n", "]\n", "\n", "for record, tags in records:\n", " blogs.append(record, tags=tags)\n", "\n", "print(blogs)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[OrderedDict([('id', UUID('f2097b2c-6d62-469a-a7ee-d44e53fc5458')),\n", " ('title', 'URL Schema 的 1000 种使用方式'),\n", " ('content', '如何使用 URL Schema...'),\n", " ('create_at', '2022-09-06')])]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "blogs.filter([\"iOS\"]).dict" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[OrderedDict([('id', UUID('2cb43639-a76f-4e9f-8e7f-d7c5809b5489')),\n", " ('title', 'Python 基础快速入门'),\n", " ('content', '快速入门 Python...'),\n", " ('create_at', '2022-09-06')]),\n", " OrderedDict([('id', UUID('9eca4106-6520-4b82-93fd-a273eaf0cb6f')),\n", " ('title', '使用 Tablib 来完成数据表格操作'),\n", " ('content', '有关 Tablib 的使用技巧...'),\n", " ('create_at', '2022-10-01')]),\n", " OrderedDict([('id', UUID('cbcf0041-5198-47c7-84e5-e9a88bd09ecc')),\n", " ('title', '使用 Django 从零打造一个博客系统'),\n", " ('content', 'Django 基础...'),\n", " ('create_at', '2022-09-30')])]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], 
"source": [ "blogs.filter([\"Python\"]).dict" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 动态列" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "title |content |create_at \n", "-----------------------|------------------|----------\n", "URL Schema 的 1000 种使用方式|如何使用 URL Schema...|2022-09-06\n", "Python 基础快速入门 |快速入门 Python... |2022-09-06\n", "使用 Tablib 来完成数据表格操作 |有关 Tablib 的使用技巧...|2022-10-01\n", "使用 Django 从零打造一个博客系统 |Django 基础... |2022-09-30\n" ] } ], "source": [ "del blogs[\"id\"]\n", "print(blogs)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "title |content |create_at |uid \n", "-----------------------|------------------|----------|------------------------------------\n", "URL Schema 的 1000 种使用方式|如何使用 URL Schema...|2022-09-06|140f7af0-63fc-43b9-847b-666c76075cfb\n", "Python 基础快速入门 |快速入门 Python... |2022-09-06|5ec020fd-6a10-4c4b-935c-19220f642b92\n", "使用 Tablib 来完成数据表格操作 |有关 Tablib 的使用技巧...|2022-10-01|6c3f75f6-a9f0-490e-9725-de7ec2712323\n", "使用 Django 从零打造一个博客系统 |Django 基础... |2022-09-30|d5e9c8ac-3d8e-4c98-be5f-d9ba4182cecf\n" ] } ], "source": [ "def uid(row):\n", " return uuid4()\n", "\n", "blogs.append_col(uid, header=\"uid\")\n", "print(blogs)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "title |content |create_at |uid |month\n", "-----------------------|------------------|----------|------------------------------------|-----\n", "URL Schema 的 1000 种使用方式|如何使用 URL Schema...|2022-09-06|140f7af0-63fc-43b9-847b-666c76075cfb|9 \n", "Python 基础快速入门 |快速入门 Python... |2022-09-06|5ec020fd-6a10-4c4b-935c-19220f642b92|9 \n", "使用 Tablib 来完成数据表格操作 |有关 Tablib 的使用技巧...|2022-10-01|6c3f75f6-a9f0-490e-9725-de7ec2712323|10 \n", "使用 Django 从零打造一个博客系统 |Django 基础... 
|2022-09-30|d5e9c8ac-3d8e-4c98-be5f-d9ba4182cecf|9 \n" ] } ], "source": [ "from datetime import datetime\n", "\n", "\n", "def month(row):\n", "    \"\"\"Return the month number (1-12) parsed from a row's `create_at` value.\n", "\n", "    Used as a dynamic column: when passed to `append_col`, tablib calls it\n", "    with each existing row and stores the returned value in the new column.\n", "    \"\"\"\n", "    # `id` was deleted above, so the columns are title, content, create_at, uid:\n", "    # index 2 is the \"YYYY-MM-DD\" date string.\n", "    date_str = row[2]\n", "    date = datetime.strptime(date_str, \"%Y-%m-%d\")\n", "    return date.month\n", "\n", "\n", "blogs.append_col(month, header=\"month\")\n", "print(blogs)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 读取数据并转换成 Dataset" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id|name |email \n", "--|------|------------------\n", "1 |100gle|100gle@example.com\n", "2 |Steve |steve@example.com \n", "3 |Harry |harry@example.com \n" ] } ], "source": [ "import tablib\n", "\n", "# Named csv_text so the standard-library `csv` module name is not shadowed.\n", "csv_text = \"\"\"\\\n", "id,name,email\n", "1,100gle,100gle@example.com\n", "2,Steve,steve@example.com\n", "3,Harry,harry@example.com\n", "\"\"\"\n", "\n", "# Pass the format explicitly instead of relying on auto-detection.\n", "data = tablib.Dataset().load(csv_text, format=\"csv\")\n", "print(data)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id|name |email \n", "--|------|------------------\n", "1 |100gle|100gle@example.com\n", "2 |Steve |steve@example.com \n", "3 |Harry |harry@example.com \n" ] } ], "source": [ "# Named json_text so the standard-library `json` module name is not shadowed.\n", "json_text = \"\"\"\\\n", "[\n", " {\n", " \"id\": 1,\n", " \"name\": \"100gle\",\n", " \"email\": \"100gle@example.com\"\n", " },\n", " {\n", " \"id\": 2,\n", " \"name\": \"Steve\",\n", " \"email\": \"steve@example.com\"\n", " },\n", " {\n", " \"id\": 3,\n", " \"name\": \"Harry\",\n", " \"email\": \"harry@example.com\"\n", " }\n", "]\n", "\"\"\"\n", "\n", "data = tablib.Dataset().load(json_text, format=\"json\")\n", "print(data)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 将 Dataset 数据导出" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[OrderedDict([('id', 1), ('name', '100gle'), ('email', '100gle@example.com')]),\n", " OrderedDict([('id', 2), ('name', 'Steve'), ('email', 
'steve@example.com')]),\n", " OrderedDict([('id', 3), ('name', 'Harry'), ('email', 'harry@example.com')])]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.dict" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'[{\"id\": 1, \"name\": \"100gle\", \"email\": \"100gle@example.com\"}, {\"id\": 2, \"name\": \"Steve\", \"email\": \"steve@example.com\"}, {\"id\": 3, \"name\": \"Harry\", \"email\": \"harry@example.com\"}]'" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.json" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'- {email: 100gle@example.com, id: 1, name: 100gle}\\n- {email: steve@example.com, id: 2, name: Steve}\\n- {email: harry@example.com, id: 3, name: Harry}\\n'" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.yaml" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "' id name email\\n 1 100gle 100gle@example.com\\n 2 Steve steve@example.com\\n 3 Harry harry@example.com'" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.cli" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+------+--------+--------------------+\n", "| id | name | email |\n", "+======+========+====================+\n", "| 1 | 100gle | 100gle@example.com |\n", "+------+--------+--------------------+\n", "| 2 | Steve | steve@example.com |\n", "+------+--------+--------------------+\n", "| 3 | Harry | harry@example.com |\n", "+------+--------+--------------------+\n" ] } ], "source": [ "print(data.export(\"cli\", tablefmt=\"grid\"))" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ "╒══════╤════════╤════════════════════╕\n", "│ id │ name │ email │\n", "╞══════╪════════╪════════════════════╡\n", "│ 1 │ 100gle │ 100gle@example.com │\n", "├──────┼────────┼────────────────────┤\n", "│ 2 │ Steve │ steve@example.com │\n", "├──────┼────────┼────────────────────┤\n", "│ 3 │ Harry │ harry@example.com │\n", "╘══════╧════════╧════════════════════╛\n" ] } ], "source": [ "print(data.export(\"cli\", tablefmt=\"fancy_grid\"))" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "||id||name||email||\n", "|1|100gle|100gle@example.com|\n", "|2|Steve|steve@example.com|\n", "|3|Harry|harry@example.com|\n" ] } ], "source": [ "print(data.export(\"jira\"))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.9.0 ('pandas-startup')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.0" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "13977d4cc82dee5f9d9535ceb495bd0ab12a43c33c664e5f0d53c24cf634b67f" } } }, "nbformat": 4, "nbformat_minor": 2 }