diff --git a/code/12/io.ipynb b/code/12/io.ipynb new file mode 100644 index 0000000..7c95e80 --- /dev/null +++ b/code/12/io.ipynb @@ -0,0 +1,1079 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": true, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# I/O:CSV" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "Example:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uidnamecompany
0246000b6-0256-476b-b125-0f0b95719a1dSteve JobsApple
12155499d-3e0b-4e8a-965f-acd3f31ac2fcBill GatesMicrosoft
25f51767f-f743-43d1-aeca-5eaaad5e47abMark ZuckerbergFacebook
3e86c9b7e-53ba-41bf-98e2-6979f1d462cfLarry PageGoogle
45aa2d881-8721-4eba-a73d-e5f0a19a5edeSergey BrinYandex
\n", + "
" + ], + "text/plain": [ + " uid name company\n", + "0 246000b6-0256-476b-b125-0f0b95719a1d Steve Jobs Apple\n", + "1 2155499d-3e0b-4e8a-965f-acd3f31ac2fc Bill Gates Microsoft\n", + "2 5f51767f-f743-43d1-aeca-5eaaad5e47ab Mark Zuckerberg Facebook\n", + "3 e86c9b7e-53ba-41bf-98e2-6979f1d462cf Larry Page Google\n", + "4 5aa2d881-8721-4eba-a73d-e5f0a19a5ede Sergey Brin Yandex" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pathlib\n", + "import uuid\n", + "\n", + "import pandas as pd\n", + "\n", + "csvfile = pathlib.Path(r\"~/Desktop/pandas.csv\").expanduser()\n", + "txtfile = pathlib.Path(r\"~/Desktop/pandas.txt\").expanduser()\n", + "data = dict(\n", + " uid=[uuid.uuid4() for _ in range(5)],\n", + " name=[\"Steve Jobs\", \"Bill Gates\", \"Mark Zuckerberg\", \"Larry Page\", \"Sergey Brin\"],\n", + " company=[\"Apple\", \"Microsoft\", \"Facebook\", \"Google\", \"Yandex\"],\n", + ")\n", + "df = pd.DataFrame(data)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "df.to_csv(csvfile, index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[0muid\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcompany\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m\u001b[0;36m246000\u001b[0m\u001b[0mb6\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;36m256\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m476\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mb125\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0mf0b95719a1d\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mSteve\u001b[0m \u001b[0mJobs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mApple\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m\u001b[0;36m2155499\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m3e0\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m4e8\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m965\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0macd3f31ac2fc\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mBill\u001b[0m \u001b[0mGates\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mMicrosoft\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0mf51767f\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mf743\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m43\u001b[0m\u001b[0md1\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0maeca\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0meaaad5e47ab\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mMark\u001b[0m \u001b[0mZuckerberg\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mFacebook\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m\u001b[0me86c9b7e\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m53\u001b[0m\u001b[0mba\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m41\u001b[0m\u001b[0mbf\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m98e2\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m6979\u001b[0m\u001b[0mf1d462cf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mLarry\u001b[0m \u001b[0mPage\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mGoogle\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0maa2d881\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m8721\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0meba\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0ma73d\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0me5f0a19a5ede\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mSergey\u001b[0m \u001b[0mBrin\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mYandex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n" + ] + } + ], + "source": [ + "%pycat ~/Desktop/pandas.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "df.to_csv(txtfile, index=False, sep=\"\\t\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[0muid\u001b[0m \u001b[0mname\u001b[0m \u001b[0mcompany\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m\u001b[0;36m246000\u001b[0m\u001b[0mb6\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;36m256\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m476\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mb125\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0mf0b95719a1d\u001b[0m \u001b[0mSteve\u001b[0m \u001b[0mJobs\u001b[0m \u001b[0mApple\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m\u001b[0;36m2155499\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m3e0\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m4e8\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m965\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0macd3f31ac2fc\u001b[0m \u001b[0mBill\u001b[0m \u001b[0mGates\u001b[0m \u001b[0mMicrosoft\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0mf51767f\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mf743\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m43\u001b[0m\u001b[0md1\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0maeca\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0meaaad5e47ab\u001b[0m \u001b[0mMark\u001b[0m \u001b[0mZuckerberg\u001b[0m \u001b[0mFacebook\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m\u001b[0me86c9b7e\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m53\u001b[0m\u001b[0mba\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m41\u001b[0m\u001b[0mbf\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m98e2\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m6979\u001b[0m\u001b[0mf1d462cf\u001b[0m \u001b[0mLarry\u001b[0m \u001b[0mPage\u001b[0m \u001b[0mGoogle\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0maa2d881\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m8721\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0meba\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0ma73d\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0me5f0a19a5ede\u001b[0m \u001b[0mSergey\u001b[0m \u001b[0mBrin\u001b[0m \u001b[0mYandex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n" + ] + } + ], + "source": [ + "%pycat ~/Desktop/pandas.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## read" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### sep & delimeter" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " uid name company\n", + "0 246000b6-0256-476b-b125-0f0b95719a1d Steve Jobs Apple\n", + "1 2155499d-3e0b-4e8a-965f-acd3f31ac2fc Bill Gates Microsoft\n", + "2 5f51767f-f743-43d1-aeca-5eaaad5e47ab Mark Zuckerberg Facebook\n", + "3 e86c9b7e-53ba-41bf-98e2-6979f1d462cf Larry Page Google\n", + "4 5aa2d881-8721-4eba-a73d-e5f0a19a5ede Sergey Brin Yandex\n" + ] + } + ], + "source": [ + "import pathlib\n", + "\n", + "txtfile = pathlib.Path(r\"~/Desktop/pandas.txt\").expanduser()\n", + "\n", + "df = pd.read_csv(txtfile, sep=\"\\t\")\n", + "print(df)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### usecols" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " name company\n", + "0 Steve Jobs Apple\n", + "1 Bill Gates Microsoft\n", + "2 Mark Zuckerberg Facebook\n", + "3 Larry Page Google\n", + "4 Sergey Brin Yandex\n" + ] + } + ], + "source": [ + "import pathlib\n", + "csvfile = pathlib.Path(r\"~/Desktop/pandas.csv\").expanduser()\n", + "\n", + "df = pd.read_csv(csvfile, usecols=[\"name\", \"company\"])\n", + "print(df)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### skiprows" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import pathlib\n", + "\n", + "csvfile = pathlib.Path(r\"~/Desktop/pandas.csv\").expanduser()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uidnamecompany
02155499d-3e0b-4e8a-965f-acd3f31ac2fcBill GatesMicrosoft
1e86c9b7e-53ba-41bf-98e2-6979f1d462cfLarry PageGoogle
25aa2d881-8721-4eba-a73d-e5f0a19a5edeSergey BrinYandex
\n", + "
" + ], + "text/plain": [ + " uid name company\n", + "0 2155499d-3e0b-4e8a-965f-acd3f31ac2fc Bill Gates Microsoft\n", + "1 e86c9b7e-53ba-41bf-98e2-6979f1d462cf Larry Page Google\n", + "2 5aa2d881-8721-4eba-a73d-e5f0a19a5ede Sergey Brin Yandex" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.read_csv(csvfile, skiprows=[1, 3])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
246000b6-0256-476b-b125-0f0b95719a1dSteve JobsApple
05f51767f-f743-43d1-aeca-5eaaad5e47abMark ZuckerbergFacebook
15aa2d881-8721-4eba-a73d-e5f0a19a5edeSergey BrinYandex
\n", + "
" + ], + "text/plain": [ + " 246000b6-0256-476b-b125-0f0b95719a1d Steve Jobs Apple\n", + "0 5f51767f-f743-43d1-aeca-5eaaad5e47ab Mark Zuckerberg Facebook\n", + "1 5aa2d881-8721-4eba-a73d-e5f0a19a5ede Sergey Brin Yandex" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.read_csv(csvfile, skiprows=lambda x: x % 2 == 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uidnamecompany
0246000b6-0256-476b-b125-0f0b95719a1dSteve JobsApple
12155499d-3e0b-4e8a-965f-acd3f31ac2fcBill GatesMicrosoft
\n", + "
" + ], + "text/plain": [ + " uid name company\n", + "0 246000b6-0256-476b-b125-0f0b95719a1d Steve Jobs Apple\n", + "1 2155499d-3e0b-4e8a-965f-acd3f31ac2fc Bill Gates Microsoft" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.read_csv(csvfile, nrows=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## write" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import csv\n", + "from io import StringIO\n", + "\n", + "import pandas as pd\n", + "\n", + "data = StringIO(\n", + " \"\"\"a,b,c\\n\n", + " 1,\"Hello, world\",3\\n\n", + " \"\"\"\n", + ")\n", + "\n", + "df = pd.read_csv(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\"a\",\"b\",\"c\"\n", + "\"1\",\"Hello, world\",\"3\"\n", + "\n" + ] + } + ], + "source": [ + "print(df.to_csv(index=False, quoting=csv.QUOTE_ALL))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\"a\",\"b\",\"c\"\n", + "1,\"Hello, world\",3\n", + "\n" + ] + } + ], + "source": [ + "print(df.to_csv(index=False, quoting=csv.QUOTE_NONNUMERIC))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\"a\",\"b\",\"c\"\n", + "\"1\",\"Hello, world\",\"3\"\n", + "\n" + ] + } + ], + "source": [ + "csv_string = df.to_csv(index=False, quoting=csv.QUOTE_ALL)\n", + "print(csv_string)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "type: \n" + ] + } + ], + "source": [ + "print(f\"type: {type(csv_string)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# I/O:Excel" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "如果未安装 `openpyxl`,则可以通过取消如下命令的注释后执行安装:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# !pip install openpyxl" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## read" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " id name category\n", + "0 0 Apple Tech\n", + "1 1 Google Tech\n", + "2 2 Microsoft Tech\n", + "3 3 Facebook Social\n", + "4 4 Amazon E-Commerce\n" + ] + } + ], + "source": [ + "import pathlib\n", + "\n", + "import pandas as pd\n", + "\n", + "excel_file = pathlib.Path(\"../../data\") / \"excel_io_example.xlsx\"\n", + "\n", + "df = pd.read_excel(excel_file, engine=\"openpyxl\")\n", + "print(df)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## write" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " NAME CATEGORY\n", + "0 Apple Tech\n", + "1 Google Tech\n", + "2 Microsoft Tech\n", + "3 Facebook Social\n", + "4 Amazon E-Commerce\n" + ] + } + ], + "source": [ + "excel_output = (\n", + " pathlib.Path(\"~/Desktop\").expanduser() / \"excel_io_example_output.xlsx\"\n", + ")\n", + "\n", + "df.columns = df.columns.str.upper()\n", + "df = df.drop(\"ID\", axis=1)\n", + "print(df)\n", + "df.to_excel(excel_output)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# I/O:Database" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# generate fake data\n", + "\n", + "import pathlib\n", + "import sqlite3\n", + "\n", + "DB_PATH = pathlib.Path(\"~/Desktop/pandas.db\").expanduser()\n", + "\n", + "db = sqlite3.connect(DB_PATH)\n", + "\n", + "\n", + "def generate_fake_data(conn: sqlite3.Connection):\n", + " conn.execute(\n", + " \"\"\"\n", + " CREATE TABLE IF NOT EXISTS items (\n", + " ID int not null, -- 自增ID\n", + " `NAME` VARCHAR(10), -- 商品名\n", + " PRICE float(2), -- 价格\n", + " `NUMBER` int(10) -- 数量\n", + " );\n", + " \"\"\"\n", + " )\n", + "\n", + " data = [\n", + " (0, \"Apple\", 6.99, 4),\n", + " (1, \"Orange\", 4.92, 10),\n", + " (2, \"Peach\", 11.00, 3),\n", + " (3, \"Banana\", 2.33, 7),\n", + " (4, \"Purple\", 7.7, 8),\n", + " ]\n", + " has_data = conn.execute(\"SELECT COUNT(0) FROM items\").fetchone()[0]\n", + " if not has_data:\n", + " conn.executemany(\n", + " \"INSERT INTO ITEMS(ID, NAME, PRICE, NUMBER) VALUES(?, ?, ?, ?)\", data\n", + " )\n", + " conn.commit()\n", + "\n", + "\n", + "generate_fake_data(conn=db)\n", + "db.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## read" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "SQL query example:\n", + "```sql\n", + "SELECT s.name,\n", + " s.category\n", + "FROM (\n", + " SELECT `name`,\n", + " case\n", + " WHEN number % 2 == 0 THEN 0\n", + " WHEN number % 3 >= 1 THEN 1\n", + " WHEN number % 4 != 0 THEN 3\n", + " ELSE number\n", + " END as category\n", + " FROM items\n", + " ) s\n", + "WHERE s.category > 0;\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namecategory
2Peach3
3Banana1
\n", + "
" + ], + "text/plain": [ + " name category\n", + "2 Peach 3\n", + "3 Banana 1" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pathlib\n", + "import sqlite3\n", + "\n", + "import pandas as pd\n", + "\n", + "DB_PATH = pathlib.Path(\"~/Desktop/pandas.db\").expanduser()\n", + "\n", + "db = sqlite3.connect(DB_PATH)\n", + "data = pd.read_sql_query(\"SELECT * FROM items\", db)\n", + "data.columns = data.columns.str.lower()\n", + "\n", + "\n", + "def case_when(number):\n", + " if number % 2 == 0:\n", + " return 0\n", + " elif number % 3 >= 1:\n", + " return 1\n", + " elif number % 4 != 0:\n", + " return 3\n", + " else:\n", + " return number\n", + "\n", + "\n", + "data[\"category\"] = data[\"number\"].map(case_when)\n", + "data.loc[data[\"category\"] > 0, [\"name\", \"category\"]]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "## write" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " id name price number\n", + "0 5 Cherry 15 20\n", + "1 6 BlackBerry 20 11\n", + "2 7 Corn 3 100\n" + ] + } + ], + "source": [ + "items = [\n", + " dict(id=5, name=\"Cherry\", price=15, number=20),\n", + " dict(id=6, name=\"BlackBerry\", price=20, number=11),\n", + " dict(id=7, name=\"Corn\", price=3, number=100),\n", + "]\n", + "\n", + "df = pd.DataFrame(items)\n", + "df.to_sql(name=\"items\", con=db, if_exists=\"append\", index=False)\n", + "print(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[(0, 'Apple', 6.99, 4), (1, 'Orange', 4.92, 10), (2, 'Peach', 11.0, 3), (3, 'Banana', 2.33, 7), (4, 'Purple', 7.7, 8), (5, 'Cherry', 15.0, 20), (6, 'BlackBerry', 20.0, 11), (7, 'Corn', 3.0, 100)]\n" + ] + } + ], + "source": [ + "rows = db.execute(\"SELECT * FROM items\").fetchall()\n", + "print(rows)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "interpreter": { + "hash": "13977d4cc82dee5f9d9535ceb495bd0ab12a43c33c664e5f0d53c24cf634b67f" + }, + "kernelspec": { + "display_name": "Python 3.9.0 ('pandas-startup')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/data/excel_io_example.xlsx b/data/excel_io_example.xlsx new file mode 100644 index 0000000..acb832f Binary files /dev/null and b/data/excel_io_example.xlsx differ diff --git a/data/pandas.db b/data/pandas.db new file mode 100644 index 0000000..6f57f18 Binary files /dev/null and b/data/pandas.db differ