{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"collapsed": true,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# I/O:CSV"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"Example:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" uid | \n",
" name | \n",
" company | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 246000b6-0256-476b-b125-0f0b95719a1d | \n",
" Steve Jobs | \n",
" Apple | \n",
"
\n",
" \n",
" | 1 | \n",
" 2155499d-3e0b-4e8a-965f-acd3f31ac2fc | \n",
" Bill Gates | \n",
" Microsoft | \n",
"
\n",
" \n",
" | 2 | \n",
" 5f51767f-f743-43d1-aeca-5eaaad5e47ab | \n",
" Mark Zuckerberg | \n",
" Facebook | \n",
"
\n",
" \n",
" | 3 | \n",
" e86c9b7e-53ba-41bf-98e2-6979f1d462cf | \n",
" Larry Page | \n",
" Google | \n",
"
\n",
" \n",
" | 4 | \n",
" 5aa2d881-8721-4eba-a73d-e5f0a19a5ede | \n",
" Sergey Brin | \n",
" Yandex | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" uid name company\n",
"0 246000b6-0256-476b-b125-0f0b95719a1d Steve Jobs Apple\n",
"1 2155499d-3e0b-4e8a-965f-acd3f31ac2fc Bill Gates Microsoft\n",
"2 5f51767f-f743-43d1-aeca-5eaaad5e47ab Mark Zuckerberg Facebook\n",
"3 e86c9b7e-53ba-41bf-98e2-6979f1d462cf Larry Page Google\n",
"4 5aa2d881-8721-4eba-a73d-e5f0a19a5ede Sergey Brin Yandex"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pathlib\n",
"import uuid\n",
"\n",
"import pandas as pd\n",
"\n",
"csvfile = pathlib.Path(r\"~/Desktop/pandas.csv\").expanduser()\n",
"txtfile = pathlib.Path(r\"~/Desktop/pandas.txt\").expanduser()\n",
"data = dict(\n",
" uid=[uuid.uuid4() for _ in range(5)],\n",
" name=[\"Steve Jobs\", \"Bill Gates\", \"Mark Zuckerberg\", \"Larry Page\", \"Sergey Brin\"],\n",
" company=[\"Apple\", \"Microsoft\", \"Facebook\", \"Google\", \"Yandex\"],\n",
")\n",
"df = pd.DataFrame(data)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"df.to_csv(csvfile, index=False)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[0muid\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcompany\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;36m246000\u001b[0m\u001b[0mb6\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;36m256\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m476\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mb125\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0mf0b95719a1d\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mSteve\u001b[0m \u001b[0mJobs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mApple\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;36m2155499\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m3e0\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m4e8\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m965\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0macd3f31ac2fc\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mBill\u001b[0m \u001b[0mGates\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mMicrosoft\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0mf51767f\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mf743\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m43\u001b[0m\u001b[0md1\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0maeca\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0meaaad5e47ab\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mMark\u001b[0m \u001b[0mZuckerberg\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mFacebook\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0me86c9b7e\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m53\u001b[0m\u001b[0mba\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m41\u001b[0m\u001b[0mbf\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m98e2\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m6979\u001b[0m\u001b[0mf1d462cf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mLarry\u001b[0m \u001b[0mPage\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mGoogle\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0maa2d881\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m8721\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0meba\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0ma73d\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0me5f0a19a5ede\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mSergey\u001b[0m \u001b[0mBrin\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mYandex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n"
]
}
],
"source": [
"%pycat ~/Desktop/pandas.csv"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"df.to_csv(txtfile, index=False, sep=\"\\t\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[0muid\u001b[0m \u001b[0mname\u001b[0m \u001b[0mcompany\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;36m246000\u001b[0m\u001b[0mb6\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;36m256\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m476\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mb125\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0mf0b95719a1d\u001b[0m \u001b[0mSteve\u001b[0m \u001b[0mJobs\u001b[0m \u001b[0mApple\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;36m2155499\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m3e0\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m4e8\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m965\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0macd3f31ac2fc\u001b[0m \u001b[0mBill\u001b[0m \u001b[0mGates\u001b[0m \u001b[0mMicrosoft\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0mf51767f\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mf743\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m43\u001b[0m\u001b[0md1\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0maeca\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0meaaad5e47ab\u001b[0m \u001b[0mMark\u001b[0m \u001b[0mZuckerberg\u001b[0m \u001b[0mFacebook\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0me86c9b7e\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m53\u001b[0m\u001b[0mba\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m41\u001b[0m\u001b[0mbf\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m98e2\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m6979\u001b[0m\u001b[0mf1d462cf\u001b[0m \u001b[0mLarry\u001b[0m \u001b[0mPage\u001b[0m \u001b[0mGoogle\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0maa2d881\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m8721\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0meba\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0ma73d\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0me5f0a19a5ede\u001b[0m \u001b[0mSergey\u001b[0m \u001b[0mBrin\u001b[0m \u001b[0mYandex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n"
]
}
],
"source": [
"%pycat ~/Desktop/pandas.txt"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## read"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"### sep & delimeter"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" uid name company\n",
"0 246000b6-0256-476b-b125-0f0b95719a1d Steve Jobs Apple\n",
"1 2155499d-3e0b-4e8a-965f-acd3f31ac2fc Bill Gates Microsoft\n",
"2 5f51767f-f743-43d1-aeca-5eaaad5e47ab Mark Zuckerberg Facebook\n",
"3 e86c9b7e-53ba-41bf-98e2-6979f1d462cf Larry Page Google\n",
"4 5aa2d881-8721-4eba-a73d-e5f0a19a5ede Sergey Brin Yandex\n"
]
}
],
"source": [
"import pathlib\n",
"\n",
"txtfile = pathlib.Path(r\"~/Desktop/pandas.txt\").expanduser()\n",
"\n",
"df = pd.read_csv(txtfile, sep=\"\\t\")\n",
"print(df)"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"### usecols"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" name company\n",
"0 Steve Jobs Apple\n",
"1 Bill Gates Microsoft\n",
"2 Mark Zuckerberg Facebook\n",
"3 Larry Page Google\n",
"4 Sergey Brin Yandex\n"
]
}
],
"source": [
"import pathlib\n",
"csvfile = pathlib.Path(r\"~/Desktop/pandas.csv\").expanduser()\n",
"\n",
"df = pd.read_csv(csvfile, usecols=[\"name\", \"company\"])\n",
"print(df)"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"### skiprows"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"import pathlib\n",
"\n",
"csvfile = pathlib.Path(r\"~/Desktop/pandas.csv\").expanduser()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" uid | \n",
" name | \n",
" company | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 2155499d-3e0b-4e8a-965f-acd3f31ac2fc | \n",
" Bill Gates | \n",
" Microsoft | \n",
"
\n",
" \n",
" | 1 | \n",
" e86c9b7e-53ba-41bf-98e2-6979f1d462cf | \n",
" Larry Page | \n",
" Google | \n",
"
\n",
" \n",
" | 2 | \n",
" 5aa2d881-8721-4eba-a73d-e5f0a19a5ede | \n",
" Sergey Brin | \n",
" Yandex | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" uid name company\n",
"0 2155499d-3e0b-4e8a-965f-acd3f31ac2fc Bill Gates Microsoft\n",
"1 e86c9b7e-53ba-41bf-98e2-6979f1d462cf Larry Page Google\n",
"2 5aa2d881-8721-4eba-a73d-e5f0a19a5ede Sergey Brin Yandex"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.read_csv(csvfile, skiprows=[1, 3])"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 246000b6-0256-476b-b125-0f0b95719a1d | \n",
" Steve Jobs | \n",
" Apple | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 5f51767f-f743-43d1-aeca-5eaaad5e47ab | \n",
" Mark Zuckerberg | \n",
" Facebook | \n",
"
\n",
" \n",
" | 1 | \n",
" 5aa2d881-8721-4eba-a73d-e5f0a19a5ede | \n",
" Sergey Brin | \n",
" Yandex | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 246000b6-0256-476b-b125-0f0b95719a1d Steve Jobs Apple\n",
"0 5f51767f-f743-43d1-aeca-5eaaad5e47ab Mark Zuckerberg Facebook\n",
"1 5aa2d881-8721-4eba-a73d-e5f0a19a5ede Sergey Brin Yandex"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.read_csv(csvfile, skiprows=lambda x: x % 2 == 0)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" uid | \n",
" name | \n",
" company | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 246000b6-0256-476b-b125-0f0b95719a1d | \n",
" Steve Jobs | \n",
" Apple | \n",
"
\n",
" \n",
" | 1 | \n",
" 2155499d-3e0b-4e8a-965f-acd3f31ac2fc | \n",
" Bill Gates | \n",
" Microsoft | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" uid name company\n",
"0 246000b6-0256-476b-b125-0f0b95719a1d Steve Jobs Apple\n",
"1 2155499d-3e0b-4e8a-965f-acd3f31ac2fc Bill Gates Microsoft"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.read_csv(csvfile, nrows=2)"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## write"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"import csv\n",
"from io import StringIO\n",
"\n",
"import pandas as pd\n",
"\n",
"data = StringIO(\n",
" \"\"\"a,b,c\\n\n",
" 1,\"Hello, world\",3\\n\n",
" \"\"\"\n",
")\n",
"\n",
"df = pd.read_csv(data)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\"a\",\"b\",\"c\"\n",
"\"1\",\"Hello, world\",\"3\"\n",
"\n"
]
}
],
"source": [
"print(df.to_csv(index=False, quoting=csv.QUOTE_ALL))"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\"a\",\"b\",\"c\"\n",
"1,\"Hello, world\",3\n",
"\n"
]
}
],
"source": [
"print(df.to_csv(index=False, quoting=csv.QUOTE_NONNUMERIC))"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\"a\",\"b\",\"c\"\n",
"\"1\",\"Hello, world\",\"3\"\n",
"\n"
]
}
],
"source": [
"csv_string = df.to_csv(index=False, quoting=csv.QUOTE_ALL)\n",
"print(csv_string)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"type: \n"
]
}
],
"source": [
"print(f\"type: {type(csv_string)}\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# I/O:Excel"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"如果未安装 `openpyxl`,则可以通过取消如下命令的注释后执行安装:"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# !pip install openpyxl"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## read"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" id name category\n",
"0 0 Apple Tech\n",
"1 1 Google Tech\n",
"2 2 Microsoft Tech\n",
"3 3 Facebook Social\n",
"4 4 Amazon E-Commerce\n"
]
}
],
"source": [
"import pathlib\n",
"\n",
"import pandas as pd\n",
"\n",
"excel_file = pathlib.Path(\"../../data\") / \"excel_io_example.xlsx\"\n",
"\n",
"df = pd.read_excel(excel_file, engine=\"openpyxl\")\n",
"print(df)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"## write"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" NAME CATEGORY\n",
"0 Apple Tech\n",
"1 Google Tech\n",
"2 Microsoft Tech\n",
"3 Facebook Social\n",
"4 Amazon E-Commerce\n"
]
}
],
"source": [
"excel_output = (\n",
" pathlib.Path(\"~/Desktop\").expanduser() / \"excel_io_example_output.xlsx\"\n",
")\n",
"\n",
"df.columns = df.columns.str.upper()\n",
"df = df.drop(\"ID\", axis=1)\n",
"print(df)\n",
"df.to_excel(excel_output)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# I/O:Database"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# generate fake data\n",
"\n",
"import pathlib\n",
"import sqlite3\n",
"\n",
"DB_PATH = pathlib.Path(\"~/Desktop/pandas.db\").expanduser()\n",
"\n",
"db = sqlite3.connect(DB_PATH)\n",
"\n",
"\n",
"def generate_fake_data(conn: sqlite3.Connection):\n",
" conn.execute(\n",
" \"\"\"\n",
" CREATE TABLE IF NOT EXISTS items (\n",
" ID int not null, -- 自增ID\n",
" `NAME` VARCHAR(10), -- 商品名\n",
" PRICE float(2), -- 价格\n",
" `NUMBER` int(10) -- 数量\n",
" );\n",
" \"\"\"\n",
" )\n",
"\n",
" data = [\n",
" (0, \"Apple\", 6.99, 4),\n",
" (1, \"Orange\", 4.92, 10),\n",
" (2, \"Peach\", 11.00, 3),\n",
" (3, \"Banana\", 2.33, 7),\n",
" (4, \"Purple\", 7.7, 8),\n",
" ]\n",
" has_data = conn.execute(\"SELECT COUNT(0) FROM items\").fetchone()[0]\n",
" if not has_data:\n",
" conn.executemany(\n",
" \"INSERT INTO ITEMS(ID, NAME, PRICE, NUMBER) VALUES(?, ?, ?, ?)\", data\n",
" )\n",
" conn.commit()\n",
"\n",
"\n",
"generate_fake_data(conn=db)\n",
"db.close()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## read"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"SQL query example:\n",
"```sql\n",
"SELECT s.name,\n",
" s.category\n",
"FROM (\n",
" SELECT `name`,\n",
" case\n",
" WHEN number % 2 == 0 THEN 0\n",
" WHEN number % 3 >= 1 THEN 1\n",
" WHEN number % 4 != 0 THEN 3\n",
" ELSE number\n",
" END as category\n",
" FROM items\n",
" ) s\n",
"WHERE s.category > 0;\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" category | \n",
"
\n",
" \n",
" \n",
" \n",
" | 2 | \n",
" Peach | \n",
" 3 | \n",
"
\n",
" \n",
" | 3 | \n",
" Banana | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" name category\n",
"2 Peach 3\n",
"3 Banana 1"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pathlib\n",
"import sqlite3\n",
"\n",
"import pandas as pd\n",
"\n",
"DB_PATH = pathlib.Path(\"~/Desktop/pandas.db\").expanduser()\n",
"\n",
"db = sqlite3.connect(DB_PATH)\n",
"data = pd.read_sql_query(\"SELECT * FROM items\", db)\n",
"data.columns = data.columns.str.lower()\n",
"\n",
"\n",
"def case_when(number):\n",
" if number % 2 == 0:\n",
" return 0\n",
" elif number % 3 >= 1:\n",
" return 1\n",
" elif number % 4 != 0:\n",
" return 3\n",
" else:\n",
" return number\n",
"\n",
"\n",
"data[\"category\"] = data[\"number\"].map(case_when)\n",
"data.loc[data[\"category\"] > 0, [\"name\", \"category\"]]\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"## write"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" id name price number\n",
"0 5 Cherry 15 20\n",
"1 6 BlackBerry 20 11\n",
"2 7 Corn 3 100\n"
]
}
],
"source": [
"items = [\n",
" dict(id=5, name=\"Cherry\", price=15, number=20),\n",
" dict(id=6, name=\"BlackBerry\", price=20, number=11),\n",
" dict(id=7, name=\"Corn\", price=3, number=100),\n",
"]\n",
"\n",
"df = pd.DataFrame(items)\n",
"df.to_sql(name=\"items\", con=db, if_exists=\"append\", index=False)\n",
"print(df)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[(0, 'Apple', 6.99, 4), (1, 'Orange', 4.92, 10), (2, 'Peach', 11.0, 3), (3, 'Banana', 2.33, 7), (4, 'Purple', 7.7, 8), (5, 'Cherry', 15.0, 20), (6, 'BlackBerry', 20.0, 11), (7, 'Corn', 3.0, 100)]\n"
]
}
],
"source": [
"rows = db.execute(\"SELECT * FROM items\").fetchall()\n",
"print(rows)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"interpreter": {
"hash": "13977d4cc82dee5f9d9535ceb495bd0ab12a43c33c664e5f0d53c24cf634b67f"
},
"kernelspec": {
"display_name": "Python 3.9.0 ('pandas-startup')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0"
}
},
"nbformat": 4,
"nbformat_minor": 1
}