{ "cells": [ { "cell_type": "markdown", "metadata": { "collapsed": true, "pycharm": { "name": "#%% md\n" } }, "source": [ "# I/O:CSV" ] }, { "cell_type": "markdown", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "Example:" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
uidnamecompany
0246000b6-0256-476b-b125-0f0b95719a1dSteve JobsApple
12155499d-3e0b-4e8a-965f-acd3f31ac2fcBill GatesMicrosoft
25f51767f-f743-43d1-aeca-5eaaad5e47abMark ZuckerbergFacebook
3e86c9b7e-53ba-41bf-98e2-6979f1d462cfLarry PageGoogle
45aa2d881-8721-4eba-a73d-e5f0a19a5edeSergey BrinYandex
\n", "
" ], "text/plain": [ " uid name company\n", "0 246000b6-0256-476b-b125-0f0b95719a1d Steve Jobs Apple\n", "1 2155499d-3e0b-4e8a-965f-acd3f31ac2fc Bill Gates Microsoft\n", "2 5f51767f-f743-43d1-aeca-5eaaad5e47ab Mark Zuckerberg Facebook\n", "3 e86c9b7e-53ba-41bf-98e2-6979f1d462cf Larry Page Google\n", "4 5aa2d881-8721-4eba-a73d-e5f0a19a5ede Sergey Brin Yandex" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pathlib\n", "import uuid\n", "\n", "import pandas as pd\n", "\n", "csvfile = pathlib.Path(r\"~/Desktop/pandas.csv\").expanduser()\n", "txtfile = pathlib.Path(r\"~/Desktop/pandas.txt\").expanduser()\n", "data = dict(\n", " uid=[uuid.uuid4() for _ in range(5)],\n", " name=[\"Steve Jobs\", \"Bill Gates\", \"Mark Zuckerberg\", \"Larry Page\", \"Sergey Brin\"],\n", " company=[\"Apple\", \"Microsoft\", \"Facebook\", \"Google\", \"Yandex\"],\n", ")\n", "df = pd.DataFrame(data)\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "df.to_csv(csvfile, index=False)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[0muid\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcompany\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m\u001b[0;36m246000\u001b[0m\u001b[0mb6\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;36m256\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m476\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mb125\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0mf0b95719a1d\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mSteve\u001b[0m \u001b[0mJobs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mApple\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m\u001b[0;36m2155499\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m3e0\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m4e8\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m965\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0macd3f31ac2fc\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mBill\u001b[0m \u001b[0mGates\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mMicrosoft\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0mf51767f\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mf743\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m43\u001b[0m\u001b[0md1\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0maeca\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0meaaad5e47ab\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mMark\u001b[0m \u001b[0mZuckerberg\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mFacebook\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m\u001b[0me86c9b7e\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m53\u001b[0m\u001b[0mba\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m41\u001b[0m\u001b[0mbf\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m98e2\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m6979\u001b[0m\u001b[0mf1d462cf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mLarry\u001b[0m \u001b[0mPage\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mGoogle\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0maa2d881\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m8721\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0meba\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0ma73d\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0me5f0a19a5ede\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mSergey\u001b[0m \u001b[0mBrin\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mYandex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n" ] } ], "source": [ "%pycat ~/Desktop/pandas.csv" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "df.to_csv(txtfile, index=False, sep=\"\\t\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[0muid\u001b[0m \u001b[0mname\u001b[0m \u001b[0mcompany\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m\u001b[0;36m246000\u001b[0m\u001b[0mb6\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;36m256\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m476\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mb125\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0mf0b95719a1d\u001b[0m \u001b[0mSteve\u001b[0m \u001b[0mJobs\u001b[0m \u001b[0mApple\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m\u001b[0;36m2155499\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m3e0\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m4e8\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m965\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0macd3f31ac2fc\u001b[0m \u001b[0mBill\u001b[0m \u001b[0mGates\u001b[0m \u001b[0mMicrosoft\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0mf51767f\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mf743\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m43\u001b[0m\u001b[0md1\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0maeca\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0meaaad5e47ab\u001b[0m \u001b[0mMark\u001b[0m \u001b[0mZuckerberg\u001b[0m \u001b[0mFacebook\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m\u001b[0me86c9b7e\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m53\u001b[0m\u001b[0mba\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m41\u001b[0m\u001b[0mbf\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m98e2\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m6979\u001b[0m\u001b[0mf1d462cf\u001b[0m \u001b[0mLarry\u001b[0m \u001b[0mPage\u001b[0m \u001b[0mGoogle\u001b[0m\u001b[0;34m\u001b[0m\n", "\u001b[0;34m\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0maa2d881\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m8721\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0meba\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0ma73d\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0me5f0a19a5ede\u001b[0m \u001b[0mSergey\u001b[0m \u001b[0mBrin\u001b[0m \u001b[0mYandex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n" ] } ], "source": [ "%pycat ~/Desktop/pandas.txt" ] }, { "cell_type": "markdown", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "## read" ] }, { "cell_type": "markdown", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "### sep & delimeter" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " uid name company\n", "0 246000b6-0256-476b-b125-0f0b95719a1d Steve Jobs Apple\n", "1 2155499d-3e0b-4e8a-965f-acd3f31ac2fc Bill Gates Microsoft\n", "2 5f51767f-f743-43d1-aeca-5eaaad5e47ab Mark Zuckerberg Facebook\n", "3 e86c9b7e-53ba-41bf-98e2-6979f1d462cf Larry Page Google\n", "4 5aa2d881-8721-4eba-a73d-e5f0a19a5ede Sergey Brin Yandex\n" ] } ], "source": [ "import pathlib\n", "\n", "txtfile = pathlib.Path(r\"~/Desktop/pandas.txt\").expanduser()\n", "\n", "df = pd.read_csv(txtfile, sep=\"\\t\")\n", "print(df)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "### usecols" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " name company\n", "0 Steve Jobs Apple\n", "1 Bill Gates Microsoft\n", "2 Mark Zuckerberg Facebook\n", "3 Larry Page Google\n", "4 Sergey Brin Yandex\n" ] } ], "source": [ "import pathlib\n", "csvfile = pathlib.Path(r\"~/Desktop/pandas.csv\").expanduser()\n", "\n", "df = pd.read_csv(csvfile, usecols=[\"name\", \"company\"])\n", "print(df)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "### skiprows" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "import pathlib\n", "\n", "csvfile = pathlib.Path(r\"~/Desktop/pandas.csv\").expanduser()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
uidnamecompany
02155499d-3e0b-4e8a-965f-acd3f31ac2fcBill GatesMicrosoft
1e86c9b7e-53ba-41bf-98e2-6979f1d462cfLarry PageGoogle
25aa2d881-8721-4eba-a73d-e5f0a19a5edeSergey BrinYandex
\n", "
" ], "text/plain": [ " uid name company\n", "0 2155499d-3e0b-4e8a-965f-acd3f31ac2fc Bill Gates Microsoft\n", "1 e86c9b7e-53ba-41bf-98e2-6979f1d462cf Larry Page Google\n", "2 5aa2d881-8721-4eba-a73d-e5f0a19a5ede Sergey Brin Yandex" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.read_csv(csvfile, skiprows=[1, 3])" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
246000b6-0256-476b-b125-0f0b95719a1dSteve JobsApple
05f51767f-f743-43d1-aeca-5eaaad5e47abMark ZuckerbergFacebook
15aa2d881-8721-4eba-a73d-e5f0a19a5edeSergey BrinYandex
\n", "
" ], "text/plain": [ " 246000b6-0256-476b-b125-0f0b95719a1d Steve Jobs Apple\n", "0 5f51767f-f743-43d1-aeca-5eaaad5e47ab Mark Zuckerberg Facebook\n", "1 5aa2d881-8721-4eba-a73d-e5f0a19a5ede Sergey Brin Yandex" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.read_csv(csvfile, skiprows=lambda x: x % 2 == 0)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
uidnamecompany
0246000b6-0256-476b-b125-0f0b95719a1dSteve JobsApple
12155499d-3e0b-4e8a-965f-acd3f31ac2fcBill GatesMicrosoft
\n", "
" ], "text/plain": [ " uid name company\n", "0 246000b6-0256-476b-b125-0f0b95719a1d Steve Jobs Apple\n", "1 2155499d-3e0b-4e8a-965f-acd3f31ac2fc Bill Gates Microsoft" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.read_csv(csvfile, nrows=2)" ] }, { "cell_type": "markdown", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "## write" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "import csv\n", "from io import StringIO\n", "\n", "import pandas as pd\n", "\n", "data = StringIO(\n", " \"\"\"a,b,c\\n\n", " 1,\"Hello, world\",3\\n\n", " \"\"\"\n", ")\n", "\n", "df = pd.read_csv(data)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\"a\",\"b\",\"c\"\n", "\"1\",\"Hello, world\",\"3\"\n", "\n" ] } ], "source": [ "print(df.to_csv(index=False, quoting=csv.QUOTE_ALL))" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\"a\",\"b\",\"c\"\n", "1,\"Hello, world\",3\n", "\n" ] } ], "source": [ "print(df.to_csv(index=False, quoting=csv.QUOTE_NONNUMERIC))" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\"a\",\"b\",\"c\"\n", "\"1\",\"Hello, world\",\"3\"\n", "\n" ] } ], "source": [ "csv_string = df.to_csv(index=False, quoting=csv.QUOTE_ALL)\n", "print(csv_string)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "type: \n" ] } ], "source": [ "print(f\"type: {type(csv_string)}\")" ] }, { "cell_type": "markdown", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "# I/O:Excel" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "如果未安装 `openpyxl`,则可以通过取消如下命令的注释后执行安装:" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "# !pip install openpyxl" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "## read" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " id name category\n", "0 0 Apple Tech\n", "1 1 Google Tech\n", "2 2 Microsoft Tech\n", "3 3 Facebook Social\n", "4 4 Amazon E-Commerce\n" ] } ], "source": [ "import pathlib\n", "\n", "import pandas as pd\n", "\n", "excel_file = pathlib.Path(\"../../data\") / \"excel_io_example.xlsx\"\n", "\n", "df = pd.read_excel(excel_file, engine=\"openpyxl\")\n", "print(df)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "## write" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " NAME CATEGORY\n", "0 Apple Tech\n", "1 Google Tech\n", "2 Microsoft Tech\n", "3 Facebook Social\n", "4 Amazon E-Commerce\n" ] } ], "source": [ "excel_output = (\n", " pathlib.Path(\"~/Desktop\").expanduser() / \"excel_io_example_output.xlsx\"\n", ")\n", "\n", "df.columns = df.columns.str.upper()\n", "df = df.drop(\"ID\", axis=1)\n", "print(df)\n", "df.to_excel(excel_output)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "# I/O:Database" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "# generate fake data\n", "\n", "import pathlib\n", "import sqlite3\n", "\n", "DB_PATH = pathlib.Path(\"~/Desktop/pandas.db\").expanduser()\n", "\n", "db = sqlite3.connect(DB_PATH)\n", "\n", "\n", "def generate_fake_data(conn: sqlite3.Connection):\n", " conn.execute(\n", " \"\"\"\n", " CREATE TABLE IF NOT EXISTS items (\n", " ID int not null, -- 自增ID\n", " `NAME` VARCHAR(10), -- 商品名\n", " PRICE float(2), -- 价格\n", " `NUMBER` int(10) -- 数量\n", " );\n", " \"\"\"\n", " )\n", "\n", " data = [\n", " (0, \"Apple\", 6.99, 4),\n", " (1, \"Orange\", 4.92, 10),\n", " (2, \"Peach\", 11.00, 3),\n", " (3, \"Banana\", 2.33, 7),\n", " (4, \"Purple\", 7.7, 8),\n", " ]\n", " has_data = conn.execute(\"SELECT COUNT(0) FROM items\").fetchone()[0]\n", " if not has_data:\n", " conn.executemany(\n", " \"INSERT INTO ITEMS(ID, NAME, PRICE, NUMBER) VALUES(?, ?, ?, ?)\", data\n", " )\n", " conn.commit()\n", "\n", "\n", "generate_fake_data(conn=db)\n", "db.close()" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "## read" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } }, "source": [ "SQL query example:\n", "```sql\n", "SELECT s.name,\n", " s.category\n", "FROM (\n", " SELECT `name`,\n", " case\n", " WHEN number % 2 == 0 THEN 0\n", " WHEN number % 3 >= 1 THEN 1\n", " WHEN number % 4 != 0 THEN 3\n", " ELSE number\n", " END as category\n", " FROM items\n", " ) s\n", "WHERE s.category > 0;\n", "```" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namecategory
2Peach3
3Banana1
\n", "
" ], "text/plain": [ " name category\n", "2 Peach 3\n", "3 Banana 1" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pathlib\n", "import sqlite3\n", "\n", "import pandas as pd\n", "\n", "DB_PATH = pathlib.Path(\"~/Desktop/pandas.db\").expanduser()\n", "\n", "db = sqlite3.connect(DB_PATH)\n", "data = pd.read_sql_query(\"SELECT * FROM items\", db)\n", "data.columns = data.columns.str.lower()\n", "\n", "\n", "def case_when(number):\n", " if number % 2 == 0:\n", " return 0\n", " elif number % 3 >= 1:\n", " return 1\n", " elif number % 4 != 0:\n", " return 3\n", " else:\n", " return number\n", "\n", "\n", "data[\"category\"] = data[\"number\"].map(case_when)\n", "data.loc[data[\"category\"] > 0, [\"name\", \"category\"]]\n" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "## write" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " id name price number\n", "0 5 Cherry 15 20\n", "1 6 BlackBerry 20 11\n", "2 7 Corn 3 100\n" ] } ], "source": [ "items = [\n", " dict(id=5, name=\"Cherry\", price=15, number=20),\n", " dict(id=6, name=\"BlackBerry\", price=20, number=11),\n", " dict(id=7, name=\"Corn\", price=3, number=100),\n", "]\n", "\n", "df = pd.DataFrame(items)\n", "df.to_sql(name=\"items\", con=db, if_exists=\"append\", index=False)\n", "print(df)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[(0, 'Apple', 6.99, 4), (1, 'Orange', 4.92, 10), (2, 'Peach', 11.0, 3), (3, 'Banana', 2.33, 7), (4, 'Purple', 7.7, 8), (5, 'Cherry', 15.0, 20), (6, 'BlackBerry', 20.0, 11), (7, 'Corn', 3.0, 100)]\n" ] } ], "source": [ "rows = db.execute(\"SELECT * FROM items\").fetchall()\n", "print(rows)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [] } ], "metadata": { "interpreter": { "hash": "13977d4cc82dee5f9d9535ceb495bd0ab12a43c33c664e5f0d53c24cf634b67f" }, "kernelspec": { "display_name": "Python 3.9.0 ('pandas-startup')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.0" } }, "nbformat": 4, "nbformat_minor": 1 }