Files

1080 lines
28 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"collapsed": true,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# I/OCSV"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"Example"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>uid</th>\n",
" <th>name</th>\n",
" <th>company</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>246000b6-0256-476b-b125-0f0b95719a1d</td>\n",
" <td>Steve Jobs</td>\n",
" <td>Apple</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2155499d-3e0b-4e8a-965f-acd3f31ac2fc</td>\n",
" <td>Bill Gates</td>\n",
" <td>Microsoft</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>5f51767f-f743-43d1-aeca-5eaaad5e47ab</td>\n",
" <td>Mark Zuckerberg</td>\n",
" <td>Facebook</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>e86c9b7e-53ba-41bf-98e2-6979f1d462cf</td>\n",
" <td>Larry Page</td>\n",
" <td>Google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5aa2d881-8721-4eba-a73d-e5f0a19a5ede</td>\n",
" <td>Sergey Brin</td>\n",
" <td>Yandex</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" uid name company\n",
"0 246000b6-0256-476b-b125-0f0b95719a1d Steve Jobs Apple\n",
"1 2155499d-3e0b-4e8a-965f-acd3f31ac2fc Bill Gates Microsoft\n",
"2 5f51767f-f743-43d1-aeca-5eaaad5e47ab Mark Zuckerberg Facebook\n",
"3 e86c9b7e-53ba-41bf-98e2-6979f1d462cf Larry Page Google\n",
"4 5aa2d881-8721-4eba-a73d-e5f0a19a5ede Sergey Brin Yandex"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pathlib\n",
"import uuid\n",
"\n",
"import pandas as pd\n",
"\n",
"csvfile = pathlib.Path(r\"~/Desktop/pandas.csv\").expanduser()\n",
"txtfile = pathlib.Path(r\"~/Desktop/pandas.txt\").expanduser()\n",
"data = dict(\n",
" uid=[uuid.uuid4() for _ in range(5)],\n",
" name=[\"Steve Jobs\", \"Bill Gates\", \"Mark Zuckerberg\", \"Larry Page\", \"Sergey Brin\"],\n",
" company=[\"Apple\", \"Microsoft\", \"Facebook\", \"Google\", \"Yandex\"],\n",
")\n",
"df = pd.DataFrame(data)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"df.to_csv(csvfile, index=False)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[0muid\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcompany\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;36m246000\u001b[0m\u001b[0mb6\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;36m256\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m476\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mb125\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0mf0b95719a1d\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mSteve\u001b[0m \u001b[0mJobs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mApple\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;36m2155499\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m3e0\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m4e8\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m965\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0macd3f31ac2fc\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mBill\u001b[0m \u001b[0mGates\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mMicrosoft\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0mf51767f\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mf743\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m43\u001b[0m\u001b[0md1\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0maeca\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0meaaad5e47ab\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mMark\u001b[0m \u001b[0mZuckerberg\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mFacebook\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0me86c9b7e\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m53\u001b[0m\u001b[0mba\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m41\u001b[0m\u001b[0mbf\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m98e2\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m6979\u001b[0m\u001b[0mf1d462cf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mLarry\u001b[0m \u001b[0mPage\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mGoogle\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0maa2d881\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m8721\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0meba\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0ma73d\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0me5f0a19a5ede\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mSergey\u001b[0m \u001b[0mBrin\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mYandex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n"
]
}
],
"source": [
"%pycat ~/Desktop/pandas.csv"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"df.to_csv(txtfile, index=False, sep=\"\\t\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[0muid\u001b[0m \u001b[0mname\u001b[0m \u001b[0mcompany\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;36m246000\u001b[0m\u001b[0mb6\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;36m256\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m476\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mb125\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0mf0b95719a1d\u001b[0m \u001b[0mSteve\u001b[0m \u001b[0mJobs\u001b[0m \u001b[0mApple\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;36m2155499\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m3e0\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m4e8\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m965\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0macd3f31ac2fc\u001b[0m \u001b[0mBill\u001b[0m \u001b[0mGates\u001b[0m \u001b[0mMicrosoft\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0mf51767f\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mf743\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m43\u001b[0m\u001b[0md1\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0maeca\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0meaaad5e47ab\u001b[0m \u001b[0mMark\u001b[0m \u001b[0mZuckerberg\u001b[0m \u001b[0mFacebook\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0me86c9b7e\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m53\u001b[0m\u001b[0mba\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m41\u001b[0m\u001b[0mbf\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m98e2\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m6979\u001b[0m\u001b[0mf1d462cf\u001b[0m \u001b[0mLarry\u001b[0m \u001b[0mPage\u001b[0m \u001b[0mGoogle\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0maa2d881\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m8721\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0meba\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0ma73d\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0me5f0a19a5ede\u001b[0m \u001b[0mSergey\u001b[0m \u001b[0mBrin\u001b[0m \u001b[0mYandex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n"
]
}
],
"source": [
"%pycat ~/Desktop/pandas.txt"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## read"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"### sep & delimeter"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" uid name company\n",
"0 246000b6-0256-476b-b125-0f0b95719a1d Steve Jobs Apple\n",
"1 2155499d-3e0b-4e8a-965f-acd3f31ac2fc Bill Gates Microsoft\n",
"2 5f51767f-f743-43d1-aeca-5eaaad5e47ab Mark Zuckerberg Facebook\n",
"3 e86c9b7e-53ba-41bf-98e2-6979f1d462cf Larry Page Google\n",
"4 5aa2d881-8721-4eba-a73d-e5f0a19a5ede Sergey Brin Yandex\n"
]
}
],
"source": [
"import pathlib\n",
"\n",
"txtfile = pathlib.Path(r\"~/Desktop/pandas.txt\").expanduser()\n",
"\n",
"df = pd.read_csv(txtfile, sep=\"\\t\")\n",
"print(df)"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"### usecols"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" name company\n",
"0 Steve Jobs Apple\n",
"1 Bill Gates Microsoft\n",
"2 Mark Zuckerberg Facebook\n",
"3 Larry Page Google\n",
"4 Sergey Brin Yandex\n"
]
}
],
"source": [
"import pathlib\n",
"csvfile = pathlib.Path(r\"~/Desktop/pandas.csv\").expanduser()\n",
"\n",
"df = pd.read_csv(csvfile, usecols=[\"name\", \"company\"])\n",
"print(df)"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"### skiprows"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"import pathlib\n",
"\n",
"csvfile = pathlib.Path(r\"~/Desktop/pandas.csv\").expanduser()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>uid</th>\n",
" <th>name</th>\n",
" <th>company</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2155499d-3e0b-4e8a-965f-acd3f31ac2fc</td>\n",
" <td>Bill Gates</td>\n",
" <td>Microsoft</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>e86c9b7e-53ba-41bf-98e2-6979f1d462cf</td>\n",
" <td>Larry Page</td>\n",
" <td>Google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>5aa2d881-8721-4eba-a73d-e5f0a19a5ede</td>\n",
" <td>Sergey Brin</td>\n",
" <td>Yandex</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" uid name company\n",
"0 2155499d-3e0b-4e8a-965f-acd3f31ac2fc Bill Gates Microsoft\n",
"1 e86c9b7e-53ba-41bf-98e2-6979f1d462cf Larry Page Google\n",
"2 5aa2d881-8721-4eba-a73d-e5f0a19a5ede Sergey Brin Yandex"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.read_csv(csvfile, skiprows=[1, 3])"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>246000b6-0256-476b-b125-0f0b95719a1d</th>\n",
" <th>Steve Jobs</th>\n",
" <th>Apple</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>5f51767f-f743-43d1-aeca-5eaaad5e47ab</td>\n",
" <td>Mark Zuckerberg</td>\n",
" <td>Facebook</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5aa2d881-8721-4eba-a73d-e5f0a19a5ede</td>\n",
" <td>Sergey Brin</td>\n",
" <td>Yandex</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 246000b6-0256-476b-b125-0f0b95719a1d Steve Jobs Apple\n",
"0 5f51767f-f743-43d1-aeca-5eaaad5e47ab Mark Zuckerberg Facebook\n",
"1 5aa2d881-8721-4eba-a73d-e5f0a19a5ede Sergey Brin Yandex"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.read_csv(csvfile, skiprows=lambda x: x % 2 == 0)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>uid</th>\n",
" <th>name</th>\n",
" <th>company</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>246000b6-0256-476b-b125-0f0b95719a1d</td>\n",
" <td>Steve Jobs</td>\n",
" <td>Apple</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2155499d-3e0b-4e8a-965f-acd3f31ac2fc</td>\n",
" <td>Bill Gates</td>\n",
" <td>Microsoft</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" uid name company\n",
"0 246000b6-0256-476b-b125-0f0b95719a1d Steve Jobs Apple\n",
"1 2155499d-3e0b-4e8a-965f-acd3f31ac2fc Bill Gates Microsoft"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.read_csv(csvfile, nrows=2)"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## write"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"import csv\n",
"from io import StringIO\n",
"\n",
"import pandas as pd\n",
"\n",
"data = StringIO(\n",
" \"\"\"a,b,c\\n\n",
" 1,\"Hello, world\",3\\n\n",
" \"\"\"\n",
")\n",
"\n",
"df = pd.read_csv(data)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\"a\",\"b\",\"c\"\n",
"\"1\",\"Hello, world\",\"3\"\n",
"\n"
]
}
],
"source": [
"print(df.to_csv(index=False, quoting=csv.QUOTE_ALL))"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\"a\",\"b\",\"c\"\n",
"1,\"Hello, world\",3\n",
"\n"
]
}
],
"source": [
"print(df.to_csv(index=False, quoting=csv.QUOTE_NONNUMERIC))"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\"a\",\"b\",\"c\"\n",
"\"1\",\"Hello, world\",\"3\"\n",
"\n"
]
}
],
"source": [
"csv_string = df.to_csv(index=False, quoting=csv.QUOTE_ALL)\n",
"print(csv_string)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"type: <class 'str'>\n"
]
}
],
"source": [
"print(f\"type: {type(csv_string)}\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# I/OExcel"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"如果未安装 `openpyxl`,则可以通过取消如下命令的注释后执行安装:"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# !pip install openpyxl"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## read"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" id name category\n",
"0 0 Apple Tech\n",
"1 1 Google Tech\n",
"2 2 Microsoft Tech\n",
"3 3 Facebook Social\n",
"4 4 Amazon E-Commerce\n"
]
}
],
"source": [
"import pathlib\n",
"\n",
"import pandas as pd\n",
"\n",
"excel_file = pathlib.Path(\"../../data\") / \"excel_io_example.xlsx\"\n",
"\n",
"df = pd.read_excel(excel_file, engine=\"openpyxl\")\n",
"print(df)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"## write"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" NAME CATEGORY\n",
"0 Apple Tech\n",
"1 Google Tech\n",
"2 Microsoft Tech\n",
"3 Facebook Social\n",
"4 Amazon E-Commerce\n"
]
}
],
"source": [
"excel_output = (\n",
" pathlib.Path(\"~/Desktop\").expanduser() / \"excel_io_example_output.xlsx\"\n",
")\n",
"\n",
"df.columns = df.columns.str.upper()\n",
"df = df.drop(\"ID\", axis=1)\n",
"print(df)\n",
"df.to_excel(excel_output)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# I/ODatabase"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# generate fake data\n",
"\n",
"import pathlib\n",
"import sqlite3\n",
"\n",
"DB_PATH = pathlib.Path(\"~/Desktop/pandas.db\").expanduser()\n",
"\n",
"db = sqlite3.connect(DB_PATH)\n",
"\n",
"\n",
"def generate_fake_data(conn: sqlite3.Connection):\n",
" conn.execute(\n",
" \"\"\"\n",
" CREATE TABLE IF NOT EXISTS items (\n",
" ID int not null, -- 自增ID\n",
" `NAME` VARCHAR(10), -- 商品名\n",
" PRICE float(2), -- 价格\n",
" `NUMBER` int(10) -- 数量\n",
" );\n",
" \"\"\"\n",
" )\n",
"\n",
" data = [\n",
" (0, \"Apple\", 6.99, 4),\n",
" (1, \"Orange\", 4.92, 10),\n",
" (2, \"Peach\", 11.00, 3),\n",
" (3, \"Banana\", 2.33, 7),\n",
" (4, \"Purple\", 7.7, 8),\n",
" ]\n",
" has_data = conn.execute(\"SELECT COUNT(0) FROM items\").fetchone()[0]\n",
" if not has_data:\n",
" conn.executemany(\n",
" \"INSERT INTO ITEMS(ID, NAME, PRICE, NUMBER) VALUES(?, ?, ?, ?)\", data\n",
" )\n",
" conn.commit()\n",
"\n",
"\n",
"generate_fake_data(conn=db)\n",
"db.close()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## read"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"SQL query example\n",
"```sql\n",
"SELECT s.name,\n",
" s.category\n",
"FROM (\n",
" SELECT `name`,\n",
" case\n",
" WHEN number % 2 == 0 THEN 0\n",
" WHEN number % 3 >= 1 THEN 1\n",
" WHEN number % 4 != 0 THEN 3\n",
" ELSE number\n",
" END as category\n",
" FROM items\n",
" ) s\n",
"WHERE s.category > 0;\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>category</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Peach</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Banana</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name category\n",
"2 Peach 3\n",
"3 Banana 1"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pathlib\n",
"import sqlite3\n",
"\n",
"import pandas as pd\n",
"\n",
"DB_PATH = pathlib.Path(\"~/Desktop/pandas.db\").expanduser()\n",
"\n",
"db = sqlite3.connect(DB_PATH)\n",
"data = pd.read_sql_query(\"SELECT * FROM items\", db)\n",
"data.columns = data.columns.str.lower()\n",
"\n",
"\n",
"def case_when(number):\n",
" if number % 2 == 0:\n",
" return 0\n",
" elif number % 3 >= 1:\n",
" return 1\n",
" elif number % 4 != 0:\n",
" return 3\n",
" else:\n",
" return number\n",
"\n",
"\n",
"data[\"category\"] = data[\"number\"].map(case_when)\n",
"data.loc[data[\"category\"] > 0, [\"name\", \"category\"]]\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"## write"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" id name price number\n",
"0 5 Cherry 15 20\n",
"1 6 BlackBerry 20 11\n",
"2 7 Corn 3 100\n"
]
}
],
"source": [
"items = [\n",
" dict(id=5, name=\"Cherry\", price=15, number=20),\n",
" dict(id=6, name=\"BlackBerry\", price=20, number=11),\n",
" dict(id=7, name=\"Corn\", price=3, number=100),\n",
"]\n",
"\n",
"df = pd.DataFrame(items)\n",
"df.to_sql(name=\"items\", con=db, if_exists=\"append\", index=False)\n",
"print(df)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[(0, 'Apple', 6.99, 4), (1, 'Orange', 4.92, 10), (2, 'Peach', 11.0, 3), (3, 'Banana', 2.33, 7), (4, 'Purple', 7.7, 8), (5, 'Cherry', 15.0, 20), (6, 'BlackBerry', 20.0, 11), (7, 'Corn', 3.0, 100)]\n"
]
}
],
"source": [
"rows = db.execute(\"SELECT * FROM items\").fetchall()\n",
"print(rows)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"interpreter": {
"hash": "13977d4cc82dee5f9d9535ceb495bd0ab12a43c33c664e5f0d53c24cf634b67f"
},
"kernelspec": {
"display_name": "Python 3.9.0 ('pandas-startup')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0"
}
},
"nbformat": 4,
"nbformat_minor": 1
}