1080 lines
28 KiB
Plaintext
1080 lines
28 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"# I/O:CSV"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"Example:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>uid</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>company</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>246000b6-0256-476b-b125-0f0b95719a1d</td>\n",
|
||
" <td>Steve Jobs</td>\n",
|
||
" <td>Apple</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2155499d-3e0b-4e8a-965f-acd3f31ac2fc</td>\n",
|
||
" <td>Bill Gates</td>\n",
|
||
" <td>Microsoft</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>5f51767f-f743-43d1-aeca-5eaaad5e47ab</td>\n",
|
||
" <td>Mark Zuckerberg</td>\n",
|
||
" <td>Facebook</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>e86c9b7e-53ba-41bf-98e2-6979f1d462cf</td>\n",
|
||
" <td>Larry Page</td>\n",
|
||
" <td>Google</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5aa2d881-8721-4eba-a73d-e5f0a19a5ede</td>\n",
|
||
" <td>Sergey Brin</td>\n",
|
||
" <td>Yandex</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" uid name company\n",
|
||
"0 246000b6-0256-476b-b125-0f0b95719a1d Steve Jobs Apple\n",
|
||
"1 2155499d-3e0b-4e8a-965f-acd3f31ac2fc Bill Gates Microsoft\n",
|
||
"2 5f51767f-f743-43d1-aeca-5eaaad5e47ab Mark Zuckerberg Facebook\n",
|
||
"3 e86c9b7e-53ba-41bf-98e2-6979f1d462cf Larry Page Google\n",
|
||
"4 5aa2d881-8721-4eba-a73d-e5f0a19a5ede Sergey Brin Yandex"
|
||
]
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import pathlib\n",
|
||
"import uuid\n",
|
||
"\n",
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"csvfile = pathlib.Path(r\"~/Desktop/pandas.csv\").expanduser()\n",
|
||
"txtfile = pathlib.Path(r\"~/Desktop/pandas.txt\").expanduser()\n",
|
||
"data = dict(\n",
|
||
" uid=[uuid.uuid4() for _ in range(5)],\n",
|
||
" name=[\"Steve Jobs\", \"Bill Gates\", \"Mark Zuckerberg\", \"Larry Page\", \"Sergey Brin\"],\n",
|
||
" company=[\"Apple\", \"Microsoft\", \"Facebook\", \"Google\", \"Yandex\"],\n",
|
||
")\n",
|
||
"df = pd.DataFrame(data)\n",
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"df.to_csv(csvfile, index=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\u001b[0muid\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcompany\u001b[0m\u001b[0;34m\u001b[0m\n",
|
||
"\u001b[0;34m\u001b[0m\u001b[0;36m246000\u001b[0m\u001b[0mb6\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;36m256\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m476\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mb125\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0mf0b95719a1d\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mSteve\u001b[0m \u001b[0mJobs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mApple\u001b[0m\u001b[0;34m\u001b[0m\n",
|
||
"\u001b[0;34m\u001b[0m\u001b[0;36m2155499\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m3e0\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m4e8\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m965\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0macd3f31ac2fc\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mBill\u001b[0m \u001b[0mGates\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mMicrosoft\u001b[0m\u001b[0;34m\u001b[0m\n",
|
||
"\u001b[0;34m\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0mf51767f\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mf743\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m43\u001b[0m\u001b[0md1\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0maeca\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0meaaad5e47ab\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mMark\u001b[0m \u001b[0mZuckerberg\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mFacebook\u001b[0m\u001b[0;34m\u001b[0m\n",
|
||
"\u001b[0;34m\u001b[0m\u001b[0me86c9b7e\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m53\u001b[0m\u001b[0mba\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m41\u001b[0m\u001b[0mbf\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m98e2\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m6979\u001b[0m\u001b[0mf1d462cf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mLarry\u001b[0m \u001b[0mPage\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mGoogle\u001b[0m\u001b[0;34m\u001b[0m\n",
|
||
"\u001b[0;34m\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0maa2d881\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m8721\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0meba\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0ma73d\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0me5f0a19a5ede\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mSergey\u001b[0m \u001b[0mBrin\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mYandex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"%pycat ~/Desktop/pandas.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"df.to_csv(txtfile, index=False, sep=\"\\t\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\u001b[0muid\u001b[0m \u001b[0mname\u001b[0m \u001b[0mcompany\u001b[0m\u001b[0;34m\u001b[0m\n",
|
||
"\u001b[0;34m\u001b[0m\u001b[0;36m246000\u001b[0m\u001b[0mb6\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;36m256\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m476\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mb125\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0mf0b95719a1d\u001b[0m \u001b[0mSteve\u001b[0m \u001b[0mJobs\u001b[0m \u001b[0mApple\u001b[0m\u001b[0;34m\u001b[0m\n",
|
||
"\u001b[0;34m\u001b[0m\u001b[0;36m2155499\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m3e0\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m4e8\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m965\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0macd3f31ac2fc\u001b[0m \u001b[0mBill\u001b[0m \u001b[0mGates\u001b[0m \u001b[0mMicrosoft\u001b[0m\u001b[0;34m\u001b[0m\n",
|
||
"\u001b[0;34m\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0mf51767f\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mf743\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m43\u001b[0m\u001b[0md1\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0maeca\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0meaaad5e47ab\u001b[0m \u001b[0mMark\u001b[0m \u001b[0mZuckerberg\u001b[0m \u001b[0mFacebook\u001b[0m\u001b[0;34m\u001b[0m\n",
|
||
"\u001b[0;34m\u001b[0m\u001b[0me86c9b7e\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m53\u001b[0m\u001b[0mba\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m41\u001b[0m\u001b[0mbf\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m98e2\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m6979\u001b[0m\u001b[0mf1d462cf\u001b[0m \u001b[0mLarry\u001b[0m \u001b[0mPage\u001b[0m \u001b[0mGoogle\u001b[0m\u001b[0;34m\u001b[0m\n",
|
||
"\u001b[0;34m\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0maa2d881\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m8721\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0meba\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0ma73d\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0me5f0a19a5ede\u001b[0m \u001b[0mSergey\u001b[0m \u001b[0mBrin\u001b[0m \u001b[0mYandex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"%pycat ~/Desktop/pandas.txt"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"## read"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"### sep & delimeter"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" uid name company\n",
|
||
"0 246000b6-0256-476b-b125-0f0b95719a1d Steve Jobs Apple\n",
|
||
"1 2155499d-3e0b-4e8a-965f-acd3f31ac2fc Bill Gates Microsoft\n",
|
||
"2 5f51767f-f743-43d1-aeca-5eaaad5e47ab Mark Zuckerberg Facebook\n",
|
||
"3 e86c9b7e-53ba-41bf-98e2-6979f1d462cf Larry Page Google\n",
|
||
"4 5aa2d881-8721-4eba-a73d-e5f0a19a5ede Sergey Brin Yandex\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import pathlib\n",
|
||
"\n",
|
||
"txtfile = pathlib.Path(r\"~/Desktop/pandas.txt\").expanduser()\n",
|
||
"\n",
|
||
"df = pd.read_csv(txtfile, sep=\"\\t\")\n",
|
||
"print(df)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"### usecols"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" name company\n",
|
||
"0 Steve Jobs Apple\n",
|
||
"1 Bill Gates Microsoft\n",
|
||
"2 Mark Zuckerberg Facebook\n",
|
||
"3 Larry Page Google\n",
|
||
"4 Sergey Brin Yandex\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import pathlib\n",
|
||
"csvfile = pathlib.Path(r\"~/Desktop/pandas.csv\").expanduser()\n",
|
||
"\n",
|
||
"df = pd.read_csv(csvfile, usecols=[\"name\", \"company\"])\n",
|
||
"print(df)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"### skiprows"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pathlib\n",
|
||
"\n",
|
||
"csvfile = pathlib.Path(r\"~/Desktop/pandas.csv\").expanduser()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>uid</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>company</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2155499d-3e0b-4e8a-965f-acd3f31ac2fc</td>\n",
|
||
" <td>Bill Gates</td>\n",
|
||
" <td>Microsoft</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>e86c9b7e-53ba-41bf-98e2-6979f1d462cf</td>\n",
|
||
" <td>Larry Page</td>\n",
|
||
" <td>Google</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>5aa2d881-8721-4eba-a73d-e5f0a19a5ede</td>\n",
|
||
" <td>Sergey Brin</td>\n",
|
||
" <td>Yandex</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" uid name company\n",
|
||
"0 2155499d-3e0b-4e8a-965f-acd3f31ac2fc Bill Gates Microsoft\n",
|
||
"1 e86c9b7e-53ba-41bf-98e2-6979f1d462cf Larry Page Google\n",
|
||
"2 5aa2d881-8721-4eba-a73d-e5f0a19a5ede Sergey Brin Yandex"
|
||
]
|
||
},
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"pd.read_csv(csvfile, skiprows=[1, 3])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>246000b6-0256-476b-b125-0f0b95719a1d</th>\n",
|
||
" <th>Steve Jobs</th>\n",
|
||
" <th>Apple</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>5f51767f-f743-43d1-aeca-5eaaad5e47ab</td>\n",
|
||
" <td>Mark Zuckerberg</td>\n",
|
||
" <td>Facebook</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>5aa2d881-8721-4eba-a73d-e5f0a19a5ede</td>\n",
|
||
" <td>Sergey Brin</td>\n",
|
||
" <td>Yandex</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" 246000b6-0256-476b-b125-0f0b95719a1d Steve Jobs Apple\n",
|
||
"0 5f51767f-f743-43d1-aeca-5eaaad5e47ab Mark Zuckerberg Facebook\n",
|
||
"1 5aa2d881-8721-4eba-a73d-e5f0a19a5ede Sergey Brin Yandex"
|
||
]
|
||
},
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"pd.read_csv(csvfile, skiprows=lambda x: x % 2 == 0)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>uid</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>company</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>246000b6-0256-476b-b125-0f0b95719a1d</td>\n",
|
||
" <td>Steve Jobs</td>\n",
|
||
" <td>Apple</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2155499d-3e0b-4e8a-965f-acd3f31ac2fc</td>\n",
|
||
" <td>Bill Gates</td>\n",
|
||
" <td>Microsoft</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" uid name company\n",
|
||
"0 246000b6-0256-476b-b125-0f0b95719a1d Steve Jobs Apple\n",
|
||
"1 2155499d-3e0b-4e8a-965f-acd3f31ac2fc Bill Gates Microsoft"
|
||
]
|
||
},
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"pd.read_csv(csvfile, nrows=2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"## write"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import csv\n",
|
||
"from io import StringIO\n",
|
||
"\n",
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"data = StringIO(\n",
|
||
" \"\"\"a,b,c\\n\n",
|
||
" 1,\"Hello, world\",3\\n\n",
|
||
" \"\"\"\n",
|
||
")\n",
|
||
"\n",
|
||
"df = pd.read_csv(data)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\"a\",\"b\",\"c\"\n",
|
||
"\"1\",\"Hello, world\",\"3\"\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(df.to_csv(index=False, quoting=csv.QUOTE_ALL))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\"a\",\"b\",\"c\"\n",
|
||
"1,\"Hello, world\",3\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(df.to_csv(index=False, quoting=csv.QUOTE_NONNUMERIC))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\"a\",\"b\",\"c\"\n",
|
||
"\"1\",\"Hello, world\",\"3\"\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"csv_string = df.to_csv(index=False, quoting=csv.QUOTE_ALL)\n",
|
||
"print(csv_string)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"type: <class 'str'>\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(f\"type: {type(csv_string)}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"# I/O:Excel"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"如果未安装 `openpyxl`,则可以通过取消如下命令的注释后执行安装:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# !pip install openpyxl"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"## read"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" id name category\n",
|
||
"0 0 Apple Tech\n",
|
||
"1 1 Google Tech\n",
|
||
"2 2 Microsoft Tech\n",
|
||
"3 3 Facebook Social\n",
|
||
"4 4 Amazon E-Commerce\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import pathlib\n",
|
||
"\n",
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"excel_file = pathlib.Path(\"../../data\") / \"excel_io_example.xlsx\"\n",
|
||
"\n",
|
||
"df = pd.read_excel(excel_file, engine=\"openpyxl\")\n",
|
||
"print(df)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"source": [
|
||
"## write"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" NAME CATEGORY\n",
|
||
"0 Apple Tech\n",
|
||
"1 Google Tech\n",
|
||
"2 Microsoft Tech\n",
|
||
"3 Facebook Social\n",
|
||
"4 Amazon E-Commerce\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"excel_output = (\n",
|
||
" pathlib.Path(\"~/Desktop\").expanduser() / \"excel_io_example_output.xlsx\"\n",
|
||
")\n",
|
||
"\n",
|
||
"df.columns = df.columns.str.upper()\n",
|
||
"df = df.drop(\"ID\", axis=1)\n",
|
||
"print(df)\n",
|
||
"df.to_excel(excel_output)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"# I/O:Database"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# generate fake data\n",
|
||
"\n",
|
||
"import pathlib\n",
|
||
"import sqlite3\n",
|
||
"\n",
|
||
"DB_PATH = pathlib.Path(\"~/Desktop/pandas.db\").expanduser()\n",
|
||
"\n",
|
||
"db = sqlite3.connect(DB_PATH)\n",
|
||
"\n",
|
||
"\n",
|
||
"def generate_fake_data(conn: sqlite3.Connection):\n",
|
||
" conn.execute(\n",
|
||
" \"\"\"\n",
|
||
" CREATE TABLE IF NOT EXISTS items (\n",
|
||
" ID int not null, -- 自增ID\n",
|
||
" `NAME` VARCHAR(10), -- 商品名\n",
|
||
" PRICE float(2), -- 价格\n",
|
||
" `NUMBER` int(10) -- 数量\n",
|
||
" );\n",
|
||
" \"\"\"\n",
|
||
" )\n",
|
||
"\n",
|
||
" data = [\n",
|
||
" (0, \"Apple\", 6.99, 4),\n",
|
||
" (1, \"Orange\", 4.92, 10),\n",
|
||
" (2, \"Peach\", 11.00, 3),\n",
|
||
" (3, \"Banana\", 2.33, 7),\n",
|
||
" (4, \"Purple\", 7.7, 8),\n",
|
||
" ]\n",
|
||
" has_data = conn.execute(\"SELECT COUNT(0) FROM items\").fetchone()[0]\n",
|
||
" if not has_data:\n",
|
||
" conn.executemany(\n",
|
||
" \"INSERT INTO ITEMS(ID, NAME, PRICE, NUMBER) VALUES(?, ?, ?, ?)\", data\n",
|
||
" )\n",
|
||
" conn.commit()\n",
|
||
"\n",
|
||
"\n",
|
||
"generate_fake_data(conn=db)\n",
|
||
"db.close()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"## read"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"SQL query example:\n",
|
||
"```sql\n",
|
||
"SELECT s.name,\n",
|
||
" s.category\n",
|
||
"FROM (\n",
|
||
" SELECT `name`,\n",
|
||
" case\n",
|
||
" WHEN number % 2 == 0 THEN 0\n",
|
||
" WHEN number % 3 >= 1 THEN 1\n",
|
||
" WHEN number % 4 != 0 THEN 3\n",
|
||
" ELSE number\n",
|
||
" END as category\n",
|
||
" FROM items\n",
|
||
" ) s\n",
|
||
"WHERE s.category > 0;\n",
|
||
"```"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>category</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>Peach</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>Banana</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" name category\n",
|
||
"2 Peach 3\n",
|
||
"3 Banana 1"
|
||
]
|
||
},
|
||
"execution_count": 23,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import pathlib\n",
|
||
"import sqlite3\n",
|
||
"\n",
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"DB_PATH = pathlib.Path(\"~/Desktop/pandas.db\").expanduser()\n",
|
||
"\n",
|
||
"db = sqlite3.connect(DB_PATH)\n",
|
||
"data = pd.read_sql_query(\"SELECT * FROM items\", db)\n",
|
||
"data.columns = data.columns.str.lower()\n",
|
||
"\n",
|
||
"\n",
|
||
"def case_when(number):\n",
|
||
" if number % 2 == 0:\n",
|
||
" return 0\n",
|
||
" elif number % 3 >= 1:\n",
|
||
" return 1\n",
|
||
" elif number % 4 != 0:\n",
|
||
" return 3\n",
|
||
" else:\n",
|
||
" return number\n",
|
||
"\n",
|
||
"\n",
|
||
"data[\"category\"] = data[\"number\"].map(case_when)\n",
|
||
"data.loc[data[\"category\"] > 0, [\"name\", \"category\"]]\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"source": [
|
||
"## write"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" id name price number\n",
|
||
"0 5 Cherry 15 20\n",
|
||
"1 6 BlackBerry 20 11\n",
|
||
"2 7 Corn 3 100\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"items = [\n",
|
||
" dict(id=5, name=\"Cherry\", price=15, number=20),\n",
|
||
" dict(id=6, name=\"BlackBerry\", price=20, number=11),\n",
|
||
" dict(id=7, name=\"Corn\", price=3, number=100),\n",
|
||
"]\n",
|
||
"\n",
|
||
"df = pd.DataFrame(items)\n",
|
||
"df.to_sql(name=\"items\", con=db, if_exists=\"append\", index=False)\n",
|
||
"print(df)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 25,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"[(0, 'Apple', 6.99, 4), (1, 'Orange', 4.92, 10), (2, 'Peach', 11.0, 3), (3, 'Banana', 2.33, 7), (4, 'Purple', 7.7, 8), (5, 'Cherry', 15.0, 20), (6, 'BlackBerry', 20.0, 11), (7, 'Corn', 3.0, 100)]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"rows = db.execute(\"SELECT * FROM items\").fetchall()\n",
|
||
"print(rows)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"interpreter": {
|
||
"hash": "13977d4cc82dee5f9d9535ceb495bd0ab12a43c33c664e5f0d53c24cf634b67f"
|
||
},
|
||
"kernelspec": {
|
||
"display_name": "Python 3.9.0 ('pandas-startup')",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.9.0"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 1
|
||
}
|