This commit is contained in:
lostecho
2023-10-21 20:51:16 +08:00
parent 95e7aaf099
commit 8b29af4c71
9 changed files with 1620 additions and 299 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 490 KiB

BIN
8093/8093.mp3 Normal file

Binary file not shown.

6
8093/8093note.txt Normal file
View File

@@ -0,0 +1,6 @@
“想尽一切办法集中注意力 不让自己被灾难(或任何坏事)改变”
* 思考都是物理器官分泌的化学物质的化学反应,有时非常不稳定;
* 晃晃脑袋真的管用
* 逼自己运动、睡觉,缓几天,想法会非常不一样。
* 专注于做原来一直做的事情 —— 比如,李笑来写书
* 拼命想,除了当下对自己冲击最大的事情之外,还有什么值得好好想想? “以冲击为起点不保证正确、全面、完整”……

1115
audio.json

File diff suppressed because it is too large Load Diff

399
feishu_downloader.ipynb Normal file
View File

@@ -0,0 +1,399 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "5ab50d0f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2023-10-21 13:58:26\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading Meetings: 50%|█████████████▌ | 1/2 [00:17<00:17, 17.92s/ meeting]Exception in thread Thread-38 (download_range):\n",
"Traceback (most recent call last):\n",
" File \"/usr/lib/python3/dist-packages/urllib3/connectionpool.py\", line 467, in _make_request\n",
" six.raise_from(e, None)\n",
" File \"<string>\", line 3, in raise_from\n",
" File \"/usr/lib/python3/dist-packages/urllib3/connectionpool.py\", line 462, in _make_request\n",
" httplib_response = conn.getresponse()\n",
" ^^^^^^^^^^^^^^^^^^\n",
" File \"/usr/lib/python3.11/http/client.py\", line 1378, in getresponse\n",
" response.begin()\n",
" File \"/usr/lib/python3.11/http/client.py\", line 318, in begin\n",
" version, status, reason = self._read_status()\n",
" ^^^^^^^^^^^^^^^^^^^\n",
" File \"/usr/lib/python3.11/http/client.py\", line 279, in _read_status\n",
" line = str(self.fp.readline(_MAXLINE + 1), \"iso-8859-1\")\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/usr/lib/python3.11/socket.py\", line 706, in readinto\n",
" return self._sock.recv_into(b)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/usr/lib/python3.11/ssl.py\", line 1311, in recv_into\n",
" return self.read(nbytes, buffer)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/usr/lib/python3.11/ssl.py\", line 1167, in read\n",
" return self._sslobj.read(len, buffer)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
"TimeoutError: The read operation timed out\n",
"\n",
"During handling of the above exception, another exception occurred:\n",
"\n",
"Traceback (most recent call last):\n",
" File \"/usr/lib/python3/dist-packages/requests/adapters.py\", line 486, in send\n",
" resp = conn.urlopen(\n",
" ^^^^^^^^^^^^^\n",
" File \"/usr/lib/python3/dist-packages/urllib3/connectionpool.py\", line 799, in urlopen\n",
" retries = retries.increment(\n",
" ^^^^^^^^^^^^^^^^^^\n",
" File \"/usr/lib/python3/dist-packages/urllib3/util/retry.py\", line 550, in increment\n",
" raise six.reraise(type(error), error, _stacktrace)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/usr/lib/python3/dist-packages/six.py\", line 719, in reraise\n",
" raise value\n",
" File \"/usr/lib/python3/dist-packages/urllib3/connectionpool.py\", line 715, in urlopen\n",
" httplib_response = self._make_request(\n",
" ^^^^^^^^^^^^^^^^^^^\n",
" File \"/usr/lib/python3/dist-packages/urllib3/connectionpool.py\", line 469, in _make_request\n",
" self._raise_timeout(err=e, url=url, timeout_value=read_timeout)\n",
" File \"/usr/lib/python3/dist-packages/urllib3/connectionpool.py\", line 358, in _raise_timeout\n",
" raise ReadTimeoutError(\n",
"urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='internal-api-drive-stream.feishu.cn', port=443): Read timed out. (read timeout=30)\n",
"\n",
"During handling of the above exception, another exception occurred:\n",
"\n",
"Traceback (most recent call last):\n",
" File \"/usr/lib/python3.11/threading.py\", line 1045, in _bootstrap_inner\n",
" self.run()\n",
" File \"/usr/lib/python3.11/threading.py\", line 982, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/tmp/ipykernel_25915/2918984942.py\", line 45, in download_range\n",
" File \"/usr/lib/python3/dist-packages/requests/api.py\", line 73, in get\n",
" return request(\"get\", url, params=params, **kwargs)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/usr/lib/python3/dist-packages/requests/api.py\", line 59, in request\n",
" return session.request(method=method, url=url, **kwargs)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/usr/lib/python3/dist-packages/requests/sessions.py\", line 589, in request\n",
" resp = self.send(prep, **send_kwargs)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/usr/lib/python3/dist-packages/requests/sessions.py\", line 703, in send\n",
" r = adapter.send(request, **kwargs)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/usr/lib/python3/dist-packages/requests/adapters.py\", line 532, in send\n",
" raise ReadTimeout(e, request=request)\n",
"requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='internal-api-drive-stream.feishu.cn', port=443): Read timed out. (read timeout=30)\n",
"Downloading Meetings: 100%|███████████████████████████| 2/2 [00:50<00:00, 25.42s/ meeting]\n",
"Deleting Meetings: 100%|██████████████████████████████| 1/1 [00:00<00:00, 1.18 meeting/s]\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[2], line 227\u001b[0m\n\u001b[1;32m 225\u001b[0m downloader\u001b[38;5;241m.\u001b[39mcheck_meetings()\n\u001b[1;32m 226\u001b[0m downloader\u001b[38;5;241m.\u001b[39mdelete_minutes(\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m--> 227\u001b[0m \u001b[43mtime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m3600\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[38;5;66;03m# 如果填写了管理参数,则定时查询妙记空间使用情况,超出指定额度则删除最早的指定数量的会议\u001b[39;00m\n\u001b[1;32m 230\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m :\n\u001b[1;32m 231\u001b[0m \u001b[38;5;66;03m# 从manager_cookie中获取X-Csrf-Token\u001b[39;00m\n\u001b[1;32m 232\u001b[0m x_csrf_token \u001b[38;5;241m=\u001b[39m manager_cookie[manager_cookie\u001b[38;5;241m.\u001b[39mfind(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m csrf_token=\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m csrf_token=\u001b[39m\u001b[38;5;124m'\u001b[39m):manager_cookie\u001b[38;5;241m.\u001b[39mfind(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m;\u001b[39m\u001b[38;5;124m'\u001b[39m, manager_cookie\u001b[38;5;241m.\u001b[39mfind(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m csrf_token=\u001b[39m\u001b[38;5;124m'\u001b[39m))]\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"import os, re, shutil, time, threading\n",
"import requests\n",
"from tqdm import tqdm\n",
"\n",
"\n",
"# Do not use the system proxy.\n",
"proxies = dict(http=None, https=None)\n",
"\n",
"# Multi-threaded downloader\n",
"class MultiDownloader:\n",
"    \"\"\"Download one file over HTTP by fetching byte ranges in parallel threads.\n",
"\n",
"    Attributes:\n",
"        headers: request headers sent with every request.\n",
"        url: URL of the file to download.\n",
"        file_name: destination path; run() creates its parent directory.\n",
"        thread_count: upper bound on the number of byte ranges (one thread each).\n",
"        chunk_size: bytes read per iteration while streaming a range.\n",
"        total_range: file size in bytes from a HEAD request, or None if unknown.\n",
"        failed_pages: ranges still incomplete after all retries of the last run().\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, headers, url, file_name, thread_count=20):\n",
"        self.headers = headers\n",
"        self.url = url\n",
"        self.file_name = file_name\n",
"        self.thread_count = thread_count\n",
"        self.chunk_size = 1024 * 1024\n",
"        self.total_range = self.get_file_size()\n",
"        self.file_lock = threading.Lock()\n",
"        self.failed_pages = []\n",
"\n",
"    def get_file_size(self):\n",
"        \"\"\"Return the Content-Length reported by a HEAD request, or None.\n",
"\n",
"        None is returned when the status is not 200 or the header is missing or\n",
"        not an integer, so run() can report the problem instead of crashing here.\n",
"        \"\"\"\n",
"        res = requests.head(self.url, headers=self.headers, timeout=30, proxies=proxies)\n",
"        if res.status_code != 200:\n",
"            return None\n",
"        try:\n",
"            return int(res.headers.get('Content-Length'))\n",
"        except (TypeError, ValueError):\n",
"            return None\n",
"\n",
"    def page_dispatcher(self, content_size):\n",
"        \"\"\"Yield inclusive byte ranges that cover [0, content_size) exactly once.\n",
"\n",
"        Each item is {'start_pos': int, 'end_pos': int}. At most thread_count\n",
"        ranges are produced and no empty or inverted range is ever yielded.\n",
"        \"\"\"\n",
"        if not content_size or content_size <= 0:\n",
"            return\n",
"        page_count = max(1, min(self.thread_count, content_size))\n",
"        page_size = -(-content_size // page_count)  # ceiling division\n",
"        for start_pos in range(0, content_size, page_size):\n",
"            yield {\n",
"                'start_pos': start_pos,\n",
"                'end_pos': min(start_pos + page_size, content_size) - 1\n",
"            }\n",
"\n",
"    def download_range(self, thread_name, page, file_handler):\n",
"        \"\"\"Download one byte range and write it into the shared file handle.\n",
"\n",
"        page['start_pos'] is advanced as data is written, so a retry resumes where\n",
"        the previous attempt stopped. Network errors are retried instead of killing\n",
"        the thread; a range that is still incomplete after all attempts is appended\n",
"        to self.failed_pages so run() can report it.\n",
"        \"\"\"\n",
"        try_times = 3\n",
"        for _ in range(try_times):\n",
"            if page['start_pos'] > page['end_pos']:\n",
"                break  # this range is already complete\n",
"            # Rebuild the Range header on every attempt so it matches the resume offset.\n",
"            range_headers = {**self.headers, \"Range\": f\"bytes={page['start_pos']}-{page['end_pos']}\"}\n",
"            try:\n",
"                with requests.get(url=self.url, headers=range_headers, stream=True, timeout=30, proxies=proxies) as res:\n",
"                    if res.status_code != 206:\n",
"                        continue\n",
"                    for data in res.iter_content(chunk_size=self.chunk_size):\n",
"                        with self.file_lock:\n",
"                            file_handler.seek(page[\"start_pos\"])\n",
"                            file_handler.write(data)\n",
"                        page[\"start_pos\"] += len(data)\n",
"            except requests.RequestException:\n",
"                # Timeouts and connection errors: try again from the current offset.\n",
"                continue\n",
"        if page['start_pos'] <= page['end_pos']:\n",
"            with self.file_lock:\n",
"                self.failed_pages.append(page)\n",
"\n",
"    def run(self):\n",
"        \"\"\"Download the whole file to self.file_name.\n",
"\n",
"        If a non-empty target file already exists the user is asked whether to\n",
"        overwrite it; answering 'n' returns without downloading.\n",
"\n",
"        Raises:\n",
"            Exception: if the file size is unknown or below 1024 bytes, or if any\n",
"                byte range could not be downloaded.\n",
"        \"\"\"\n",
"        if not self.total_range or self.total_range < 1024:\n",
"            raise Exception(\"get file total size failed\")\n",
"        target_dir = os.path.dirname(self.file_name)\n",
"        if os.path.exists(self.file_name) and os.path.getsize(self.file_name) != 0:\n",
"            while True:\n",
"                choice = input(f\" {self.file_name.split('/')[0]}已存在,是否覆盖(Y/n)?\")\n",
"                if choice == 'y' or choice == 'Y' or choice == '':\n",
"                    break\n",
"                elif choice == 'n' or choice == 'N':\n",
"                    return\n",
"        elif target_dir:\n",
"            # exist_ok: the directory may be left over from an interrupted download.\n",
"            os.makedirs(target_dir, exist_ok=True)\n",
"        self.failed_pages = []\n",
"        thread_list = []\n",
"        with open(self.file_name, \"wb+\") as f:\n",
"            for i, page in enumerate(self.page_dispatcher(self.total_range)):\n",
"                thread_list.append(threading.Thread(target=self.download_range, args=(i, page, f)))\n",
"            for thread in thread_list:\n",
"                thread.start()\n",
"            for thread in thread_list:\n",
"                thread.join()\n",
"        if self.failed_pages:\n",
"            raise Exception(f\"download incomplete: {len(self.failed_pages)} byte range(s) failed for {self.file_name}\")\n",
"\n",
"\n",
"# Meeting downloader\n",
"class MeetingDownloader:\n",
"    \"\"\"Download Feishu Minutes videos and subtitles, and delete the oldest minutes.\"\"\"\n",
"\n",
"    def __init__(self, cookie):\n",
"        \"\"\"Build the request headers from a browser cookie string.\n",
"\n",
"        Raises:\n",
"            Exception: if no 36-character bv_csrf_token can be read from cookie.\n",
"        \"\"\"\n",
"        # Match on a cookie boundary so the token is also found when it is the last\n",
"        # cookie (no trailing ';'); str.find() arithmetic cut off its final character.\n",
"        token_match = re.search(r'(?:^|;\\s*)bv_csrf_token=([^;]*)', cookie)\n",
"        self.headers = {\n",
"            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',\n",
"            'cookie': cookie,\n",
"            'bv-csrf-token': token_match.group(1) if token_match else '',\n",
"            'referer': 'https://meetings.feishu.cn/minutes/me',\n",
"            'content-type': 'application/x-www-form-urlencoded'\n",
"        }\n",
"        if len(self.headers.get('bv-csrf-token')) != 36:\n",
"            raise Exception(\"cookie中不包含bv_csrf_token请确保从请求`list?size=20&`中获取!\")\n",
"\n",
"    def get_meeting_info(self):\n",
"        \"\"\"Fetch the list of minutes and return it in reversed order.\n",
"\n",
"        The request uses size=1000 (number of minutes to fetch) and space_name=2.\n",
"        According to the original notes, space_name 1 is the home page (internal\n",
"        and external minutes) and 2 is \"my content\" (minutes owned by the user).\n",
"        \"\"\"\n",
"        get_rec_url = \"https://meetings.feishu.cn/minutes/api/space/list?&size=1000&space_name=2\"\n",
"        resp = requests.get(url=get_rec_url, headers=self.headers, timeout=30, proxies=proxies)\n",
"        # Reversed so that, per the original comment, the result runs from oldest to newest.\n",
"        return list(reversed(resp.json()['data']['list']))\n",
"\n",
"    def download_video(self, minutes_info):\n",
"        \"\"\"Download the video of a single minute.\n",
"\n",
"        The video is saved as '<name>/<name>.mp4', where <name> is built from the\n",
"        meeting's start time, stop time and topic. Returns <name>.\n",
"        \"\"\"\n",
"        # Look up the download link of the video.\n",
"        video_url_url = f\"https://meetings.feishu.cn/minutes/api/status?object_token={minutes_info['object_token']}&language=zh_cn&_t={int(time.time() * 1000)}\"\n",
"        resp = requests.get(url=video_url_url, headers=self.headers, timeout=30, proxies=proxies)\n",
"        video_url = resp.json()['data']['video_info']['video_download_url']\n",
"\n",
"        # Build the file name from the start/stop times (divided by 1000 before use) and the topic.\n",
"        start_time = time.strftime(\"%Y年%m月%d日%H时%M分\", time.localtime(minutes_info['start_time'] / 1000))\n",
"        stop_time = time.strftime(\"%Y年%m月%d日%H时%M分\", time.localtime(minutes_info['stop_time'] / 1000))\n",
"        file_name = start_time+\"至\"+stop_time+minutes_info['topic']\n",
"\n",
"        # Replace special characters in the file name with underscores.\n",
"        rstr = r\"[\\/\\\\\\:\\*\\?\\\"\\<\\>\\|]\"  # '/ \\ : * ? \" < > |'\n",
"        file_name = re.sub(rstr, \"_\", file_name)\n",
"\n",
"        # Multi-threaded download.\n",
"        run_params = {'headers': self.headers,\n",
"                      'url': video_url,\n",
"                      'file_name': f'{file_name}/{file_name}.mp4',\n",
"                      'thread_count': 20\n",
"                      }\n",
"        downloader = MultiDownloader(**run_params)\n",
"        downloader.run()\n",
"\n",
"        return file_name\n",
"\n",
"    def download_subtitle(self, object_token, file_name, file_mtime):\n",
"        \"\"\"Download the SRT subtitle of a single minute into the video's folder.\n",
"\n",
"        Afterwards the modification time of the subtitle, the video and the folder\n",
"        is set to file_mtime (seconds since the epoch).\n",
"\n",
"        Raises:\n",
"            Exception: if the export request does not return status 200.\n",
"        \"\"\"\n",
"        srt_url = \"https://meetings.feishu.cn/minutes/api/export\"\n",
"        params = {'add_speaker': 'true',  # include the speaker\n",
"                  'add_timestamp': 'true',  # include timestamps\n",
"                  'format': '3',  # SRT format\n",
"                  'object_token': object_token,  # id of the minute\n",
"                  }\n",
"        resp = requests.post(url=srt_url, params=params, headers=self.headers, timeout=30, proxies=proxies)\n",
"\n",
"        # With the wrong cookie the video may download while the subtitle does not.\n",
"        if resp.status_code != 200:\n",
"            raise Exception(f\"下载字幕失败请检查你的cookie\\nStatus code: {resp.status_code}\")\n",
"\n",
"        # Write into the folder of the matching video. The encoding is explicit so the\n",
"        # text does not depend on the platform's default encoding.\n",
"        resp.encoding = \"utf-8\"\n",
"        with open(f\"{file_name}/{file_name}.srt\", \"w+\", encoding=\"utf-8\") as f:\n",
"            f.write(resp.text)\n",
"\n",
"        # Set the last-modified time to the end time of the meeting.\n",
"        os.utime(f\"{file_name}/{file_name}.srt\", (file_mtime, file_mtime))\n",
"        os.utime(f\"{file_name}/{file_name}.mp4\", (file_mtime, file_mtime))\n",
"        os.utime(f\"{file_name}\", (file_mtime, file_mtime))\n",
"\n",
"    def check_meetings(self):\n",
"        \"\"\"Download every meeting whose id is not yet recorded in meetings.txt.\"\"\"\n",
"        all_meetings = self.get_meeting_info()\n",
"\n",
"        # Ids are compared without their line terminator, so a last line that lacks\n",
"        # a trailing newline is still recognised as already downloaded.\n",
"        downloaded_meetings = set()\n",
"        if os.path.exists('meetings.txt'):\n",
"            with open('meetings.txt', 'r') as f:\n",
"                downloaded_meetings = {line.strip() for line in f}\n",
"        need_download_meetings = [index for index in all_meetings if index['meeting_id'] not in downloaded_meetings]\n",
"\n",
"        # Download only if something is pending.\n",
"        if need_download_meetings:\n",
"            for index in tqdm(need_download_meetings, desc='Downloading Meetings', unit=' meeting'):\n",
"                # Download the video.\n",
"                file_name = self.download_video(index)\n",
"                # Download the subtitle.\n",
"                self.download_subtitle(index['object_token'], file_name, index['stop_time']/1000)\n",
"                # Record the meeting id of the downloaded minute.\n",
"                with open('meetings.txt', 'a+') as f:\n",
"                    f.write(index['meeting_id'] + '\\n')\n",
"\n",
"    def delete_minutes(self, num):\n",
"        \"\"\"Delete the first num minutes of the list returned by get_meeting_info().\n",
"\n",
"        If num is larger than the number of minutes, a single minute is deleted.\n",
"        A num of zero or less deletes nothing.\n",
"\n",
"        Raises:\n",
"            Exception: if a delete request does not return status 200.\n",
"        \"\"\"\n",
"        if num <= 0:\n",
"            # A negative slice bound would select almost every minute for deletion.\n",
"            return\n",
"        all_meetings = self.get_meeting_info()\n",
"        num = num if num <= len(all_meetings) else 1\n",
"        need_delete_meetings = all_meetings[:num]\n",
"\n",
"        for index in tqdm(need_delete_meetings, desc='Deleting Meetings', unit=' meeting'):\n",
"\n",
"            # Move the minute to the recycle bin.\n",
"            delete_url = \"https://meetings.feishu.cn/minutes/api/space/delete\"\n",
"            params = {'object_tokens': index['object_token'],\n",
"                      'is_destroyed': 'false',\n",
"                      'language': 'zh_cn'}\n",
"            resp = requests.post(url=delete_url, params=params, headers=self.headers, timeout=30, proxies=proxies)\n",
"            if resp.status_code != 200:\n",
"                raise Exception(f\"删除会议{index['meeting_id']}失败!{resp.json()}\")\n",
"\n",
"            # Delete the minute permanently.\n",
"            params['is_destroyed'] = 'true'\n",
"            resp = requests.post(url=delete_url, params=params, headers=self.headers, timeout=30, proxies=proxies)\n",
"            if resp.status_code != 200:\n",
"                raise Exception(f\"删除会议{index['meeting_id']}失败!{resp.json()}\")\n",
"\n",
"\n",
"if __name__ == '__main__':\n",
"\n",
"    # Cookies are credentials, so they are read from the environment instead of being\n",
"    # stored in the notebook. A cookie that was ever committed must be treated as\n",
"    # leaked and invalidated by ending that browser session.\n",
"    # FEISHU_MINUTES_COOKIE: cookie from the Feishu Minutes home page https://meetings.feishu.cn/minutes/home\n",
"    minutes_cookie = os.environ.get(\"FEISHU_MINUTES_COOKIE\", \"\")\n",
"\n",
"    # Optional. FEISHU_MANAGER_COOKIE: cookie from the Feishu admin console; the account\n",
"    # must be the organisation creator, a super administrator or an administrator.\n",
"    manager_cookie = os.environ.get(\"FEISHU_MANAGER_COOKIE\", \"\")\n",
"\n",
"    POLL_INTERVAL_SECONDS = 3600  # pause between two rounds\n",
"    QUOTA_LIMIT_BYTES = 2 ** 30 * 9.65  # above 9.65 GiB the oldest minutes are deleted\n",
"\n",
"    if not minutes_cookie:\n",
"        raise Exception(\"cookie不能为空\")\n",
"\n",
"    # Without a manager cookie: download (and delete the oldest minute) on a fixed schedule.\n",
"    elif not manager_cookie:\n",
"        while True:\n",
"            print(time.strftime(\"%Y-%m-%d %H:%M:%S\", time.localtime()))\n",
"            downloader = MeetingDownloader(minutes_cookie)\n",
"            downloader.check_meetings()\n",
"            downloader.delete_minutes(1)\n",
"            time.sleep(POLL_INTERVAL_SECONDS)\n",
"\n",
"    # With a manager cookie: poll the storage usage and delete the oldest minutes\n",
"    # once the usage exceeds the quota limit.\n",
"    else:\n",
"        # Read the X-Csrf-Token value from manager_cookie. Matching on a cookie boundary\n",
"        # also works when csrf_token is the first or the last cookie.\n",
"        csrf_match = re.search(r'(?:^|;\\s*)csrf_token=([^;]*)', manager_cookie)\n",
"        x_csrf_token = csrf_match.group(1) if csrf_match else ''\n",
"        if len(x_csrf_token) != 36:\n",
"            raise Exception(\"manager_cookie中不包含csrf_token请确保从请求`count?_t=`中获取!\")\n",
"\n",
"        usage_bytes_old = 0  # bytes in use at the previous poll\n",
"\n",
"        # Periodically query the number of bytes in use.\n",
"        while True:\n",
"            print(time.strftime(\"%Y-%m-%d %H:%M:%S\", time.localtime()))\n",
"\n",
"            # Query the bytes in use.\n",
"            query_url = \"https://www.feishu.cn/suite/admin/api/gaea/usages\"\n",
"            manager_headers = {'cookie': manager_cookie, 'X-Csrf-Token': x_csrf_token}\n",
"            res = requests.get(url=query_url, headers=manager_headers, timeout=30, proxies=proxies)\n",
"            # NOTE(review): assumes items[6] is the Minutes storage entry - confirm against the API response.\n",
"            usage_bytes = int(res.json()['data']['items'][6]['usage'])\n",
"            print(f\"已用空间:{usage_bytes / 2 ** 30:.2f}GB\")\n",
"\n",
"            # Download only when the usage changed since the previous poll.\n",
"            if usage_bytes != usage_bytes_old:\n",
"                downloader = MeetingDownloader(minutes_cookie)\n",
"                downloader.check_meetings()\n",
"                # Above the quota limit, delete the two oldest meetings.\n",
"                if usage_bytes > QUOTA_LIMIT_BYTES:\n",
"                    downloader.delete_minutes(2)\n",
"                usage_bytes_old = usage_bytes  # remember the usage for the next poll\n",
"            time.sleep(POLL_INTERVAL_SECONDS)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9712c75e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

244
feishu_uploader.ipynb Normal file
View File

@@ -0,0 +1,244 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "d67ee4f8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Block 0: 4194304\n",
"Block 1: 4194304\n",
"Block 2: 4194304\n",
"Block 3: 836012\n",
"{'code': 0, 'message': 'Success', 'data': {'file_token': 'Iby8btwdjobxowxOOJGcl8GtnXn', 'version': '7292285803852611612', 'data_version': '7292285803852611612'}}\n",
"{'code': 0, 'msg': 'success', 'data': '[]'}\n",
"{'code': 0, 'msg': 'success', 'data': {'status': [{'topic': '8093', 'duration': 1677340, 'expire_time': -1, 'in_trash': False, 'scheduler_execute_delta_time': -1, 'object_token': 'obcnae9145x56f52e2aj5761', 'object_status': 4, 'transcript_progress': {'current': '0.72', 'rate': '0.72'}, 'scheduler_type': 0}], 'ws_config': {'ws_enable': False, 'heartbeat_interval': 20, 'http_interval': 60}}}\n",
"转写中...0.01%\n",
"{'code': 0, 'msg': 'success', 'data': {'status': [{'expire_time': -1, 'in_trash': False, 'scheduler_execute_delta_time': -1, 'object_status': 4, 'topic': '8093', 'transcript_progress': {'current': '2.86', 'rate': '0.72'}, 'scheduler_type': 0, 'object_token': 'obcnae9145x56f52e2aj5761', 'duration': 1677340}], 'ws_config': {'ws_enable': False, 'heartbeat_interval': 20, 'http_interval': 60}}}\n",
"转写中...0.03%\n",
"{'code': 0, 'msg': 'success', 'data': {'ws_config': {'ws_enable': False, 'heartbeat_interval': 20, 'http_interval': 60}, 'status': [{'duration': 1677340, 'expire_time': -1, 'object_token': 'obcnae9145x56f52e2aj5761', 'object_status': 4, 'topic': '8093', 'transcript_progress': {'current': '5.72', 'rate': '0.72'}, 'in_trash': False, 'scheduler_type': 0, 'scheduler_execute_delta_time': -1}]}}\n",
"转写中...0.06%\n",
"{'code': 0, 'msg': 'success', 'data': {'status': [{'duration': 1677340, 'scheduler_execute_delta_time': -1, 'transcript_progress': {'current': '7.87', 'rate': '0.72'}, 'expire_time': -1, 'in_trash': False, 'scheduler_type': 0, 'object_token': 'obcnae9145x56f52e2aj5761', 'object_status': 4, 'topic': '8093'}], 'ws_config': {'ws_enable': False, 'heartbeat_interval': 20, 'http_interval': 60}}}\n",
"转写中...0.08%\n",
"{'code': 0, 'msg': 'success', 'data': {'status': [{'duration': 1677340, 'in_trash': False, 'scheduler_type': 0, 'object_token': 'obcnae9145x56f52e2aj5761', 'topic': '8093', 'transcript_progress': {'current': '10.73', 'rate': '0.72'}, 'expire_time': -1, 'scheduler_execute_delta_time': -1, 'object_status': 4}], 'ws_config': {'heartbeat_interval': 20, 'http_interval': 60, 'ws_enable': False}}}\n",
"转写中...0.11%\n",
"{'code': 0, 'msg': 'success', 'data': {'status': [{'topic': '8093', 'transcript_progress': {'rate': '0.72', 'current': '13.59'}, 'scheduler_execute_delta_time': -1, 'object_token': 'obcnae9145x56f52e2aj5761', 'object_status': 4, 'in_trash': False, 'scheduler_type': 0, 'duration': 1677340, 'expire_time': -1}], 'ws_config': {'http_interval': 60, 'ws_enable': False, 'heartbeat_interval': 20}}}\n",
"转写中...0.14%\n",
"{'code': 0, 'msg': 'success', 'data': {'status': [{'object_token': 'obcnae9145x56f52e2aj5761', 'topic': '8093', 'transcript_progress': {'current': '15.74', 'rate': '0.72'}, 'expire_time': -1, 'scheduler_execute_delta_time': -1, 'object_status': 4, 'duration': 1677340, 'in_trash': False, 'scheduler_type': 0}], 'ws_config': {'ws_enable': False, 'heartbeat_interval': 20, 'http_interval': 60}}}\n",
"转写中...0.16%\n",
"{'code': 0, 'msg': 'success', 'data': {'status': [{'expire_time': -1, 'in_trash': False, 'scheduler_type': 0, 'object_token': 'obcnae9145x56f52e2aj5761', 'object_status': 4, 'topic': '8093', 'duration': 1677340, 'transcript_progress': {'current': '18.6', 'rate': '0.72'}, 'scheduler_execute_delta_time': -1}], 'ws_config': {'http_interval': 60, 'ws_enable': False, 'heartbeat_interval': 20}}}\n",
"转写中...0.19%\n",
"{'code': 0, 'msg': 'success', 'data': {'status': [{'object_token': 'obcnae9145x56f52e2aj5761', 'object_status': 4, 'topic': '8093', 'duration': 1677340, 'scheduler_execute_delta_time': -1, 'transcript_progress': {'rate': '0.72', 'current': '20.75'}, 'expire_time': -1, 'in_trash': False, 'scheduler_type': 0}], 'ws_config': {'ws_enable': False, 'heartbeat_interval': 20, 'http_interval': 60}}}\n",
"转写中...0.21%\n",
"{'code': 0, 'msg': 'success', 'data': {'status': [{'object_token': 'obcnae9145x56f52e2aj5761', 'transcript_progress': {'current': '23.61', 'rate': '0.72'}, 'expire_time': -1, 'scheduler_type': 0, 'scheduler_execute_delta_time': -1, 'object_status': 4, 'topic': '8093', 'duration': 1677340, 'in_trash': False}], 'ws_config': {'ws_enable': False, 'heartbeat_interval': 20, 'http_interval': 60}}}\n",
"转写中...0.24%\n",
"{'data': {'status': [{'duration': 1677340, 'expire_time': -1, 'object_token': 'obcnae9145x56f52e2aj5761', 'object_status': 1, 'topic': '8093', 'scheduler_execute_delta_time': -1, 'transcript_progress': {'current': '25.76', 'rate': '0.72'}, 'in_trash': False, 'scheduler_type': 0}], 'ws_config': {'ws_enable': False, 'heartbeat_interval': 20, 'http_interval': 60}}, 'code': 0, 'msg': 'success'}\n",
"转写中...0.26%\n",
"{'code': 0, 'msg': 'success', 'data': {'status': [{'object_status': 1, 'topic': '8093', 'expire_time': -1, 'scheduler_execute_delta_time': -1, 'object_token': 'obcnae9145x56f52e2aj5761', 'duration': 1677340, 'transcript_progress': {'current': '28.62', 'rate': '0.72'}, 'in_trash': False, 'scheduler_type': 0}], 'ws_config': {'http_interval': 60, 'ws_enable': False, 'heartbeat_interval': 20}}}\n",
"转写中...0.29%\n",
"{'code': 0, 'msg': 'success', 'data': {'status': [{'transcript_progress': {'current': '31.48', 'rate': '0.72'}, 'expire_time': -1, 'in_trash': False, 'scheduler_type': 0, 'object_token': 'obcnae9145x56f52e2aj5761', 'object_status': 1, 'scheduler_execute_delta_time': -1, 'topic': '8093', 'duration': 1677340}], 'ws_config': {'http_interval': 60, 'ws_enable': False, 'heartbeat_interval': 20}}}\n",
"转写中...0.31%\n",
"{'code': 0, 'msg': 'success', 'data': {'status': [{'in_trash': False, 'scheduler_type': 0, 'object_status': 1, 'duration': 1677340, 'transcript_progress': {'current': '33.62', 'rate': '0.72'}, 'scheduler_execute_delta_time': -1, 'object_token': 'obcnae9145x56f52e2aj5761', 'topic': '8093', 'expire_time': -1}], 'ws_config': {'http_interval': 60, 'ws_enable': False, 'heartbeat_interval': 20}}}\n",
"转写中...0.34%\n",
"{'code': 0, 'msg': 'success', 'data': {'ws_config': {'http_interval': 60, 'ws_enable': False, 'heartbeat_interval': 20}, 'status': [{'topic': '8093', 'duration': 1677340, 'scheduler_execute_delta_time': -1, 'object_token': 'obcnae9145x56f52e2aj5761', 'object_status': 1, 'in_trash': False, 'scheduler_type': 0, 'transcript_progress': {'current': '36.49', 'rate': '0.72'}, 'expire_time': -1}]}}\n",
"转写中...0.36%\n",
"{'code': 0, 'msg': 'success', 'data': {'status': [{'object_status': 1, 'topic': '8093', 'in_trash': False, 'scheduler_type': 0, 'object_token': 'obcnae9145x56f52e2aj5761', 'duration': 1677340, 'transcript_progress': {'current': '38.63', 'rate': '0.72'}, 'expire_time': -1, 'scheduler_execute_delta_time': -1}], 'ws_config': {'ws_enable': False, 'heartbeat_interval': 20, 'http_interval': 60}}}\n",
"转写中...0.39%\n",
"{'code': 0, 'msg': 'success', 'data': {'ws_config': {'ws_enable': False, 'heartbeat_interval': 20, 'http_interval': 60}, 'status': [{'object_status': 1, 'duration': 1677340, 'expire_time': -1, 'object_token': 'obcnae9145x56f52e2aj5761', 'topic': '8093', 'transcript_progress': {'current': '41.49', 'rate': '0.72'}, 'in_trash': False, 'scheduler_type': 0, 'scheduler_execute_delta_time': -1}]}}\n",
"转写中...0.41%\n",
"{'code': 0, 'msg': 'success', 'data': {'ws_config': {'ws_enable': False, 'heartbeat_interval': 20, 'http_interval': 60}, 'status': [{'expire_time': -1, 'scheduler_type': 0, 'topic': '8093', 'duration': 1677340, 'transcript_progress': {'current': '43.64', 'rate': '0.72'}, 'in_trash': False, 'scheduler_execute_delta_time': -1, 'object_token': 'obcnae9145x56f52e2aj5761', 'object_status': 1}]}}\n",
"转写中...0.44%\n",
"{'code': 0, 'msg': 'success', 'data': {'ws_config': {'ws_enable': False, 'heartbeat_interval': 20, 'http_interval': 60}, 'status': [{'duration': 1677340, 'expire_time': -1, 'in_trash': False, 'transcript_progress': {'current': '46.5', 'rate': '0.72'}, 'scheduler_type': 0, 'scheduler_execute_delta_time': -1, 'object_token': 'obcnae9145x56f52e2aj5761', 'object_status': 1, 'topic': '8093'}]}}\n",
"转写中...0.47%\n",
"{'code': 0, 'msg': 'success', 'data': {'ws_config': {'http_interval': 60, 'ws_enable': False, 'heartbeat_interval': 20}, 'status': [{'scheduler_execute_delta_time': -1, 'object_token': 'obcnae9145x56f52e2aj5761', 'topic': '8093', 'in_trash': False, 'expire_time': -1, 'scheduler_type': 0, 'object_status': 1, 'duration': 1677340, 'transcript_progress': {'current': '48.65', 'rate': '0.72'}}]}}\n",
"转写中...0.49%\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'code': 0, 'msg': 'success', 'data': {'status': [{'scheduler_type': 0, 'scheduler_execute_delta_time': -1, 'object_status': 1, 'duration': 1677340, 'transcript_progress': {'current': '51.51', 'rate': '0.72'}, 'in_trash': False, 'object_token': 'obcnae9145x56f52e2aj5761', 'topic': '8093', 'expire_time': -1}], 'ws_config': {'http_interval': 60, 'ws_enable': False, 'heartbeat_interval': 20}}}\n",
"转写中...0.52%\n",
"{'msg': 'success', 'data': {'status': [{'object_token': 'obcnae9145x56f52e2aj5761', 'object_status': 1, 'topic': '8093', 'expire_time': -1, 'in_trash': False, 'scheduler_execute_delta_time': -1, 'duration': 1677340, 'transcript_progress': {'current': '54.37', 'rate': '0.72'}, 'scheduler_type': 0}], 'ws_config': {'ws_enable': False, 'heartbeat_interval': 20, 'http_interval': 60}}, 'code': 0}\n",
"转写中...0.54%\n",
"{'code': 0, 'msg': 'success', 'data': {'status': [{'object_token': 'obcnae9145x56f52e2aj5761', 'duration': 1677340, 'expire_time': -1, 'in_trash': False, 'scheduler_type': 0, 'object_status': 1, 'topic': '8093', 'transcript_progress': {'current': '57.23', 'rate': '0.72'}, 'scheduler_execute_delta_time': -1}], 'ws_config': {'ws_enable': False, 'heartbeat_interval': 20, 'http_interval': 60}}}\n",
"转写中...0.57%\n",
"{'code': 0, 'msg': 'success', 'data': {'status': [{'object_token': 'obcnae9145x56f52e2aj5761', 'duration': 1677340, 'expire_time': -1, 'scheduler_execute_delta_time': -1, 'object_status': 1, 'topic': '8093', 'transcript_progress': {'rate': '0.72', 'current': '59.38'}, 'in_trash': False, 'scheduler_type': 0}], 'ws_config': {'ws_enable': False, 'heartbeat_interval': 20, 'http_interval': 60}}}\n",
"转写中...0.59%\n",
"{'data': {'status': [{'topic': '8093', 'in_trash': False, 'duration': 1677340, 'transcript_progress': {'current': '61.53', 'rate': '0.72'}, 'expire_time': -1, 'scheduler_type': 0, 'scheduler_execute_delta_time': -1, 'object_token': 'obcnae9145x56f52e2aj5761', 'object_status': 1}], 'ws_config': {'heartbeat_interval': 20, 'http_interval': 60, 'ws_enable': False}}, 'code': 0, 'msg': 'success'}\n",
"转写中...0.62%\n",
"{'code': 0, 'msg': 'success', 'data': {'status': [{'duration': 1677340, 'transcript_progress': {'rate': '0.72', 'current': '64.39'}, 'object_token': 'obcnae9145x56f52e2aj5761', 'topic': '8093', 'in_trash': False, 'scheduler_type': 0, 'scheduler_execute_delta_time': -1, 'object_status': 1, 'expire_time': -1}], 'ws_config': {'heartbeat_interval': 20, 'http_interval': 60, 'ws_enable': False}}}\n",
"转写中...0.64%\n",
"{'code': 0, 'msg': 'success', 'data': {'ws_config': {'ws_enable': False, 'heartbeat_interval': 20, 'http_interval': 60}, 'status': [{'object_status': 1, 'duration': 1677340, 'transcript_progress': {'current': '66.53', 'rate': '0.72'}, 'scheduler_execute_delta_time': -1, 'object_token': 'obcnae9145x56f52e2aj5761', 'topic': '8093', 'expire_time': -1, 'in_trash': False, 'scheduler_type': 0}]}}\n",
"转写中...0.67%\n",
"{'code': 0, 'msg': 'success', 'data': {'status': [{'object_token': 'obcnae9145x56f52e2aj5761', 'object_status': 1, 'topic': '8093', 'duration': 1677340, 'expire_time': -1, 'in_trash': False, 'scheduler_type': 0, 'transcript_progress': {'rate': '0.72', 'current': '69.4'}, 'scheduler_execute_delta_time': -1}], 'ws_config': {'ws_enable': False, 'heartbeat_interval': 20, 'http_interval': 60}}}\n",
"转写中...0.69%\n",
"{'data': {'status': [{'topic': '8093', 'scheduler_type': 0, 'scheduler_execute_delta_time': -1, 'object_token': 'obcnae9145x56f52e2aj5761', 'object_status': 2, 'duration': 1677340, 'transcript_progress': {'current': '', 'rate': ''}, 'expire_time': -1, 'in_trash': False}], 'ws_config': {'ws_enable': False, 'heartbeat_interval': 20, 'http_interval': 60}}, 'code': 0, 'msg': 'success'}\n",
"转写完成http://meetings.feishu.cn/minutes/obcnae9145x56f52e2aj5761\n"
]
}
],
"source": [
"import base64, time, uuid, zlib\n",
"import requests\n",
"from multiprocessing import Pool\n",
"\n",
"\n",
"# 不使用系统代理\n",
"proxies = {\"http\": None, \"https\": None}\n",
"\n",
"class FeishuUploader:\n",
" def __init__(self, file_path, cookie):\n",
" self.file_path = file_path\n",
" self.block_size = 2**20*4\n",
" self.headers = {\n",
" 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',\n",
" 'cookie' : cookie,\n",
" 'bv-csrf-token' : cookie[cookie.find('bv_csrf_token=') + len('bv_csrf_token='):cookie.find(';', cookie.find('bv_csrf_token='))],\n",
" 'referer' : f'https://meetings.feishu.cn/minutes/home'\n",
" }\n",
" if len(self.headers.get('bv-csrf-token')) != 36:\n",
" raise Exception(\"cookie中不包含bv_csrf_token请确保从请求`list?size=20&`中获取!\")\n",
"\n",
" self.upload_token = None\n",
" self.vhid = None\n",
" self.upload_id = None\n",
" self.object_token = None\n",
"\n",
" with open(self.file_path, \"rb\") as f:\n",
" self.file_size = f.seek(0, 2)\n",
" f.seek(0)\n",
" self.file_header = base64.b64encode(f.read(512)).decode()\n",
"\n",
" def get_quota(self):\n",
" file_info = f'{uuid.uuid1()}_{self.file_size}'\n",
" quota_url = f'https://meetings.feishu.cn/minutes/api/quota?file_info[]={file_info}&language=zh_cn'\n",
" quota_res = requests.get(quota_url, headers=self.headers, proxies=proxies).json()\n",
" self.upload_token = quota_res['data']['upload_token'][file_info]\n",
"\n",
" # 分片上传文件(预上传)\n",
" # doc: https://open.feishu.cn/document/server-docs/docs/drive-v1/upload/multipart-upload-file-/upload_prepare\n",
" def prepare_upload(self):\n",
" file_name = self.file_path.split(\"\\\\\")[-1]\n",
" prepare_url = f'https://meetings.feishu.cn/minutes/api/upload/prepare'\n",
" data = {\n",
" \"name\" : file_name,\n",
" \"file_size\": self.file_size,\n",
" \"file_header\": self.file_header,\n",
" \"drive_upload\" :True,\n",
" \"upload_token\" : self.upload_token,\n",
" }\n",
" prepare_res = requests.post(prepare_url, headers=self.headers, proxies=proxies, json=data).json()\n",
" self.vhid = prepare_res['data']['vhid']\n",
" self.upload_id = prepare_res['data']['upload_id']\n",
" self.object_token = prepare_res['data']['object_token']\n",
"\n",
" # 分片上传文件(上传分片)\n",
" # doc: https://open.feishu.cn/document/server-docs/docs/drive-v1/upload/multipart-upload-file-/upload_part\n",
" def upload_blocks(self):\n",
" with open(self.file_path, \"rb\") as f:\n",
" f.seek(0)\n",
" block_count = (self.file_size + self.block_size - 1) // self.block_size\n",
" pool = Pool(processes=6)\n",
" for i in range(block_count):\n",
" block_data = f.read(self.block_size)\n",
" block_size = len(block_data)\n",
" print(f\"Block {i}: {block_size}\")\n",
" checksum = zlib.adler32(block_data) % (10 ** 10)\n",
" upload_url = f'https://internal-api-space.feishu.cn/space/api/box/stream/upload/block?upload_id={self.upload_id}&seq={i}&size={block_size}&checksum={checksum}'\n",
" pool.apply_async(requests.post, args=(upload_url,), kwds={'headers': self.headers, 'proxies': proxies, 'data': block_data})\n",
" pool.close()\n",
" pool.join()\n",
"\n",
" # 分片上传文件(完成上传)\n",
" # doc: https://open.feishu.cn/document/server-docs/docs/drive-v1/upload/multipart-upload-file-/upload_finish\n",
" def complete_upload(self):\n",
" complete_url1 = f'https://internal-api-space.feishu.cn/space/api/box/upload/finish/'\n",
" json = {\n",
" \"upload_id\": self.upload_id,\n",
" \"num_blocks\": (self.file_size + self.block_size - 1) // self.block_size,\n",
" \"vhid\": self.vhid,\n",
" \"risk_detection_extra\" : \"{\\\"source_terminal\\\":1,\\\"file_operate_usage\\\":3,\\\"locale\\\":\\\"zh_cn\\\"}\"\n",
" }\n",
" res = requests.post(complete_url1, headers=self.headers, proxies=proxies, json=json).json()\n",
" print(res)\n",
"\n",
" complete_url2 = f'https://meetings.feishu.cn/minutes/api/upload/finish'\n",
" json = {\n",
" \"auto_transcribe\" : True,\n",
" \"language\" : \"mixed\",\n",
" \"num_blocks\": (self.file_size + self.block_size - 1) // self.block_size,\n",
" \"upload_id\": self.upload_id,\n",
" \"vhid\": self.vhid,\n",
" \"upload_token\" : self.upload_token,\n",
" \"object_token\" : self.object_token,\n",
" }\n",
" res = requests.post(complete_url2, headers=self.headers, proxies=proxies, json=json).json()\n",
" print(res)\n",
"\n",
" # 上传完成后检查是否转写完成\n",
" while True:\n",
" object_status_url = f'https://meetings.feishu.cn/minutes/api/batch-status?object_token[]={self.object_token}&language=zh_cn'\n",
" object_status = requests.get(object_status_url, headers=self.headers, proxies=proxies).json()\n",
" print(object_status)\n",
" if object_status['data']['status'][0]['object_status'] == 2:\n",
" print(f\"转写完成http://meetings.feishu.cn/minutes/{object_status['data']['status'][0]['object_token']}\")\n",
" break\n",
" print(f\"转写中...{float(object_status['data']['status'][0]['transcript_progress']['current'])/100:.2f}%\")\n",
" time.sleep(3)\n",
"\n",
" def upload(self):\n",
" self.get_quota()\n",
" self.prepare_upload()\n",
" self.upload_blocks()\n",
" self.complete_upload()\n",
"\n",
"\n",
"if __name__ == '__main__':\n",
"\n",
" # 在飞书妙记主页获取\n",
" cookie = \"minutes_csrf_token=ea0b6372-d541-49fd-9fca-e6e579ed6c56; m_ce8f16=65613062363337322d643534312d343966642d396663612d653665353739656436633536b37b91f4efa89b27b410d5626626f1b2ebf3ed82d6b510030362344fb5776178; __tea__ug__uid=2399591691287585247; Hm_lvt_e78c0cb1b97ef970304b53d2097845fd=1691287586; Hm_lpvt_e78c0cb1b97ef970304b53d2097845fd=1691287586; _gcl_au=1.1.1631485248.1691287586; passport_web_did=7264024950279913500; QXV0aHpDb250ZXh0=825bfa61b8ce47d6a89b987cdb581ff9; locale=zh-CN; trust_browser_id=48711549-c255-44b4-820f-442ea9608738; fid=80a777ac-f638-404f-8e7c-0902ac3cf140; lang=zh; _csrf_token=d0a421e1a93d873197fbf7134ab4c8f6a76769dd-1697598422; landing_url=https://login.feishu.cn/accounts/page/login?redirect_uri=https%3A%2F%2Fmeetings.feishu.cn%2Fminutes%2Fhome&app_id=16&should_pass_through=1&from=byteview_meeting_object; _ga=GA1.2.1302241623.1691287588; _gid=GA1.2.2068635412.1697867029; session=XN0YXJ0-31cg8369-b829-495d-ab17-8d31a56fc100-WVuZA; session_list=XN0YXJ0-31cg8369-b829-495d-ab17-8d31a56fc100-WVuZA; bv_csrf_token=25758c71-aa97-4174-b6e0-92625ce7ea3d; m_e09b70=32353735386337312d616139372d343137342d623665302d393236323563653765613364b37b91f4efa89b27b410d5626626f1b2ebf3ed82d6b510030362344fb5776178; _ga_VPYRHN104D=GS1.1.1697867029.2.1.1697867180.0.0.0; MM_U_ID=006c90d75f171009579f3ec6e755529eb47d86a0; 
sl_session=eyJhbGciOiJFUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE2OTc5MTAzODIsInVuaXQiOiJldV9uYyIsInJhdyI6eyJtZXRhIjoiQVYrN0Z3OUxBSUFEWDdzWEQwSEJBQVJrendBMHNRSkFIR1RQQURTeEFrQWNaVE5scXN6QVFCd0NLZ0VBUVVGQlFVRkJRVUZCUVVKc1RUSlhjVEkwVWtGQlVUMDkiLCJpZGMiOlsxLDJdLCJzdW0iOiJlNDhlNDZjMTIzZWM1ZTk3MGIxYjY1OWU1MmUxOTUwNmEwNmQ2ZDAwYmNjMjY1MjllYzYzMzQwY2QxOThiMTNmIiwibG9jIjoiemhfY24iLCJhcGMiOiJSZWxlYXNlIiwiaWF0IjoxNjk3ODY3MTgyLCJzYWMiOnsiVXNlclN0YWZmU3RhdHVzIjoiMSIsIlVzZXJUeXBlIjoiNDIifSwibG9kIjpudWxsLCJucyI6ImxhcmsiLCJuc191aWQiOiI2ODk4MTMyNjA4Njk4MzE4ODUxIiwibnNfdGlkIjoiNjg5ODEzMjYwODU0MzE2MjM3MiIsIm90IjowfX0.JfoJxXHUlKLPJGpM3Td-Qg2dAYG3ntlBqOgbCKO5XU33lHGELeZhq7dYZnp8tgKnQ3QhlPO9NlAz_fU8zEeb7Q; home8e9bfaeded6957ef07dfd71b5753f855065e9207={%22filterOption%22:{%22rankType%22:1%2C%22order%22:%22desc%22}%2C%22objectOwnerType%22:1%2C%22recentOpenTab%22:1%2C%22timeColumnKey%22:%22time%22}; passport_app_access_token=eyJhbGciOiJFUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE2OTc5MTAzOTgsInVuaXQiOiJldV9uYyIsInJhdyI6eyJtX2FjY2Vzc19pbmZvIjp7IjE2Ijp7ImlhdCI6MTY5Nzg2NzE4NSwiYWNjZXNzIjp0cnVlfSwiMiI6eyJpYXQiOjE2OTc4NjcxOTgsImFjY2VzcyI6dHJ1ZX19LCJzdW0iOiJlNDhlNDZjMTIzZWM1ZTk3MGIxYjY1OWU1MmUxOTUwNmEwNmQ2ZDAwYmNjMjY1MjllYzYzMzQwY2QxOThiMTNmIn19.9kJ3WnK16ZwRTfiXw5536Wcmf2zPzVKFMTVrW3ajrOF1Xs_6ewG8q_t_jPrpC3KmV5HzQZkX5WpHoYiBt16RXA; is_anonymous_session=; _tea_utm_cache_1229=undefined; shortscc=4; swp_csrf_token=a1f81dc5-6619-43b2-9762-aa6f43ad0dfe; t_beda37=da2d9afc338f12aa28cec8ecdf932dd739bc59ea887ce80e42108695bd57284b\"\n",
"\n",
" # 你要上传的文件所在路径\n",
" file_path = r\"8093.mp3\"\n",
"\n",
" uploader = FeishuUploader(file_path, cookie)\n",
" uploader.upload()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6ec4f4d1",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because one or more lines are too long

62
srt2txt.ipynb Normal file
View File

@@ -0,0 +1,62 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "1c256e43",
"metadata": {},
"outputs": [],
"source": [
"a = 1\n",
"b = 2\n",
"c = 3\n",
"state = a\n",
"text = ''\n",
"with open('test.srt', 'r') as f: #打开srt字幕文件并去掉文件开头的\\ufeff\n",
" for line in f.readlines(): #遍历srt字幕文件\n",
" if state == a: #跳过第一行\n",
" state = b\n",
" elif state == b: #跳过第二行\n",
" state = c\n",
" elif state == c: #读取第三行字幕文本\n",
" if len(line.strip()) !=0:\n",
" text += ' ' + line.strip() #将同一时间段的字幕文本拼接\n",
" state = c\n",
" elif len(line.strip()) ==0:\n",
" with open('test1.txt', 'a') as fa: #写入txt文本文件中\n",
" fa.write(text)\n",
" text = '\\n'\n",
" state = a"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fcc101e2",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

BIN
temp.mp3

Binary file not shown.