keep updated

This commit is contained in:
xiaolai
2024-08-23 19:43:40 +08:00
parent 650a245cba
commit ae61c6a8d5
25 changed files with 1033 additions and 54 deletions

View File

@@ -2,25 +2,163 @@
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1980\n"
]
},
{
"ename": "TypeError",
"evalue": "can only concatenate list (not \"str\") to list",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[4], line 18\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m line \u001b[38;5;129;01min\u001b[39;00m lines:\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124maudios/\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m line:\n\u001b[0;32m---> 18\u001b[0m audios_in_md \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[43mre\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfindall\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mr\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43maudios/(.*?).mp3\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mline\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m.mp3\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;28mlen\u001b[39m(audios_in_md))\n\u001b[1;32m 20\u001b[0m \u001b[38;5;66;03m# remove duplicates\u001b[39;00m\n",
"\u001b[0;31mTypeError\u001b[0m: can only concatenate list (not \"str\") to list"
"bear,\n",
"steer,\n",
"velour,\n",
"house,\n",
"English,\n",
"day,\n",
"reign,\n",
"English,\n",
"show,\n",
"eight,\n",
"buy,\n",
"reign,\n",
"dough,\n",
"play,\n",
"cheer,\n",
"pier,\n",
"grow,\n",
"break,\n",
"so,\n",
"June,\n",
"bear,\n",
"pair,\n",
"so,\n",
"toe,\n",
"cheer,\n",
"June,\n",
"supply,\n",
"cow,\n",
"out,\n",
"fierce,\n",
"moor,\n",
"align,\n",
"show,\n",
"join,\n",
"flower,\n",
"buy,\n",
"boat,\n",
"survey,\n",
"no,\n",
"cure,\n",
"survey,\n",
"boy,\n",
"fear,\n",
"make,\n",
"though,\n",
"brown,\n",
"velour,\n",
"moor,\n",
"grow,\n",
"near,\n",
"care,\n",
"eight,\n",
"pat,\n",
"blow,\n",
"play,\n",
"weight,\n",
"lies,\n",
"make,\n",
"fare,\n",
"spider,\n",
"pair,\n",
"pier,\n",
"though,\n",
"light,\n",
"out,\n",
"grey,\n",
"table,\n",
"supply,\n",
"steak,\n",
"fair,\n",
"vein,\n",
"fair,\n",
"paint,\n",
"cake,\n",
"blow,\n",
"they,\n",
"stay,\n",
"cure,\n",
"spider,\n",
"sew,\n",
"train,\n",
"great,\n",
"stay,\n",
"deer,\n",
"break,\n",
"guy,\n",
"Joe,\n",
"weight,\n",
"steer,\n",
"align,\n",
"dough,\n",
"boat,\n",
"toe,\n",
"kraut,\n",
"train,\n",
"great,\n",
"boy,\n",
"kraut,\n",
"deer,\n",
"ware,\n",
"rain,\n",
"grey,\n",
"tour,\n",
"toy,\n",
"near,\n",
"cow,\n",
"join,\n",
"lies,\n",
"table,\n",
"word,\n",
"toy,\n",
"rain,\n",
"clear,\n",
"ice,\n",
"ice,\n",
"fly,\n",
"fear,\n",
"fly,\n",
"serendipity,\n",
"care,\n",
"steak,\n",
"paint,\n",
"no,\n",
"vein,\n",
"tour,\n",
"clear,\n",
"soap,\n",
"pie,\n",
"cake,\n",
"brown,\n",
"sew,\n",
"fierce,\n",
"light,\n",
"fare,\n",
"pie,\n",
"pat,\n",
"Joe,\n",
"foe,\n",
"house,\n",
"tie,\n",
"word,\n",
"flower,\n",
"day,\n",
"they,\n",
"soap,\n",
"guy,\n",
"foe,\n",
"ware,\n",
"tie,\n",
"serendipity,\n"
]
}
],
@@ -32,7 +170,7 @@
"md_files = [f for f in os.listdir(md_path) if f.endswith('.md')]\n",
"# get all mp3 files in mp3_path\n",
"mp3_files = [f for f in os.listdir(mp3_path) if f.endswith('.mp3')]\n",
"print(len(mp3_files))\n",
"# print(len(mp3_files))\n",
"\n",
"# read md files, and get all sub-string between \"audios/\" and \".mp3\", using regex\n",
"import re\n",
@@ -42,15 +180,19 @@
" lines = f.readlines()\n",
" for line in lines:\n",
" if \"audios/\" in line:\n",
" audios_in_md += re.findall(r'audios/(.*?).mp3', line) + '.mp3'\n",
"print(len(audios_in_md))\n",
" audios_in_md += re.findall(r'audios/(.*?).mp3', line)\n",
"\n",
"# remove duplicates\n",
"audios_in_md = list(set(audios_in_md))\n",
"# print(len(audios_in_md))\n",
"\n",
"for audio in audios_in_md:\n",
" if not f'{audio}.mp3' in mp3_files:\n",
" print(f'{audio}.mp3')\n",
" print(f'{audio.split(\"-\")[0].strip()},')\n",
"\n",
"# for mp3 in mp3_files:\n",
"# if not mp3.replace('.mp3', '') in audios_in_md:\n",
"# print(mp3)\n",
"\n"
]
}