222 lines
4.4 KiB
Plaintext
222 lines
4.4 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"bear,\n",
|
|
"steer,\n",
|
|
"velour,\n",
|
|
"house,\n",
|
|
"English,\n",
|
|
"day,\n",
|
|
"reign,\n",
|
|
"English,\n",
|
|
"show,\n",
|
|
"eight,\n",
|
|
"buy,\n",
|
|
"reign,\n",
|
|
"dough,\n",
|
|
"play,\n",
|
|
"cheer,\n",
|
|
"pier,\n",
|
|
"grow,\n",
|
|
"break,\n",
|
|
"so,\n",
|
|
"June,\n",
|
|
"bear,\n",
|
|
"pair,\n",
|
|
"so,\n",
|
|
"toe,\n",
|
|
"cheer,\n",
|
|
"June,\n",
|
|
"supply,\n",
|
|
"cow,\n",
|
|
"out,\n",
|
|
"fierce,\n",
|
|
"moor,\n",
|
|
"align,\n",
|
|
"show,\n",
|
|
"join,\n",
|
|
"flower,\n",
|
|
"buy,\n",
|
|
"boat,\n",
|
|
"survey,\n",
|
|
"no,\n",
|
|
"cure,\n",
|
|
"survey,\n",
|
|
"boy,\n",
|
|
"fear,\n",
|
|
"make,\n",
|
|
"though,\n",
|
|
"brown,\n",
|
|
"velour,\n",
|
|
"moor,\n",
|
|
"grow,\n",
|
|
"near,\n",
|
|
"care,\n",
|
|
"eight,\n",
|
|
"pat,\n",
|
|
"blow,\n",
|
|
"play,\n",
|
|
"weight,\n",
|
|
"lies,\n",
|
|
"make,\n",
|
|
"fare,\n",
|
|
"spider,\n",
|
|
"pair,\n",
|
|
"pier,\n",
|
|
"though,\n",
|
|
"light,\n",
|
|
"out,\n",
|
|
"grey,\n",
|
|
"table,\n",
|
|
"supply,\n",
|
|
"steak,\n",
|
|
"fair,\n",
|
|
"vein,\n",
|
|
"fair,\n",
|
|
"paint,\n",
|
|
"cake,\n",
|
|
"blow,\n",
|
|
"they,\n",
|
|
"stay,\n",
|
|
"cure,\n",
|
|
"spider,\n",
|
|
"sew,\n",
|
|
"train,\n",
|
|
"great,\n",
|
|
"stay,\n",
|
|
"deer,\n",
|
|
"break,\n",
|
|
"guy,\n",
|
|
"Joe,\n",
|
|
"weight,\n",
|
|
"steer,\n",
|
|
"align,\n",
|
|
"dough,\n",
|
|
"boat,\n",
|
|
"toe,\n",
|
|
"kraut,\n",
|
|
"train,\n",
|
|
"great,\n",
|
|
"boy,\n",
|
|
"kraut,\n",
|
|
"deer,\n",
|
|
"ware,\n",
|
|
"rain,\n",
|
|
"grey,\n",
|
|
"tour,\n",
|
|
"toy,\n",
|
|
"near,\n",
|
|
"cow,\n",
|
|
"join,\n",
|
|
"lies,\n",
|
|
"table,\n",
|
|
"word,\n",
|
|
"toy,\n",
|
|
"rain,\n",
|
|
"clear,\n",
|
|
"ice,\n",
|
|
"ice,\n",
|
|
"fly,\n",
|
|
"fear,\n",
|
|
"fly,\n",
|
|
"serendipity,\n",
|
|
"care,\n",
|
|
"steak,\n",
|
|
"paint,\n",
|
|
"no,\n",
|
|
"vein,\n",
|
|
"tour,\n",
|
|
"clear,\n",
|
|
"soap,\n",
|
|
"pie,\n",
|
|
"cake,\n",
|
|
"brown,\n",
|
|
"sew,\n",
|
|
"fierce,\n",
|
|
"light,\n",
|
|
"fare,\n",
|
|
"pie,\n",
|
|
"pat,\n",
|
|
"Joe,\n",
|
|
"foe,\n",
|
|
"house,\n",
|
|
"tie,\n",
|
|
"word,\n",
|
|
"flower,\n",
|
|
"day,\n",
|
|
"they,\n",
|
|
"soap,\n",
|
|
"guy,\n",
|
|
"foe,\n",
|
|
"ware,\n",
|
|
"tie,\n",
|
|
"serendipity,\n"
|
|
]
|
|
}
|
|
],
|
"source": [
 "# Report audio clips referenced in the markdown files that have no matching mp3 file.\n",
 "import os\n",
 "import re\n",
 "\n",
 "md_path = \"../../sounds-of-american-english/\"\n",
 "mp3_path = \"../audios/\"\n",
 "# get md files in md_path\n",
 "md_files = [f for f in os.listdir(md_path) if f.endswith('.md')]\n",
 "# get all mp3 files in mp3_path\n",
 "mp3_files = [f for f in os.listdir(mp3_path) if f.endswith('.mp3')]\n",
 "# set copy for constant-time membership checks in the report loop\n",
 "mp3_names = set(mp3_files)\n",
 "# print(len(mp3_files))\n",
 "\n",
 "# collect every sub-string between \"audios/\" and \".mp3\" in the md files;\n",
 "# the dot is escaped so that only a literal \".mp3\" ends a match\n",
 "audio_ref = re.compile(r'audios/(.*?)\\.mp3')\n",
 "found = set()  # a set removes duplicates as they are collected\n",
 "for md_file in md_files:\n",
 "    # explicit encoding instead of the platform default (assumes the md files are UTF-8)\n",
 "    with open(os.path.join(md_path, md_file), 'r', encoding='utf-8') as f:\n",
 "        for line in f:\n",
 "            found.update(audio_ref.findall(line))\n",
 "\n",
 "audios_in_md = list(found)\n",
 "# print(len(audios_in_md))\n",
 "\n",
 "# print the text before the first \"-\" of each referenced audio that has no mp3 file\n",
 "for audio in audios_in_md:\n",
 "    if f'{audio}.mp3' not in mp3_names:\n",
 "        print(f'{audio.split(\"-\")[0].strip()},')\n",
 "\n",
 "# for mp3 in mp3_files:\n",
 "#     if not mp3.replace('.mp3', '') in audios_in_md:\n",
 "#         print(mp3)\n"
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "base",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.2"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|