import os
import re

# Locations are relative to the notebook's working directory.
md_path = "../../sounds-of-american-english/"
mp3_path = "../audios/"

# Captures the file stem between "audios/" and a literal ".mp3" extension.
# The dot is escaped on purpose: an unescaped "." matches any character, so a
# stem that itself contains "mp3" (e.g. "camp3") would be cut short.
AUDIO_REF_PATTERN = re.compile(r'audios/(.*?)\.mp3')


def extract_audio_refs(text):
    """Return the set of audio file stems referenced in ``text``.

    A reference is any substring of the form ``audios/<stem>.mp3``; only
    ``<stem>`` is returned. The match never spans a line break because ``.``
    does not match newlines.

    Args:
        text: Contents of a markdown file (or any string).

    Returns:
        A set of stems; empty when ``text`` contains no reference.
    """
    return set(AUDIO_REF_PATTERN.findall(text))


def find_missing_words(md_dir, mp3_dir):
    """List the words whose referenced audio file is absent from ``mp3_dir``.

    Every ``*.md`` file directly inside ``md_dir`` is scanned for
    ``audios/<stem>.mp3`` references. A stem counts as missing when
    ``<stem>.mp3`` is not among the ``*.mp3`` file names in ``mp3_dir``. Each
    missing stem is reduced to the text before its first ``-`` (stripped of
    surrounding whitespace).

    Args:
        md_dir: Directory containing the markdown files.
        mp3_dir: Directory containing the audio files.

    Returns:
        The missing words, de-duplicated and sorted case-insensitively, so
        repeated runs print the same list in the same order.

    Raises:
        FileNotFoundError: If either directory does not exist.
    """
    # A set gives constant-time membership checks for the comparison below.
    available = {name for name in os.listdir(mp3_dir) if name.endswith('.mp3')}

    referenced = set()
    for name in os.listdir(md_dir):
        if not name.endswith('.md'):
            continue
        # Explicit encoding: do not depend on the platform's default locale.
        with open(os.path.join(md_dir, name), 'r', encoding='utf-8') as f:
            referenced |= extract_audio_refs(f.read())

    # Several stems can share one word, so de-duplicate after the reduction
    # to words rather than before it.
    words = {
        stem.split("-")[0].strip()
        for stem in referenced
        if f'{stem}.mp3' not in available
    }
    # Case-insensitive order, with the raw word as a deterministic tie-break.
    return sorted(words, key=lambda w: (w.casefold(), w))


# In a notebook kernel __name__ is "__main__", so the report runs as usual;
# the guard only keeps the filesystem untouched when the helpers are imported.
if __name__ == "__main__":
    for word in find_missing_words(md_path, mp3_path):
        print(f'{word},')