Files
everyone-can-use-english/1000-hours/public/jupyter-notebooks/chapter4.ipynb
2024-07-27 13:50:02 +08:00

421 lines
28 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"import requests\n",
"import json\n",
"import vlc\n",
"import re\n",
"import random\n",
"from IPython.display import Audio\n",
"\n",
"def load_json_database(url):\n",
" records = []\n",
" try:\n",
" response = requests.get(url)\n",
" response.raise_for_status() # Raise an error for bad status codes\n",
" for line in response.iter_lines(decode_unicode=True):\n",
" if line:\n",
" try:\n",
" record = json.loads(line)\n",
" records.append(record)\n",
" except json.JSONDecodeError as e:\n",
" print(f\"Error parsing JSON: {e}\")\n",
" except requests.exceptions.RequestException as e:\n",
" print(f\"Error fetching data from URL: {e}\")\n",
" return records\n",
"\n",
"def search_in_json_database(database, search_word, region):\n",
" for record in database:\n",
" # 检查 word 字段是否匹配\n",
" if record.get('word') == search_word:\n",
" # 找到匹配项后,获取美式发音信息\n",
" pos_items = record.get('pos_items', [])\n",
" for pos_item in pos_items:\n",
" pronunciations = pos_item.get('pronunciations', [])\n",
" for pronunciation in pronunciations:\n",
" if pronunciation.get('region') == region:\n",
" # 找到美式发音,返回相关信息\n",
" return {\n",
" 'pronunciation': pronunciation.get('pronunciation'),\n",
" 'audio': pronunciation.get('audio')\n",
" }\n",
" # 如果没有找到匹配的 word 字段,返回 'not exist'\n",
" return 'not exist'\n",
"\n",
"url = \"https://raw.githubusercontent.com/zelic91/camdict/main/cam_dict.refined.json\"\n",
"\n",
"json_database = load_json_database(url)\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"之前讲过,非重音音节里的元音可能会被弱化为 schwa <span class=\"pho\">ə</span>…… 在自然语流中,连 schwa <span class=\"pho\">ə</span> 都可能会被进一步弱化,变成非常轻的 <span class=\"pho\">ɤ</span>。比如,常用词 reasonable</span>/ˈriːzənəbəl/</span>,实际听到的常常是</span> /ˈriːzɤnəbəl/</span>……\n",
"\n",
"phonetics_not_exist: \n",
"\n",
"\n"
]
}
],
"source": [
"text =\"\"\"\n",
"之前讲过,非重音音节里的元音可能会被弱化为 schwa `ə`…… 在自然语流中,连 schwa `ə` 都可能会被进一步弱化,变成非常轻的 `ɤ`。比如,常用词 reasonable`/ˈriːzənəbəl/`,实际听到的常常是` /ˈriːzɤnəbəl/`……\n",
"\"\"\"\n",
"\n",
"words = \"\"\n",
"phonetics_not_exist = ''\n",
"lines = text.split(\"\\n\")\n",
"for line in lines:\n",
" line = line.replace(\" `\", \" <span class=\\\"pho\\\">\")\n",
" line = line.replace(\"`\", \"</span>\")\n",
" if '*' in line:\n",
" line = line.replace('\"', \"**\")\n",
" examples = line.split(\"-\")[1].split(\",\")\n",
" examples = [x.strip() for x in examples]\n",
" line = line.replace(\" - \", \"\\n\")\n",
" # print(examples)\n",
" # wrap examples in span,\n",
" for e in examples:\n",
" # join e in words with ','\n",
" words += e + \",\"\n",
" entry_us = search_in_json_database(json_database, e, 'us')\n",
" if entry_us == 'not exist':\n",
" phonetics = entry_us\n",
" phonetics_not_exist += f'{e},'\n",
" else:\n",
" phonetics = entry_us['pronunciation'] \n",
" wrapped_e = f'\\t- {e} <span class=\"pho alt\">{phonetics}</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/{e}-us-male.mp3\" data-audio-us-female=\"/audios/us/{e}-us-female.mp3\"></span>\\n'\n",
" line = line.replace(e, wrapped_e).replace(',', '').strip()\n",
" \n",
"\n",
" print(line)\n",
"print(f'phonetics_not_exist: {phonetics_not_exist}')\n",
"print('\\n'+words)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['reasonable']\n",
"reasonable\n",
"Files created!\n"
]
}
],
"source": [
"import asyncio\n",
"import edge_tts\n",
"import pygame\n",
"text = words.rstrip(\",\")\n",
"Wordlist = text.split(\",\")\n",
"# Wordlist = ['reasonable']\n",
"\n",
"print(Wordlist)\n",
"for w in Wordlist:\n",
" # for VOICE in ['en-US-GuyNeural', 'en-US-JennyNeural', 'en-GB-RyanNeural', 'en-GB-SoniaNeural']:\n",
" for VOICE in ['en-US-GuyNeural', 'en-US-MichelleNeural']:\n",
" w = w.strip()\n",
" # OUTPUT_FILE = f\"{w}-{VOICE.replace('EricNeural', 'Guy-Male').replace('JennyNeural', 'Jenny-Female').replace('RyanNeural', 'Ryan-Male').replace('SoniaNeural', 'Sonia-Female').lower()}.mp3\"\n",
" OUTPUT_FILE = f\"{w}-{VOICE.replace('GuyNeural', 'Male').replace('MichelleNeural', 'Female').replace('en-', '').lower()}.mp3\"\n",
" communicate = edge_tts.Communicate(w, VOICE)\n",
" await communicate.save(OUTPUT_FILE) \n",
" print(w)\n",
"print(\"Files created!\") "
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"举个例子,*ichthyosaur*,这个一看就知道并非常用的词汇,其实很简单,先从表音构成去看,<span class=\"pho alt\">ˈɪk.θi.ə.sɔːr</span> —— 剑桥词典把它划分成了 4 个音节…… 但感觉上,第二第三个音节可以合并,<span class=\"pho alt\">ˈɪk.θiə.sɔːr</span>*ich* ⭤ <span class=\"pho alt\">ˈɪk</span>, *thyo* ⭤ <span class=\"pho alt\">θiə</span>, *saur* ⭤ <span class=\"pho alt\">sɔːr</span>…… 而从表意的角度去看呢?前半部 *ichthyo-* 的意思是 “与鱼有关的”…… 后半部 *-saur* 是什么意思呢?各种恐龙的 “龙” 都是 -saur 结尾,于是,这个词的意思是 “鱼龙”…… 换言之,这个单词的两个部分,都是拉丁词根词缀,也都是 “既表音又表意” 的,事实上很简单 —— 虽然拼写乍看起来很复杂。\n",
"\n",
"Audio files for ichthyosaur created!\n",
"\n",
"举个例子,*ichthyosaur* <span class=\"pho alt\">ˈɪk.θi.ə.sɔːr</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/ichthyosaur-us-male.mp3\" data-audio-us-female=\"/audios/us/ichthyosaur-us-female.mp3\"></span>\n",
",这个一看就知道并非常用的词汇,其实很简单,先从表音构成去看,<span class=\"pho alt\">ˈɪk.θi.ə.sɔːr</span> —— 剑桥词典把它划分成了 4 个音节…… 但感觉上,第二第三个音节可以合并,<span class=\"pho alt\">ˈɪk.θiə.sɔːr</span>*ich* ⭤ <span class=\"pho alt\">ˈɪk</span>, *thyo* ⭤ <span class=\"pho alt\">θiə</span>, *saur* ⭤ <span class=\"pho alt\">sɔːr</span>…… 而从表意的角度去看呢?前半部 *ichthyo-* 的意思是 “与鱼有关的”…… 后半部 *-saur* 是什么意思呢?各种恐龙的 “龙” 都是 -saur 结尾,于是,这个词的意思是 “鱼龙”…… 换言之,这个单词的两个部分,都是拉丁词根词缀,也都是 “既表音又表意” 的,事实上很简单 —— 虽然拼写乍看起来很复杂。\n",
"\n",
"Text copied to clipboard!\n"
]
}
],
"source": [
"text =\"\"\"\n",
"举个例子,*ichthyosaur*,这个一看就知道并非常用的词汇,其实很简单,先从表音构成去看,`/ˈɪk.θi.ə.sɔːr/` —— 剑桥词典把它划分成了 4 个音节…… 但感觉上,第二第三个音节可以合并,`/ˈɪk.θiə.sɔːr/`*ich* ⭤ `/ˈɪk/`, *thyo* ⭤ `/θiə/`, *saur* ⭤ `/sɔːr/`…… 而从表意的角度去看呢?前半部 *ichthyo-* 的意思是 “与鱼有关的”…… 后半部 *-saur* 是什么意思呢?各种恐龙的 “龙” 都是 -saur 结尾,于是,这个词的意思是 “鱼龙”…… 换言之,这个单词的两个部分,都是拉丁词根词缀,也都是 “既表音又表意” 的,事实上很简单 —— 虽然拼写乍看起来很复杂。\n",
"\"\"\"\n",
"\n",
"sound_files = \"ichthyosaur\".split(\",\")\n",
"# regex, replace `...` with <span class=\"pho\">...</span>\n",
"import re\n",
"text = text.replace(\"/\", \"\")\n",
"text = re.sub(r'`([^`]+)`', r'<span class=\"pho alt\">\\1</span>', text)\n",
"print(text)\n",
"\n",
"# get sound files\n",
"for s in sound_files:\n",
" w = s.strip()\n",
" text = text.replace(f\"*{w}*\", w)\n",
" # get the audio file\n",
"\n",
" import asyncio\n",
" import edge_tts\n",
" import pygame\n",
" for VOICE in ['en-US-GuyNeural', 'en-US-MichelleNeural']:\n",
" # OUTPUT_FILE = f\"{w}-{VOICE.replace('EricNeural', 'Guy-Male').replace('JennyNeural', 'Jenny-Female').replace('RyanNeural', 'Ryan-Male').replace('SoniaNeural', 'Sonia-Female').lower()}.mp3\"\n",
" OUTPUT_FILE = f\"{w}-{VOICE.replace('GuyNeural', 'Male').replace('MichelleNeural', 'Female').replace('en-', '').lower()}.mp3\"\n",
" communicate = edge_tts.Communicate(w, VOICE)\n",
" await communicate.save(OUTPUT_FILE) \n",
" print(f\"Audio files for {w} created!\") \n",
"\n",
" entry_us = search_in_json_database(json_database, w, 'us')\n",
" if entry_us == 'not exist':\n",
" phonetics = entry_us\n",
" else:\n",
" phonetics = entry_us['pronunciation'] \n",
" wrapped_w = f'*{w}* <span class=\"pho alt\">{phonetics}</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/{w}-us-male.mp3\" data-audio-us-female=\"/audios/us/{w}-us-female.mp3\"></span>\\n'\n",
" text = text.replace(w, wrapped_w)\n",
" # get phonetics for the word\n",
"\n",
"print(text)\n",
"\n",
"# send text to clipboard\n",
"import pyperclip\n",
"pyperclip.copy(text.replace(\"\\n\", \"\"))\n",
"print(\"Text copied to clipboard!\")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting pyperclip\n",
" Downloading pyperclip-1.9.0.tar.gz (20 kB)\n",
" Preparing metadata (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25hBuilding wheels for collected packages: pyperclip\n",
" Building wheel for pyperclip (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25h Created wheel for pyperclip: filename=pyperclip-1.9.0-py3-none-any.whl size=11002 sha256=b07922d96d27e2cc0dce4f31dd18f85c90c5f4b9298ca359a6ad9a13494461d6\n",
" Stored in directory: /Users/joker/Library/Caches/pip/wheels/e0/e8/fc/8ab8aa326e33bc066ccd5f3ca9646eab4299881af933f94f09\n",
"Successfully built pyperclip\n",
"Installing collected packages: pyperclip\n",
"Successfully installed pyperclip-1.9.0\n"
]
}
],
"source": [
"!pip install pyperclip"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1. **airplane** <span class=\"pho alt\">ˈer.pleɪn</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/airplane-us-male.mp3\" data-audio-us-female=\"/audios/us/airplane-us-female.mp3\"></span>\n",
"2. **airport** <span class=\"pho alt\">ˈer.pɔːrt</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/airport-us-male.mp3\" data-audio-us-female=\"/audios/us/airport-us-female.mp3\"></span>\n",
"3. **backyard** <span class=\"pho alt\">ˌbækˈjɑːrd</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/backyard-us-male.mp3\" data-audio-us-female=\"/audios/us/backyard-us-female.mp3\"></span>\n",
"4. **bedroom** <span class=\"pho alt\">ˈbed.ruːm</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/bedroom-us-male.mp3\" data-audio-us-female=\"/audios/us/bedroom-us-female.mp3\"></span>\n",
"5. **birthday** <span class=\"pho alt\">ˈːθ.deɪ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/birthday-us-male.mp3\" data-audio-us-female=\"/audios/us/birthday-us-female.mp3\"></span>\n",
"6. **blackboard** <span class=\"pho alt\">ˈblæk.bɔːrd</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/blackboard-us-male.mp3\" data-audio-us-female=\"/audios/us/blackboard-us-female.mp3\"></span>\n",
"7. **bookstore** <span class=\"pho alt\">ˈbʊk.stɔːr</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/bookstore-us-male.mp3\" data-audio-us-female=\"/audios/us/bookstore-us-female.mp3\"></span>\n",
"8. **brainstorm** <span class=\"pho alt\">ˈbreɪn.stɔːrm</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/brainstorm-us-male.mp3\" data-audio-us-female=\"/audios/us/brainstorm-us-female.mp3\"></span>\n",
"9. **breakfast** <span class=\"pho alt\">ˈbrek.fəst</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/breakfast-us-male.mp3\" data-audio-us-female=\"/audios/us/breakfast-us-female.mp3\"></span>\n",
"10. **classroom** <span class=\"pho alt\">ˈklæs.ruːm</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/classroom-us-male.mp3\" data-audio-us-female=\"/audios/us/classroom-us-female.mp3\"></span>\n",
"11. **cupcake** <span class=\"pho alt\">ˈkʌp.keɪk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/cupcake-us-male.mp3\" data-audio-us-female=\"/audios/us/cupcake-us-female.mp3\"></span>\n",
"12. **daydream** <span class=\"pho alt\">ˈdeɪ.driːm</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/daydream-us-male.mp3\" data-audio-us-female=\"/audios/us/daydream-us-female.mp3\"></span>\n",
"13. **dishwasher** <span class=\"pho alt\">ˈdɪʃˌwɑː.ʃɚ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/dishwasher-us-male.mp3\" data-audio-us-female=\"/audios/us/dishwasher-us-female.mp3\"></span>\n",
"14. **doorbell** <span class=\"pho alt\">ˈːr.bel</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/doorbell-us-male.mp3\" data-audio-us-female=\"/audios/us/doorbell-us-female.mp3\"></span>\n",
"15. **downtown** <span class=\"pho alt\">ˌdaʊnˈtaʊn</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/downtown-us-male.mp3\" data-audio-us-female=\"/audios/us/downtown-us-female.mp3\"></span>\n",
"16. **earthquake** <span class=\"pho alt\">ˈɝːθ.kweɪk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/earthquake-us-male.mp3\" data-audio-us-female=\"/audios/us/earthquake-us-female.mp3\"></span>\n",
"17. **everyday** <span class=\"pho alt\">ˈev.ri.deɪ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/everyday-us-male.mp3\" data-audio-us-female=\"/audios/us/everyday-us-female.mp3\"></span>\n",
"18. **eyewitness** <span class=\"pho alt\">ˈaɪˌwɪt.nəs</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/eyewitness-us-male.mp3\" data-audio-us-female=\"/audios/us/eyewitness-us-female.mp3\"></span>\n",
"19. **firefighter** <span class=\"pho alt\">ˈfaɪrˌfaɪ.t̬ɚ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/firefighter-us-male.mp3\" data-audio-us-female=\"/audios/us/firefighter-us-female.mp3\"></span>\n",
"20. **football** <span class=\"pho alt\">ˈfʊt.bɑːl</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/football-us-male.mp3\" data-audio-us-female=\"/audios/us/football-us-female.mp3\"></span>\n",
"21. **greenhouse** <span class=\"pho alt\">ˈɡriːn.haʊs</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/greenhouse-us-male.mp3\" data-audio-us-female=\"/audios/us/greenhouse-us-female.mp3\"></span>\n",
"22. **handwriting** <span class=\"pho alt\">ˈhændˌraɪ.t̬ɪŋ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/handwriting-us-male.mp3\" data-audio-us-female=\"/audios/us/handwriting-us-female.mp3\"></span>\n",
"23. **headache** <span class=\"pho alt\">ˈhed.eɪk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/headache-us-male.mp3\" data-audio-us-female=\"/audios/us/headache-us-female.mp3\"></span>\n",
"24. **highway** <span class=\"pho alt\">ˈhaɪ.weɪ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/highway-us-male.mp3\" data-audio-us-female=\"/audios/us/highway-us-female.mp3\"></span>\n",
"25. **homework** <span class=\"pho alt\">ˈhoʊm.wɝːk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/homework-us-male.mp3\" data-audio-us-female=\"/audios/us/homework-us-female.mp3\"></span>\n",
"26. **iceberg** <span class=\"pho alt\">ˈaɪs.bɝːɡ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/iceberg-us-male.mp3\" data-audio-us-female=\"/audios/us/iceberg-us-female.mp3\"></span>\n",
"27. **jellyfish** <span class=\"pho alt\">ˈdʒel.i.fɪʃ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/jellyfish-us-male.mp3\" data-audio-us-female=\"/audios/us/jellyfish-us-female.mp3\"></span>\n",
"28. **laptop** <span class=\"pho alt\">ˈlæp.tɑːp</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/laptop-us-male.mp3\" data-audio-us-female=\"/audios/us/laptop-us-female.mp3\"></span>\n",
"29. **lighthouse** <span class=\"pho alt\">ˈlaɪt.haʊs</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/lighthouse-us-male.mp3\" data-audio-us-female=\"/audios/us/lighthouse-us-female.mp3\"></span>\n",
"30. **mailbox** <span class=\"pho alt\">ˈmeɪl.bɑːks</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/mailbox-us-male.mp3\" data-audio-us-female=\"/audios/us/mailbox-us-female.mp3\"></span>\n",
"31. **moonlight** <span class=\"pho alt\">ˈmuːn.laɪt</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/moonlight-us-male.mp3\" data-audio-us-female=\"/audios/us/moonlight-us-female.mp3\"></span>\n",
"32. **notebook** <span class=\"pho alt\">ˈnoʊt.bʊk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/notebook-us-male.mp3\" data-audio-us-female=\"/audios/us/notebook-us-female.mp3\"></span>\n",
"33. **nobody** <span class=\"pho alt\">ˈnoʊ.bɑː.di</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/nobody-us-male.mp3\" data-audio-us-female=\"/audios/us/nobody-us-female.mp3\"></span>\n",
"34. **pancake** <span class=\"pho alt\">ˈpæn.keɪk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/pancake-us-male.mp3\" data-audio-us-female=\"/audios/us/pancake-us-female.mp3\"></span>\n",
"35. **postcard** <span class=\"pho alt\">ˈpoʊst.kɑːrd</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/postcard-us-male.mp3\" data-audio-us-female=\"/audios/us/postcard-us-female.mp3\"></span>\n",
"36. **rainbow** <span class=\"pho alt\">ˈreɪn.boʊ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/rainbow-us-male.mp3\" data-audio-us-female=\"/audios/us/rainbow-us-female.mp3\"></span>\n",
"37. **sailboat** <span class=\"pho alt\">ˈseɪl.boʊt</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/sailboat-us-male.mp3\" data-audio-us-female=\"/audios/us/sailboat-us-female.mp3\"></span>\n",
"38. **sandbox** <span class=\"pho alt\">ˈsænd.bɑːks</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/sandbox-us-male.mp3\" data-audio-us-female=\"/audios/us/sandbox-us-female.mp3\"></span>\n",
"39. **seashore** <span class=\"pho alt\">ˈsiː.ʃɔːr</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/seashore-us-male.mp3\" data-audio-us-female=\"/audios/us/seashore-us-female.mp3\"></span>\n",
"40. **skateboard** <span class=\"pho alt\">ˈskeɪt.bɔːrd</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/skateboard-us-male.mp3\" data-audio-us-female=\"/audios/us/skateboard-us-female.mp3\"></span>\n",
"41. **snowflake** <span class=\"pho alt\">ˈsnoʊ.fleɪk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/snowflake-us-male.mp3\" data-audio-us-female=\"/audios/us/snowflake-us-female.mp3\"></span>\n",
"42. **spaceship** <span class=\"pho alt\">ˈspeɪs.ʃɪp</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/spaceship-us-male.mp3\" data-audio-us-female=\"/audios/us/spaceship-us-female.mp3\"></span>\n",
"43. **sunflower** <span class=\"pho alt\">ˈsʌnˌflaʊ.ɚ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/sunflower-us-male.mp3\" data-audio-us-female=\"/audios/us/sunflower-us-female.mp3\"></span>\n",
"44. **sunshine** <span class=\"pho alt\">ˈsʌn.ʃaɪn</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/sunshine-us-male.mp3\" data-audio-us-female=\"/audios/us/sunshine-us-female.mp3\"></span>\n",
"45. **superhero** <span class=\"pho alt\">ˈsuː.pɚˌhɪr.oʊ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/superhero-us-male.mp3\" data-audio-us-female=\"/audios/us/superhero-us-female.mp3\"></span>\n",
"46. **tablecloth** <span class=\"pho alt\">ˈteɪ.bəl.klɑːθ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/tablecloth-us-male.mp3\" data-audio-us-female=\"/audios/us/tablecloth-us-female.mp3\"></span>\n",
"47. **toothbrush** <span class=\"pho alt\">ˈtuːθ.brʌʃ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/toothbrush-us-male.mp3\" data-audio-us-female=\"/audios/us/toothbrush-us-female.mp3\"></span>\n",
"48. **toothpaste** <span class=\"pho alt\">ˈtuːθ.peɪst</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/toothpaste-us-male.mp3\" data-audio-us-female=\"/audios/us/toothpaste-us-female.mp3\"></span>\n",
"49. **typewriter** <span class=\"pho alt\">ˈtaɪpˌraɪ.t̬ɚ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/typewriter-us-male.mp3\" data-audio-us-female=\"/audios/us/typewriter-us-female.mp3\"></span>\n",
"50. **underwater** <span class=\"pho alt\">ˌʌn.dɚˈwɑː.t̬ɚ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/underwater-us-male.mp3\" data-audio-us-female=\"/audios/us/underwater-us-female.mp3\"></span>\n",
"51. **upstairs** <span class=\"pho alt\">ʌpˈsterz</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/upstairs-us-male.mp3\" data-audio-us-female=\"/audios/us/upstairs-us-female.mp3\"></span>\n",
"52. **volleyball** <span class=\"pho alt\">ˈvɑː.li.bɑːl</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/volleyball-us-male.mp3\" data-audio-us-female=\"/audios/us/volleyball-us-female.mp3\"></span>\n",
"53. **waterfall** <span class=\"pho alt\">ˈwɑː.t̬ɚ.fɑːl</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/waterfall-us-male.mp3\" data-audio-us-female=\"/audios/us/waterfall-us-female.mp3\"></span>\n",
"54. **watermelon** <span class=\"pho alt\">ˈwɑː.t̬ɚˌmel.ən</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/watermelon-us-male.mp3\" data-audio-us-female=\"/audios/us/watermelon-us-female.mp3\"></span>\n",
"55. **weekend** <span class=\"pho alt\">ˈwiːk.end</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/weekend-us-male.mp3\" data-audio-us-female=\"/audios/us/weekend-us-female.mp3\"></span>\n",
"56. **wheelchair** <span class=\"pho alt\">ˈwiːl.tʃer</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/wheelchair-us-male.mp3\" data-audio-us-female=\"/audios/us/wheelchair-us-female.mp3\"></span>\n",
"57. **windmill** <span class=\"pho alt\">ˈwɪnd.mɪl</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/windmill-us-male.mp3\" data-audio-us-female=\"/audios/us/windmill-us-female.mp3\"></span>\n",
"58. **workshop** <span class=\"pho alt\">ˈːk.ʃɑːp</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/workshop-us-male.mp3\" data-audio-us-female=\"/audios/us/workshop-us-female.mp3\"></span>\n"
]
}
],
"source": [
"list = \"\"\"\n",
"1. **airplane**\n",
"2. **airport**\n",
"3. **backyard**\n",
"4. **bedroom**\n",
"5. **birthday**\n",
"6. **blackboard**\n",
"7. **bookstore**\n",
"8. **brainstorm**\n",
"9. **breakfast**\n",
"10. **classroom**\n",
"11. **cupcake**\n",
"12. **daydream**\n",
"13. **dishwasher**\n",
"14. **doorbell**\n",
"15. **downtown**\n",
"16. **earthquake**\n",
"17. **everyday**\n",
"18. **eyewitness**\n",
"19. **firefighter**\n",
"20. **football**\n",
"21. **greenhouse**\n",
"22. **handwriting**\n",
"23. **headache**\n",
"24. **highway**\n",
"25. **homework**\n",
"26. **iceberg**\n",
"27. **jellyfish**\n",
"28. **laptop**\n",
"29. **lighthouse**\n",
"30. **mailbox**\n",
"31. **moonlight**\n",
"32. **notebook**\n",
"33. **nobody**\n",
"34. **pancake**\n",
"35. **postcard**\n",
"36. **rainbow**\n",
"37. **sailboat**\n",
"38. **sandbox**\n",
"39. **seashore**\n",
"40. **skateboard**\n",
"41. **snowflake**\n",
"42. **spaceship**\n",
"43. **sunflower**\n",
"44. **sunshine**\n",
"45. **superhero**\n",
"46. **tablecloth**\n",
"47. **toothbrush**\n",
"48. **toothpaste**\n",
"49. **typewriter**\n",
"50. **underwater**\n",
"51. **upstairs**\n",
"52. **volleyball**\n",
"53. **waterfall**\n",
"54. **watermelon**\n",
"55. **weekend**\n",
"56. **wheelchair**\n",
"57. **windmill**\n",
"58. **workshop**\n",
"\"\"\"\n",
"\n",
"lines = list.split(\"\\n\")\n",
"for l in lines:\n",
" if l.strip() == \"\":\n",
" continue\n",
" # extract str between ** and **\n",
" word = re.search(r'\\*\\*(.*)\\*\\*', l).group(1)\n",
"\n",
" import asyncio\n",
" import edge_tts\n",
" import pygame\n",
" for VOICE in ['en-US-GuyNeural', 'en-US-MichelleNeural']:\n",
" # OUTPUT_FILE = f\"{w}-{VOICE.replace('EricNeural', 'Guy-Male').replace('JennyNeural', 'Jenny-Female').replace('RyanNeural', 'Ryan-Male').replace('SoniaNeural', 'Sonia-Female').lower()}.mp3\"\n",
" OUTPUT_FILE = f\"{word}-{VOICE.replace('GuyNeural', 'Male').replace('MichelleNeural', 'Female').replace('en-', '').lower()}.mp3\"\n",
" communicate = edge_tts.Communicate(word, VOICE)\n",
" await communicate.save(OUTPUT_FILE) \n",
" # print(f\"Audio files for {word} created!\") \n",
"\n",
" entry_us = search_in_json_database(json_database, word, 'us')\n",
" if entry_us == 'not exist':\n",
" phonetics = entry_us\n",
" else:\n",
" phonetics = entry_us['pronunciation'] \n",
" wrapped_p = f' <span class=\"pho alt\">{phonetics}</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/{word}-us-male.mp3\" data-audio-us-female=\"/audios/us/{word}-us-female.mp3\"></span>'\n",
" l += wrapped_p\n",
"\n",
" print(l)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}