421 lines
28 KiB
Plaintext
421 lines
28 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import sys\n",
|
||
"import requests\n",
|
||
"import json\n",
|
||
"import vlc\n",
|
||
"import re\n",
|
||
"import random\n",
|
||
"from IPython.display import Audio\n",
|
||
"\n",
|
||
"def load_json_database(url):\n",
|
||
" records = []\n",
|
||
" try:\n",
|
||
" response = requests.get(url)\n",
|
||
" response.raise_for_status() # Raise an error for bad status codes\n",
|
||
" for line in response.iter_lines(decode_unicode=True):\n",
|
||
" if line:\n",
|
||
" try:\n",
|
||
" record = json.loads(line)\n",
|
||
" records.append(record)\n",
|
||
" except json.JSONDecodeError as e:\n",
|
||
" print(f\"Error parsing JSON: {e}\")\n",
|
||
" except requests.exceptions.RequestException as e:\n",
|
||
" print(f\"Error fetching data from URL: {e}\")\n",
|
||
" return records\n",
|
||
"\n",
|
||
"def search_in_json_database(database, search_word, region):\n",
|
||
" for record in database:\n",
|
||
" # 检查 word 字段是否匹配\n",
|
||
" if record.get('word') == search_word:\n",
|
||
" # 找到匹配项后,获取美式发音信息\n",
|
||
" pos_items = record.get('pos_items', [])\n",
|
||
" for pos_item in pos_items:\n",
|
||
" pronunciations = pos_item.get('pronunciations', [])\n",
|
||
" for pronunciation in pronunciations:\n",
|
||
" if pronunciation.get('region') == region:\n",
|
||
" # 找到美式发音,返回相关信息\n",
|
||
" return {\n",
|
||
" 'pronunciation': pronunciation.get('pronunciation'),\n",
|
||
" 'audio': pronunciation.get('audio')\n",
|
||
" }\n",
|
||
" # 如果没有找到匹配的 word 字段,返回 'not exist'\n",
|
||
" return 'not exist'\n",
|
||
"\n",
|
||
"url = \"https://raw.githubusercontent.com/zelic91/camdict/main/cam_dict.refined.json\"\n",
|
||
"\n",
|
||
"json_database = load_json_database(url)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\n",
|
||
"之前讲过,非重音音节里的元音可能会被弱化为 schwa <span class=\"pho\">ə</span>…… 在自然语流中,连 schwa <span class=\"pho\">ə</span> 都可能会被进一步弱化,变成非常轻的 <span class=\"pho\">ɤ</span>。比如,常用词 reasonable,</span>/ˈriːzənəbəl/</span>,实际听到的常常是</span> /ˈriːzɤnəbəl/</span>……\n",
|
||
"\n",
|
||
"phonetics_not_exist: \n",
|
||
"\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"text =\"\"\"\n",
|
||
"之前讲过,非重音音节里的元音可能会被弱化为 schwa `ə`…… 在自然语流中,连 schwa `ə` 都可能会被进一步弱化,变成非常轻的 `ɤ`。比如,常用词 reasonable,`/ˈriːzənəbəl/`,实际听到的常常是` /ˈriːzɤnəbəl/`……\n",
|
||
"\"\"\"\n",
|
||
"\n",
|
||
"words = \"\"\n",
|
||
"phonetics_not_exist = ''\n",
|
||
"lines = text.split(\"\\n\")\n",
|
||
"for line in lines:\n",
|
||
" line = line.replace(\" `\", \" <span class=\\\"pho\\\">\")\n",
|
||
" line = line.replace(\"`\", \"</span>\")\n",
|
||
" if '*' in line:\n",
|
||
" line = line.replace('\"', \"**\")\n",
|
||
" examples = line.split(\"-\")[1].split(\",\")\n",
|
||
" examples = [x.strip() for x in examples]\n",
|
||
" line = line.replace(\" - \", \"\\n\")\n",
|
||
" # print(examples)\n",
|
||
" # wrap examples in span,\n",
|
||
" for e in examples:\n",
|
||
" # join e in words with ','\n",
|
||
" words += e + \",\"\n",
|
||
" entry_us = search_in_json_database(json_database, e, 'us')\n",
|
||
" if entry_us == 'not exist':\n",
|
||
" phonetics = entry_us\n",
|
||
" phonetics_not_exist += f'{e},'\n",
|
||
" else:\n",
|
||
" phonetics = entry_us['pronunciation'] \n",
|
||
" wrapped_e = f'\\t- {e} <span class=\"pho alt\">{phonetics}</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/{e}-us-male.mp3\" data-audio-us-female=\"/audios/us/{e}-us-female.mp3\"></span>\\n'\n",
|
||
" line = line.replace(e, wrapped_e).replace(',', '').strip()\n",
|
||
" \n",
|
||
"\n",
|
||
" print(line)\n",
|
||
"print(f'phonetics_not_exist: {phonetics_not_exist}')\n",
|
||
"print('\\n'+words)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"['reasonable']\n",
|
||
"reasonable\n",
|
||
"Files created!\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import asyncio\n",
|
||
"import edge_tts\n",
|
||
"import pygame\n",
|
||
"text = words.rstrip(\",\")\n",
|
||
"Wordlist = text.split(\",\")\n",
|
||
"# Wordlist = ['reasonable']\n",
|
||
"\n",
|
||
"print(Wordlist)\n",
|
||
"for w in Wordlist:\n",
|
||
" # for VOICE in ['en-US-GuyNeural', 'en-US-JennyNeural', 'en-GB-RyanNeural', 'en-GB-SoniaNeural']:\n",
|
||
" for VOICE in ['en-US-GuyNeural', 'en-US-MichelleNeural']:\n",
|
||
" w = w.strip()\n",
|
||
" # OUTPUT_FILE = f\"{w}-{VOICE.replace('EricNeural', 'Guy-Male').replace('JennyNeural', 'Jenny-Female').replace('RyanNeural', 'Ryan-Male').replace('SoniaNeural', 'Sonia-Female').lower()}.mp3\"\n",
|
||
" OUTPUT_FILE = f\"{w}-{VOICE.replace('GuyNeural', 'Male').replace('MichelleNeural', 'Female').replace('en-', '').lower()}.mp3\"\n",
|
||
" communicate = edge_tts.Communicate(w, VOICE)\n",
|
||
" await communicate.save(OUTPUT_FILE) \n",
|
||
" print(w)\n",
|
||
"print(\"Files created!\") "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\n",
|
||
"举个例子,*ichthyosaur*,这个一看就知道并非常用的词汇,其实很简单,先从表音构成去看,<span class=\"pho alt\">ˈɪk.θi.ə.sɔːr</span> —— 剑桥词典把它划分成了 4 个音节…… 但感觉上,第二第三个音节可以合并,<span class=\"pho alt\">ˈɪk.θiə.sɔːr</span>,*ich* ⭤ <span class=\"pho alt\">ˈɪk</span>, *thyo* ⭤ <span class=\"pho alt\">θiə</span>, *saur* ⭤ <span class=\"pho alt\">sɔːr</span>…… 而从表意的角度去看呢?前半部 *ichthyo-* 的意思是 “与鱼有关的”…… 后半部 *-saur* 是什么意思呢?各种恐龙的 “龙” 都是 -saur 结尾,于是,这个词的意思是 “鱼龙”…… 换言之,这个单词的两个部分,都是拉丁词根词缀,也都是 “既表音又表意” 的,事实上很简单 —— 虽然拼写乍看起来很复杂。\n",
|
||
"\n",
|
||
"Audio files for ichthyosaur created!\n",
|
||
"\n",
|
||
"举个例子,*ichthyosaur* <span class=\"pho alt\">ˈɪk.θi.ə.sɔːr</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/ichthyosaur-us-male.mp3\" data-audio-us-female=\"/audios/us/ichthyosaur-us-female.mp3\"></span>\n",
|
||
",这个一看就知道并非常用的词汇,其实很简单,先从表音构成去看,<span class=\"pho alt\">ˈɪk.θi.ə.sɔːr</span> —— 剑桥词典把它划分成了 4 个音节…… 但感觉上,第二第三个音节可以合并,<span class=\"pho alt\">ˈɪk.θiə.sɔːr</span>,*ich* ⭤ <span class=\"pho alt\">ˈɪk</span>, *thyo* ⭤ <span class=\"pho alt\">θiə</span>, *saur* ⭤ <span class=\"pho alt\">sɔːr</span>…… 而从表意的角度去看呢?前半部 *ichthyo-* 的意思是 “与鱼有关的”…… 后半部 *-saur* 是什么意思呢?各种恐龙的 “龙” 都是 -saur 结尾,于是,这个词的意思是 “鱼龙”…… 换言之,这个单词的两个部分,都是拉丁词根词缀,也都是 “既表音又表意” 的,事实上很简单 —— 虽然拼写乍看起来很复杂。\n",
|
||
"\n",
|
||
"Text copied to clipboard!\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"text =\"\"\"\n",
|
||
"举个例子,*ichthyosaur*,这个一看就知道并非常用的词汇,其实很简单,先从表音构成去看,`/ˈɪk.θi.ə.sɔːr/` —— 剑桥词典把它划分成了 4 个音节…… 但感觉上,第二第三个音节可以合并,`/ˈɪk.θiə.sɔːr/`,*ich* ⭤ `/ˈɪk/`, *thyo* ⭤ `/θiə/`, *saur* ⭤ `/sɔːr/`…… 而从表意的角度去看呢?前半部 *ichthyo-* 的意思是 “与鱼有关的”…… 后半部 *-saur* 是什么意思呢?各种恐龙的 “龙” 都是 -saur 结尾,于是,这个词的意思是 “鱼龙”…… 换言之,这个单词的两个部分,都是拉丁词根词缀,也都是 “既表音又表意” 的,事实上很简单 —— 虽然拼写乍看起来很复杂。\n",
|
||
"\"\"\"\n",
|
||
"\n",
|
||
"sound_files = \"ichthyosaur\".split(\",\")\n",
|
||
"# regex, replace `...` with <span class=\"pho\">...</span>\n",
|
||
"import re\n",
|
||
"text = text.replace(\"/\", \"\")\n",
|
||
"text = re.sub(r'`([^`]+)`', r'<span class=\"pho alt\">\\1</span>', text)\n",
|
||
"print(text)\n",
|
||
"\n",
|
||
"# get sound files\n",
|
||
"for s in sound_files:\n",
|
||
" w = s.strip()\n",
|
||
" text = text.replace(f\"*{w}*\", w)\n",
|
||
" # get the audio file\n",
|
||
"\n",
|
||
" import asyncio\n",
|
||
" import edge_tts\n",
|
||
" import pygame\n",
|
||
" for VOICE in ['en-US-GuyNeural', 'en-US-MichelleNeural']:\n",
|
||
" # OUTPUT_FILE = f\"{w}-{VOICE.replace('EricNeural', 'Guy-Male').replace('JennyNeural', 'Jenny-Female').replace('RyanNeural', 'Ryan-Male').replace('SoniaNeural', 'Sonia-Female').lower()}.mp3\"\n",
|
||
" OUTPUT_FILE = f\"{w}-{VOICE.replace('GuyNeural', 'Male').replace('MichelleNeural', 'Female').replace('en-', '').lower()}.mp3\"\n",
|
||
" communicate = edge_tts.Communicate(w, VOICE)\n",
|
||
" await communicate.save(OUTPUT_FILE) \n",
|
||
" print(f\"Audio files for {w} created!\") \n",
|
||
"\n",
|
||
" entry_us = search_in_json_database(json_database, w, 'us')\n",
|
||
" if entry_us == 'not exist':\n",
|
||
" phonetics = entry_us\n",
|
||
" else:\n",
|
||
" phonetics = entry_us['pronunciation'] \n",
|
||
" wrapped_w = f'*{w}* <span class=\"pho alt\">{phonetics}</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/{w}-us-male.mp3\" data-audio-us-female=\"/audios/us/{w}-us-female.mp3\"></span>\\n'\n",
|
||
" text = text.replace(w, wrapped_w)\n",
|
||
" # get phonetics for the word\n",
|
||
"\n",
|
||
"print(text)\n",
|
||
"\n",
|
||
"# send text to clipboard\n",
|
||
"import pyperclip\n",
|
||
"pyperclip.copy(text.replace(\"\\n\", \"\"))\n",
|
||
"print(\"Text copied to clipboard!\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Collecting pyperclip\n",
|
||
" Downloading pyperclip-1.9.0.tar.gz (20 kB)\n",
|
||
" Preparing metadata (setup.py) ... \u001b[?25ldone\n",
|
||
"\u001b[?25hBuilding wheels for collected packages: pyperclip\n",
|
||
" Building wheel for pyperclip (setup.py) ... \u001b[?25ldone\n",
|
||
"\u001b[?25h Created wheel for pyperclip: filename=pyperclip-1.9.0-py3-none-any.whl size=11002 sha256=b07922d96d27e2cc0dce4f31dd18f85c90c5f4b9298ca359a6ad9a13494461d6\n",
|
||
" Stored in directory: /Users/joker/Library/Caches/pip/wheels/e0/e8/fc/8ab8aa326e33bc066ccd5f3ca9646eab4299881af933f94f09\n",
|
||
"Successfully built pyperclip\n",
|
||
"Installing collected packages: pyperclip\n",
|
||
"Successfully installed pyperclip-1.9.0\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!pip install pyperclip"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"1. **airplane** <span class=\"pho alt\">ˈer.pleɪn</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/airplane-us-male.mp3\" data-audio-us-female=\"/audios/us/airplane-us-female.mp3\"></span>\n",
|
||
"2. **airport** <span class=\"pho alt\">ˈer.pɔːrt</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/airport-us-male.mp3\" data-audio-us-female=\"/audios/us/airport-us-female.mp3\"></span>\n",
|
||
"3. **backyard** <span class=\"pho alt\">ˌbækˈjɑːrd</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/backyard-us-male.mp3\" data-audio-us-female=\"/audios/us/backyard-us-female.mp3\"></span>\n",
|
||
"4. **bedroom** <span class=\"pho alt\">ˈbed.ruːm</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/bedroom-us-male.mp3\" data-audio-us-female=\"/audios/us/bedroom-us-female.mp3\"></span>\n",
|
||
"5. **birthday** <span class=\"pho alt\">ˈbɝːθ.deɪ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/birthday-us-male.mp3\" data-audio-us-female=\"/audios/us/birthday-us-female.mp3\"></span>\n",
|
||
"6. **blackboard** <span class=\"pho alt\">ˈblæk.bɔːrd</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/blackboard-us-male.mp3\" data-audio-us-female=\"/audios/us/blackboard-us-female.mp3\"></span>\n",
|
||
"7. **bookstore** <span class=\"pho alt\">ˈbʊk.stɔːr</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/bookstore-us-male.mp3\" data-audio-us-female=\"/audios/us/bookstore-us-female.mp3\"></span>\n",
|
||
"8. **brainstorm** <span class=\"pho alt\">ˈbreɪn.stɔːrm</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/brainstorm-us-male.mp3\" data-audio-us-female=\"/audios/us/brainstorm-us-female.mp3\"></span>\n",
|
||
"9. **breakfast** <span class=\"pho alt\">ˈbrek.fəst</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/breakfast-us-male.mp3\" data-audio-us-female=\"/audios/us/breakfast-us-female.mp3\"></span>\n",
|
||
"10. **classroom** <span class=\"pho alt\">ˈklæs.ruːm</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/classroom-us-male.mp3\" data-audio-us-female=\"/audios/us/classroom-us-female.mp3\"></span>\n",
|
||
"11. **cupcake** <span class=\"pho alt\">ˈkʌp.keɪk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/cupcake-us-male.mp3\" data-audio-us-female=\"/audios/us/cupcake-us-female.mp3\"></span>\n",
|
||
"12. **daydream** <span class=\"pho alt\">ˈdeɪ.driːm</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/daydream-us-male.mp3\" data-audio-us-female=\"/audios/us/daydream-us-female.mp3\"></span>\n",
|
||
"13. **dishwasher** <span class=\"pho alt\">ˈdɪʃˌwɑː.ʃɚ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/dishwasher-us-male.mp3\" data-audio-us-female=\"/audios/us/dishwasher-us-female.mp3\"></span>\n",
|
||
"14. **doorbell** <span class=\"pho alt\">ˈdɔːr.bel</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/doorbell-us-male.mp3\" data-audio-us-female=\"/audios/us/doorbell-us-female.mp3\"></span>\n",
|
||
"15. **downtown** <span class=\"pho alt\">ˌdaʊnˈtaʊn</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/downtown-us-male.mp3\" data-audio-us-female=\"/audios/us/downtown-us-female.mp3\"></span>\n",
|
||
"16. **earthquake** <span class=\"pho alt\">ˈɝːθ.kweɪk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/earthquake-us-male.mp3\" data-audio-us-female=\"/audios/us/earthquake-us-female.mp3\"></span>\n",
|
||
"17. **everyday** <span class=\"pho alt\">ˈev.ri.deɪ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/everyday-us-male.mp3\" data-audio-us-female=\"/audios/us/everyday-us-female.mp3\"></span>\n",
|
||
"18. **eyewitness** <span class=\"pho alt\">ˈaɪˌwɪt.nəs</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/eyewitness-us-male.mp3\" data-audio-us-female=\"/audios/us/eyewitness-us-female.mp3\"></span>\n",
|
||
"19. **firefighter** <span class=\"pho alt\">ˈfaɪrˌfaɪ.t̬ɚ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/firefighter-us-male.mp3\" data-audio-us-female=\"/audios/us/firefighter-us-female.mp3\"></span>\n",
|
||
"20. **football** <span class=\"pho alt\">ˈfʊt.bɑːl</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/football-us-male.mp3\" data-audio-us-female=\"/audios/us/football-us-female.mp3\"></span>\n",
|
||
"21. **greenhouse** <span class=\"pho alt\">ˈɡriːn.haʊs</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/greenhouse-us-male.mp3\" data-audio-us-female=\"/audios/us/greenhouse-us-female.mp3\"></span>\n",
|
||
"22. **handwriting** <span class=\"pho alt\">ˈhændˌraɪ.t̬ɪŋ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/handwriting-us-male.mp3\" data-audio-us-female=\"/audios/us/handwriting-us-female.mp3\"></span>\n",
|
||
"23. **headache** <span class=\"pho alt\">ˈhed.eɪk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/headache-us-male.mp3\" data-audio-us-female=\"/audios/us/headache-us-female.mp3\"></span>\n",
|
||
"24. **highway** <span class=\"pho alt\">ˈhaɪ.weɪ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/highway-us-male.mp3\" data-audio-us-female=\"/audios/us/highway-us-female.mp3\"></span>\n",
|
||
"25. **homework** <span class=\"pho alt\">ˈhoʊm.wɝːk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/homework-us-male.mp3\" data-audio-us-female=\"/audios/us/homework-us-female.mp3\"></span>\n",
|
||
"26. **iceberg** <span class=\"pho alt\">ˈaɪs.bɝːɡ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/iceberg-us-male.mp3\" data-audio-us-female=\"/audios/us/iceberg-us-female.mp3\"></span>\n",
|
||
"27. **jellyfish** <span class=\"pho alt\">ˈdʒel.i.fɪʃ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/jellyfish-us-male.mp3\" data-audio-us-female=\"/audios/us/jellyfish-us-female.mp3\"></span>\n",
|
||
"28. **laptop** <span class=\"pho alt\">ˈlæp.tɑːp</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/laptop-us-male.mp3\" data-audio-us-female=\"/audios/us/laptop-us-female.mp3\"></span>\n",
|
||
"29. **lighthouse** <span class=\"pho alt\">ˈlaɪt.haʊs</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/lighthouse-us-male.mp3\" data-audio-us-female=\"/audios/us/lighthouse-us-female.mp3\"></span>\n",
|
||
"30. **mailbox** <span class=\"pho alt\">ˈmeɪl.bɑːks</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/mailbox-us-male.mp3\" data-audio-us-female=\"/audios/us/mailbox-us-female.mp3\"></span>\n",
|
||
"31. **moonlight** <span class=\"pho alt\">ˈmuːn.laɪt</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/moonlight-us-male.mp3\" data-audio-us-female=\"/audios/us/moonlight-us-female.mp3\"></span>\n",
|
||
"32. **notebook** <span class=\"pho alt\">ˈnoʊt.bʊk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/notebook-us-male.mp3\" data-audio-us-female=\"/audios/us/notebook-us-female.mp3\"></span>\n",
|
||
"33. **nobody** <span class=\"pho alt\">ˈnoʊ.bɑː.di</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/nobody-us-male.mp3\" data-audio-us-female=\"/audios/us/nobody-us-female.mp3\"></span>\n",
|
||
"34. **pancake** <span class=\"pho alt\">ˈpæn.keɪk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/pancake-us-male.mp3\" data-audio-us-female=\"/audios/us/pancake-us-female.mp3\"></span>\n",
|
||
"35. **postcard** <span class=\"pho alt\">ˈpoʊst.kɑːrd</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/postcard-us-male.mp3\" data-audio-us-female=\"/audios/us/postcard-us-female.mp3\"></span>\n",
|
||
"36. **rainbow** <span class=\"pho alt\">ˈreɪn.boʊ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/rainbow-us-male.mp3\" data-audio-us-female=\"/audios/us/rainbow-us-female.mp3\"></span>\n",
|
||
"37. **sailboat** <span class=\"pho alt\">ˈseɪl.boʊt</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/sailboat-us-male.mp3\" data-audio-us-female=\"/audios/us/sailboat-us-female.mp3\"></span>\n",
|
||
"38. **sandbox** <span class=\"pho alt\">ˈsænd.bɑːks</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/sandbox-us-male.mp3\" data-audio-us-female=\"/audios/us/sandbox-us-female.mp3\"></span>\n",
|
||
"39. **seashore** <span class=\"pho alt\">ˈsiː.ʃɔːr</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/seashore-us-male.mp3\" data-audio-us-female=\"/audios/us/seashore-us-female.mp3\"></span>\n",
|
||
"40. **skateboard** <span class=\"pho alt\">ˈskeɪt.bɔːrd</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/skateboard-us-male.mp3\" data-audio-us-female=\"/audios/us/skateboard-us-female.mp3\"></span>\n",
|
||
"41. **snowflake** <span class=\"pho alt\">ˈsnoʊ.fleɪk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/snowflake-us-male.mp3\" data-audio-us-female=\"/audios/us/snowflake-us-female.mp3\"></span>\n",
|
||
"42. **spaceship** <span class=\"pho alt\">ˈspeɪs.ʃɪp</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/spaceship-us-male.mp3\" data-audio-us-female=\"/audios/us/spaceship-us-female.mp3\"></span>\n",
|
||
"43. **sunflower** <span class=\"pho alt\">ˈsʌnˌflaʊ.ɚ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/sunflower-us-male.mp3\" data-audio-us-female=\"/audios/us/sunflower-us-female.mp3\"></span>\n",
|
||
"44. **sunshine** <span class=\"pho alt\">ˈsʌn.ʃaɪn</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/sunshine-us-male.mp3\" data-audio-us-female=\"/audios/us/sunshine-us-female.mp3\"></span>\n",
|
||
"45. **superhero** <span class=\"pho alt\">ˈsuː.pɚˌhɪr.oʊ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/superhero-us-male.mp3\" data-audio-us-female=\"/audios/us/superhero-us-female.mp3\"></span>\n",
|
||
"46. **tablecloth** <span class=\"pho alt\">ˈteɪ.bəl.klɑːθ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/tablecloth-us-male.mp3\" data-audio-us-female=\"/audios/us/tablecloth-us-female.mp3\"></span>\n",
|
||
"47. **toothbrush** <span class=\"pho alt\">ˈtuːθ.brʌʃ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/toothbrush-us-male.mp3\" data-audio-us-female=\"/audios/us/toothbrush-us-female.mp3\"></span>\n",
|
||
"48. **toothpaste** <span class=\"pho alt\">ˈtuːθ.peɪst</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/toothpaste-us-male.mp3\" data-audio-us-female=\"/audios/us/toothpaste-us-female.mp3\"></span>\n",
|
||
"49. **typewriter** <span class=\"pho alt\">ˈtaɪpˌraɪ.t̬ɚ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/typewriter-us-male.mp3\" data-audio-us-female=\"/audios/us/typewriter-us-female.mp3\"></span>\n",
|
||
"50. **underwater** <span class=\"pho alt\">ˌʌn.dɚˈwɑː.t̬ɚ</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/underwater-us-male.mp3\" data-audio-us-female=\"/audios/us/underwater-us-female.mp3\"></span>\n",
|
||
"51. **upstairs** <span class=\"pho alt\">ʌpˈsterz</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/upstairs-us-male.mp3\" data-audio-us-female=\"/audios/us/upstairs-us-female.mp3\"></span>\n",
|
||
"52. **volleyball** <span class=\"pho alt\">ˈvɑː.li.bɑːl</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/volleyball-us-male.mp3\" data-audio-us-female=\"/audios/us/volleyball-us-female.mp3\"></span>\n",
|
||
"53. **waterfall** <span class=\"pho alt\">ˈwɑː.t̬ɚ.fɑːl</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/waterfall-us-male.mp3\" data-audio-us-female=\"/audios/us/waterfall-us-female.mp3\"></span>\n",
|
||
"54. **watermelon** <span class=\"pho alt\">ˈwɑː.t̬ɚˌmel.ən</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/watermelon-us-male.mp3\" data-audio-us-female=\"/audios/us/watermelon-us-female.mp3\"></span>\n",
|
||
"55. **weekend** <span class=\"pho alt\">ˈwiːk.end</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/weekend-us-male.mp3\" data-audio-us-female=\"/audios/us/weekend-us-female.mp3\"></span>\n",
|
||
"56. **wheelchair** <span class=\"pho alt\">ˈwiːl.tʃer</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/wheelchair-us-male.mp3\" data-audio-us-female=\"/audios/us/wheelchair-us-female.mp3\"></span>\n",
|
||
"57. **windmill** <span class=\"pho alt\">ˈwɪnd.mɪl</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/windmill-us-male.mp3\" data-audio-us-female=\"/audios/us/windmill-us-female.mp3\"></span>\n",
|
||
"58. **workshop** <span class=\"pho alt\">ˈwɝːk.ʃɑːp</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/workshop-us-male.mp3\" data-audio-us-female=\"/audios/us/workshop-us-female.mp3\"></span>\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"list = \"\"\"\n",
|
||
"1. **airplane**\n",
|
||
"2. **airport**\n",
|
||
"3. **backyard**\n",
|
||
"4. **bedroom**\n",
|
||
"5. **birthday**\n",
|
||
"6. **blackboard**\n",
|
||
"7. **bookstore**\n",
|
||
"8. **brainstorm**\n",
|
||
"9. **breakfast**\n",
|
||
"10. **classroom**\n",
|
||
"11. **cupcake**\n",
|
||
"12. **daydream**\n",
|
||
"13. **dishwasher**\n",
|
||
"14. **doorbell**\n",
|
||
"15. **downtown**\n",
|
||
"16. **earthquake**\n",
|
||
"17. **everyday**\n",
|
||
"18. **eyewitness**\n",
|
||
"19. **firefighter**\n",
|
||
"20. **football**\n",
|
||
"21. **greenhouse**\n",
|
||
"22. **handwriting**\n",
|
||
"23. **headache**\n",
|
||
"24. **highway**\n",
|
||
"25. **homework**\n",
|
||
"26. **iceberg**\n",
|
||
"27. **jellyfish**\n",
|
||
"28. **laptop**\n",
|
||
"29. **lighthouse**\n",
|
||
"30. **mailbox**\n",
|
||
"31. **moonlight**\n",
|
||
"32. **notebook**\n",
|
||
"33. **nobody**\n",
|
||
"34. **pancake**\n",
|
||
"35. **postcard**\n",
|
||
"36. **rainbow**\n",
|
||
"37. **sailboat**\n",
|
||
"38. **sandbox**\n",
|
||
"39. **seashore**\n",
|
||
"40. **skateboard**\n",
|
||
"41. **snowflake**\n",
|
||
"42. **spaceship**\n",
|
||
"43. **sunflower**\n",
|
||
"44. **sunshine**\n",
|
||
"45. **superhero**\n",
|
||
"46. **tablecloth**\n",
|
||
"47. **toothbrush**\n",
|
||
"48. **toothpaste**\n",
|
||
"49. **typewriter**\n",
|
||
"50. **underwater**\n",
|
||
"51. **upstairs**\n",
|
||
"52. **volleyball**\n",
|
||
"53. **waterfall**\n",
|
||
"54. **watermelon**\n",
|
||
"55. **weekend**\n",
|
||
"56. **wheelchair**\n",
|
||
"57. **windmill**\n",
|
||
"58. **workshop**\n",
|
||
"\"\"\"\n",
|
||
"\n",
|
||
"lines = list.split(\"\\n\")\n",
|
||
"for l in lines:\n",
|
||
" if l.strip() == \"\":\n",
|
||
" continue\n",
|
||
" # extract str between ** and **\n",
|
||
" word = re.search(r'\\*\\*(.*)\\*\\*', l).group(1)\n",
|
||
"\n",
|
||
" import asyncio\n",
|
||
" import edge_tts\n",
|
||
" import pygame\n",
|
||
" for VOICE in ['en-US-GuyNeural', 'en-US-MichelleNeural']:\n",
|
||
" # OUTPUT_FILE = f\"{w}-{VOICE.replace('EricNeural', 'Guy-Male').replace('JennyNeural', 'Jenny-Female').replace('RyanNeural', 'Ryan-Male').replace('SoniaNeural', 'Sonia-Female').lower()}.mp3\"\n",
|
||
" OUTPUT_FILE = f\"{word}-{VOICE.replace('GuyNeural', 'Male').replace('MichelleNeural', 'Female').replace('en-', '').lower()}.mp3\"\n",
|
||
" communicate = edge_tts.Communicate(word, VOICE)\n",
|
||
" await communicate.save(OUTPUT_FILE) \n",
|
||
" # print(f\"Audio files for {word} created!\") \n",
|
||
"\n",
|
||
" entry_us = search_in_json_database(json_database, word, 'us')\n",
|
||
" if entry_us == 'not exist':\n",
|
||
" phonetics = entry_us\n",
|
||
" else:\n",
|
||
" phonetics = entry_us['pronunciation'] \n",
|
||
" wrapped_p = f' <span class=\"pho alt\">{phonetics}</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/{word}-us-male.mp3\" data-audio-us-female=\"/audios/us/{word}-us-female.mp3\"></span>'\n",
|
||
" l += wrapped_p\n",
|
||
"\n",
|
||
" print(l)"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "base",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.12.4"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|