diff --git a/1000-hours/public/jupyter-notebooks/edge-tts-valcab-pronounciation.ipynb b/1000-hours/public/jupyter-notebooks/edge-tts-valcab-pronounciation.ipynb
index 49422b56..5ca36b08 100644
--- a/1000-hours/public/jupyter-notebooks/edge-tts-valcab-pronounciation.ipynb
+++ b/1000-hours/public/jupyter-notebooks/edge-tts-valcab-pronounciation.ipynb
@@ -84,21 +84,10 @@
},
{
"cell_type": "code",
- "execution_count": 84,
+ "execution_count": null,
"id": "4146f92e",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "['en-US-GuyNeural', 'en-US-AriaNeural']\n",
- "important\n",
- "../audios/important-us-male.mp3 created\n",
- "../audios/important-us-female.mp3 created\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"\n",
"voices = [\"en-US-GuyNeural\", \"en-US-AriaNeural\", \"en-GB-RyanNeural\", \"en-GB-LibbyNeural\"]\n",
@@ -112,7 +101,9 @@
" print(voices)\n",
"\n",
"words = \"\"\"\n",
- "important\n",
+ "hello,\n",
+ "heat,\n",
+ "high,\n",
"\"\"\"\n",
"\n",
"for word in words.strip().split(','):\n",
@@ -126,7 +117,7 @@
},
{
"cell_type": "code",
- "execution_count": 69,
+ "execution_count": 63,
"id": "2d46cde4",
"metadata": {},
"outputs": [],
@@ -146,18 +137,17 @@
" ) as response:\n",
" response.stream_to_file(path)\n",
" \n",
- "sentence = \"It's a very important aspect\"\n",
+ "sentence = \"The explanation you gave was clear but I need a more detailed explanation.\"\n",
"\n",
"# remove all punctuation at the end of sentence,\n",
"# replace all spaces and punctuations in the sentence with dash\n",
- "# audio_filename_openai = sentence.translate(str.maketrans(' ,.?!', '-----')).strip().replace(\"--\", \"-\") + '_openai.mp3'\n",
- "# audio_filename_msedge = sentence.translate(str.maketrans(' ,.?!', '-----')).strip().replace(\"--\", \"-\") + '_msedge.mp3'\n",
- "audio_filename_openai = sentence.rstrip(\",.?!\").translate(str.maketrans(' ,.?!', '-----')).strip().replace(\"--\", \"-\")\n",
- "audio_filename_msedge = sentence.rstrip(\",.?!\").translate(str.maketrans(' ,.?!', '-----')).strip().replace(\"--\", \"-\")\n",
+ "audio_filename_openai = sentence.translate(str.maketrans(' ,.?!', '-----')).strip().replace(\"--\", \"-\") + '_openai.mp3'\n",
+ "audio_filename_msedge = sentence.translate(str.maketrans(' ,.?!', '-----')).strip().replace(\"--\", \"-\") + '_msedge.mp3'\n",
"# get_openai_tts_audio(sentence, audio_filename_openai, performer='alloy')\n",
"# await generate_edge_tts_audio(sentence, audio_filename_msedge, voice=\"en-US-GuyNeural\", verbose=True, overwrite=True, play=True)\n",
+ "\n",
"for voice in [\"alloy\", \"nova\"]:\n",
- " get_openai_tts_audio(sentence, f'../audios/{audio_filename_openai}-{voice}.mp3', performer=voice)\n"
+ " get_openai_tts_audio(sentence, f'../audios/{sentence.replace(\" \", \"-\")}-{voice}.mp3', performer=voice)\n"
]
},
{
@@ -271,62 +261,6 @@
"* voice = \"en-CA-ClaraNeural\" (Female)\n",
"* voice = \"en-CA-LiamNeural\" (Male)"
]
- },
- {
- "cell_type": "code",
- "execution_count": 79,
- "id": "215d423d",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "<_io.BufferedRandom name='../audios/The-art-of-focus-in-our-whirlwind-existence-can-sometimes-feel-like-searching-for-a-needle-in-a-haystack-all-strong.mp3'>"
- ]
- },
- "execution_count": 79,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "\n",
- "text = \"\"\"\n",
- "The art of focus in our whirlwind existence can sometimes feel like searching for a needle in a haystack\n",
- "\"\"\"\n",
- "\n",
- "# 1 second silence with pydub\n",
- "from pydub import AudioSegment\n",
- "sentence = AudioSegment.silent(duration=1000)\n",
- "\n",
- "for word in text.strip().split(' '):\n",
- " w = word.strip().lower()\n",
- " if w == \"a\":\n",
- " w = \"uh\"\n",
- " if len(w) > 0:\n",
- " filename = f'../audios/temp-{w.replace(\" \", \"-\")}-{regions[i]}-{genders[i]}.mp3'\n",
- " get_openai_tts_audio(w, filename, performer=\"alloy\")\n",
- " sentence += AudioSegment.from_file(filename) + AudioSegment.silent(duration=200)\n",
- " # remove the temp file\n",
- " os.remove(filename)\n",
- "sentence += AudioSegment.silent(duration=1000)\n",
- "# save the sentence as a single audio file\n",
- "sentence.export(f'../audios/{text.strip().replace(\" \",\"-\")}-all-strong.mp3', format='mp3')\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 77,
- "id": "5a718cf9",
- "metadata": {},
- "outputs": [],
- "source": [
- "text = \"\"\"\n",
- "The art of focus in our whirlwind existence can sometimes feel like searching for a needle in a haystack\n",
- "\"\"\"\n",
- "filename = f'../audios/{text.strip().replace(\" \",\"-\")}-natural.mp3'\n",
- "get_openai_tts_audio(text, filename, performer=\"alloy\")"
- ]
}
],
"metadata": {
diff --git a/1000-hours/public/jupyter-notebooks/phonetics.ipynb b/1000-hours/public/jupyter-notebooks/phonetics.ipynb
index e69de29b..70735585 100644
--- a/1000-hours/public/jupyter-notebooks/phonetics.ipynb
+++ b/1000-hours/public/jupyter-notebooks/phonetics.ipynb
@@ -0,0 +1,127 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The history saving thread hit an unexpected error (DatabaseError('database disk image is malformed')).History will not be written to the database.\n"
+ ]
+ }
+ ],
+ "source": [
+ "import json\n",
+ "import vlc\n",
+ "import re\n",
+ "\n",
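+ "# The database file is expected to be in JSON Lines form: one JSON record per line.\n",
+ "# Lines that fail to parse are reported and skipped; adapt this loader if your data is stored differently.\n",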
+ "def load_json_database(file_path):\n",
+ " records = []\n",
+ " with open(file_path, 'r') as file:\n",
+ " for line in file:\n",
+ " try:\n",
+ " record = json.loads(line)\n",
+ " records.append(record)\n",
+ " except json.JSONDecodeError as e:\n",
+ " print(f\"Error parsing JSON: {e}\")\n",
+ " return records\n",
+ "\n",
+ "# Lookup helpers that operate on the records loaded above.\n",
+ "\n",
+ "# 在 JSON 数据库中检索 word\n",
+ "def search_in_json_database(database, search_word, region):\n",
+ " for record in database:\n",
+ " # 检查 word 字段是否匹配\n",
+ " if record.get('word') == search_word:\n",
+ " # Word matched: look for a pronunciation entry for the requested region\n",
+ " pos_items = record.get('pos_items', [])\n",
+ " for pos_item in pos_items:\n",
+ " pronunciations = pos_item.get('pronunciations', [])\n",
+ " for pronunciation in pronunciations:\n",
+ " if pronunciation.get('region') == region:\n",
+ " # Pronunciation for the requested region found: return its text and audio\n",
+ " return {\n",
+ " 'pronunciation': pronunciation.get('pronunciation'),\n",
+ " 'audio': pronunciation.get('audio')\n",
+ " }\n",
+ " # Word not found, or no pronunciation for the requested region: return 'not exist'\n",
+ " return 'not exist'\n",
+ "\n",
+ "def search_pronunciation(database, pattern):\n",
+ " # Compile the regex pattern\n",
+ " regex = re.compile(pattern)\n",
+ " results = []\n",
+ " # Search in the database\n",
+ " for record in database:\n",
+ " for pos_item in record[\"pos_items\"]:\n",
+ " for pronunciation in pos_item[\"pronunciations\"]:\n",
+ " if regex.search(pronunciation[\"pronunciation\"]):\n",
+ " if pronunciation.get('region') == 'us':\n",
+ " results.append((record[\"word\"], pronunciation[\"pronunciation\"].replace(\".\", \"\"))) # record[\"_id\"][\"$oid\"],\n",
+ " # Return every match collected (an empty list when nothing matched)\n",
+ " return results\n",
+ "\n",
+ "# 用于测试的 JSON 数据库文件路径\n",
+ "json_db_file_path = '/Users/joker/github/camdict/cam_dict.refined.json'\n",
+ "\n",
+ "json_database = load_json_database(json_db_file_path)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "*balls* bɑːlz\n",
+ "*cards* kɑrdz\n"
+ ]
+ }
+ ],
+ "source": [
+ "list = \"\"\"\n",
+ "balls,cards\n",
+ "\"\"\"\n",
+ "\n",
+ "for word in list.split(\",\"):\n",
+ " word = word.strip().lower()\n",
+ " result = search_in_json_database(json_database, word, 'us')\n",
+ " if result != 'not exist':\n",
+ " pho = result['pronunciation']\n",
+ " else:\n",
+ " pho = 'not exist'\n",
+ " line = f'*{word}* {pho}'\n",
+ " print(line)\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "base",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/1000-hours/sounds-of-american-english/4.2-words.md b/1000-hours/sounds-of-american-english/4.2-words.md
index 52fdf2f3..f943cd7e 100644
--- a/1000-hours/sounds-of-american-english/4.2-words.md
+++ b/1000-hours/sounds-of-american-english/4.2-words.md
@@ -4,11 +4,11 @@
## 4.2.1. 重音、次重音、非重音、弱音
-当一个词汇由一个以上的音节构成之时,其中的某个音节可能带有重音(*stress*),在音标中使用 ˈ 作为标记。
+重点在于,英文的音节有**重音**、**次重音**、**非重音**的区别,这一点和亚洲语言明显不同。
+
+如果一个单词只有一个音节,单独读出的时候,就当作是**重音**(*stress*)音节读出。而一个多音节词汇中**有且只有一个**重音音节,在音标中使用 ˈ 作为标记;此外,可能还有另外一些音节是**次重音**(*secondary stress*),在音标中使用 ˌ 作为标记。比如,*serendipity* ˌserənˈdɪpət̬i,有一个**重音**和一个**次重音**。而 *[Pneumonoultramicroscopicsilicovolcanoconiosis](https://en.wikipedia.org/wiki/Pneumonoultramicroscopicsilicovolcanoconiosis)* 总计有 19 个音节,其中 7 个是次重音,唯一的重音是 oʊ……
-**一个多音节词汇中最多只有一个重音音节**,但,可能还有另外一些音节是**次重音**(*secondary stress*),在音标中使用 ˌ 作为标记。比如,*serendipity* ˌserənˈdɪpət̬i,有一个**重音**和一个**次重音**。而 *[Pneumonoultramicroscopicsilicovolcanoconiosis](https://en.wikipedia.org/wiki/Pneumonoultramicroscopicsilicovolcanoconiosis)* 总计有 19 个音节,其中 7 个是次重音,唯一的重音是 oʊ……
-另外一个重点在于,英文的音节有**重音**、**次重音**、**非重音**的区别,这一点也和亚洲语言明显不同。
形象地讲,5 个不分轻重的汉字(或者日文字、韩文字)排在一起大概是这样的: