diff --git a/1000-hours/public/jupyter-notebooks/edge-tts-valcab-pronounciation.ipynb b/1000-hours/public/jupyter-notebooks/edge-tts-valcab-pronounciation.ipynb index 49422b56..5ca36b08 100644 --- a/1000-hours/public/jupyter-notebooks/edge-tts-valcab-pronounciation.ipynb +++ b/1000-hours/public/jupyter-notebooks/edge-tts-valcab-pronounciation.ipynb @@ -84,21 +84,10 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": null, "id": "4146f92e", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['en-US-GuyNeural', 'en-US-AriaNeural']\n", - "important\n", - "../audios/important-us-male.mp3 created\n", - "../audios/important-us-female.mp3 created\n" - ] - } - ], + "outputs": [], "source": [ "\n", "voices = [\"en-US-GuyNeural\", \"en-US-AriaNeural\", \"en-GB-RyanNeural\", \"en-GB-LibbyNeural\"]\n", @@ -112,7 +101,9 @@ " print(voices)\n", "\n", "words = \"\"\"\n", - "important\n", + "hello,\n", + "heat,\n", + "high,\n", "\"\"\"\n", "\n", "for word in words.strip().split(','):\n", @@ -126,7 +117,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 63, "id": "2d46cde4", "metadata": {}, "outputs": [], @@ -146,18 +137,17 @@ " ) as response:\n", " response.stream_to_file(path)\n", " \n", - "sentence = \"It's a very important aspect\"\n", + "sentence = \"The explanation you gave was clear but I need a more detailed explanation.\"\n", "\n", "# remove all punctuation at the end of sentence,\n", "# replace all spaces and punctuations in the sentence with dash\n", - "# audio_filename_openai = sentence.translate(str.maketrans(' ,.?!', '-----')).strip().replace(\"--\", \"-\") + '_openai.mp3'\n", - "# audio_filename_msedge = sentence.translate(str.maketrans(' ,.?!', '-----')).strip().replace(\"--\", \"-\") + '_msedge.mp3'\n", - "audio_filename_openai = sentence.rstrip(\",.?!\").translate(str.maketrans(' ,.?!', '-----')).strip().replace(\"--\", \"-\")\n", - "audio_filename_msedge = 
sentence.rstrip(\",.?!\").translate(str.maketrans(' ,.?!', '-----')).strip().replace(\"--\", \"-\")\n", + "audio_filename_openai = sentence.translate(str.maketrans(' ,.?!', '-----')).strip().replace(\"--\", \"-\") + '_openai.mp3'\n", + "audio_filename_msedge = sentence.translate(str.maketrans(' ,.?!', '-----')).strip().replace(\"--\", \"-\") + '_msedge.mp3'\n", "# get_openai_tts_audio(sentence, audio_filename_openai, performer='alloy')\n", "# await generate_edge_tts_audio(sentence, audio_filename_msedge, voice=\"en-US-GuyNeural\", verbose=True, overwrite=True, play=True)\n", + "\n", "for voice in [\"alloy\", \"nova\"]:\n", - " get_openai_tts_audio(sentence, f'../audios/{audio_filename_openai}-{voice}.mp3', performer=voice)\n" + " get_openai_tts_audio(sentence, f'../audios/{sentence.replace(\" \", \"-\")}-{voice}.mp3', performer=voice)\n" ] }, { @@ -271,62 +261,6 @@ "* voice = \"en-CA-ClaraNeural\" (Female)\n", "* voice = \"en-CA-LiamNeural\" (Male)" ] - }, - { - "cell_type": "code", - "execution_count": 79, - "id": "215d423d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "<_io.BufferedRandom name='../audios/The-art-of-focus-in-our-whirlwind-existence-can-sometimes-feel-like-searching-for-a-needle-in-a-haystack-all-strong.mp3'>" - ] - }, - "execution_count": 79, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "text = \"\"\"\n", - "The art of focus in our whirlwind existence can sometimes feel like searching for a needle in a haystack\n", - "\"\"\"\n", - "\n", - "# 1 second silence with pydub\n", - "from pydub import AudioSegment\n", - "sentence = AudioSegment.silent(duration=1000)\n", - "\n", - "for word in text.strip().split(' '):\n", - " w = word.strip().lower()\n", - " if w == \"a\":\n", - " w = \"uh\"\n", - " if len(w) > 0:\n", - " filename = f'../audios/temp-{w.replace(\" \", \"-\")}-{regions[i]}-{genders[i]}.mp3'\n", - " get_openai_tts_audio(w, filename, performer=\"alloy\")\n", - " sentence += 
AudioSegment.from_file(filename) + AudioSegment.silent(duration=200)\n", - " # remove the temp file\n", - " os.remove(filename)\n", - "sentence += AudioSegment.silent(duration=1000)\n", - "# save the sentence as a single audio file\n", - "sentence.export(f'../audios/{text.strip().replace(\" \",\"-\")}-all-strong.mp3', format='mp3')\n" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "id": "5a718cf9", - "metadata": {}, - "outputs": [], - "source": [ - "text = \"\"\"\n", - "The art of focus in our whirlwind existence can sometimes feel like searching for a needle in a haystack\n", - "\"\"\"\n", - "filename = f'../audios/{text.strip().replace(\" \",\"-\")}-natural.mp3'\n", - "get_openai_tts_audio(text, filename, performer=\"alloy\")" - ] } ], "metadata": { diff --git a/1000-hours/public/jupyter-notebooks/phonetics.ipynb b/1000-hours/public/jupyter-notebooks/phonetics.ipynb index e69de29b..70735585 100644 --- a/1000-hours/public/jupyter-notebooks/phonetics.ipynb +++ b/1000-hours/public/jupyter-notebooks/phonetics.ipynb @@ -0,0 +1,127 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The history saving thread hit an unexpected error (DatabaseError('database disk image is malformed')).History will not be written to the database.\n" + ] + } + ], + "source": [ + "import json\n", + "import vlc\n", + "import re\n", + "\n", + "# 假设你的 JSON 数据库是一个 JSON 文件,我们将从文件中加载数据\n", + "# 如果 JSON 数据在内存中或其他格式,你可能需要修改这部分代码\n", + "def load_json_database(file_path):\n", + " records = []\n", + " with open(file_path, 'r') as file:\n", + " for line in file:\n", + " try:\n", + " record = json.loads(line)\n", + " records.append(record)\n", + " except json.JSONDecodeError as e:\n", + " print(f\"Error parsing JSON: {e}\")\n", + " return records\n", + "\n", + "# The rest of the code remains the same...\n", + "\n", + "# 在 JSON 数据库中检索 word\n", + "def 
search_in_json_database(database, search_word, region):\n", + " for record in database:\n", + " # 检查 word 字段是否匹配\n", + " if record.get('word') == search_word:\n", + " # 找到匹配项后,获取美式发音信息\n", + " pos_items = record.get('pos_items', [])\n", + " for pos_item in pos_items:\n", + " pronunciations = pos_item.get('pronunciations', [])\n", + " for pronunciation in pronunciations:\n", + " if pronunciation.get('region') == region:\n", + " # 找到美式发音,返回相关信息\n", + " return {\n", + " 'pronunciation': pronunciation.get('pronunciation'),\n", + " 'audio': pronunciation.get('audio')\n", + " }\n", + " # 如果没有找到匹配的 word 字段,返回 'not exist'\n", + " return 'not exist'\n", + "\n", + "def search_pronunciation(database, pattern):\n", + " # Compile the regex pattern\n", + " regex = re.compile(pattern)\n", + " results = []\n", + " # Search in the database\n", + " for record in database:\n", + " for pos_item in record[\"pos_items\"]:\n", + " for pronunciation in pos_item[\"pronunciations\"]:\n", + " if regex.search(pronunciation[\"pronunciation\"]):\n", + " if pronunciation.get('region') == 'us':\n", + " results.append((record[\"word\"], pronunciation[\"pronunciation\"].replace(\".\", \"\"))) # record[\"_id\"][\"$oid\"],\n", + " # Return all collected matches (an empty list if nothing matched)\n", + " return results\n", + "\n", + "# 用于测试的 JSON 数据库文件路径\n", + "json_db_file_path = '/Users/joker/github/camdict/cam_dict.refined.json'\n", + "\n", + "json_database = load_json_database(json_db_file_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "*balls* bɑːlz\n", + "*cards* kɑrdz\n" + ] + } + ], + "source": [ + "list = \"\"\"\n", + "balls,cards\n", + "\"\"\"\n", + "\n", + "for word in list.split(\",\"):\n", + " word = word.strip().lower()\n", + " result = search_in_json_database(json_database, word, 'us')\n", + " if result != 'not exist':\n", + " pho = result['pronunciation']\n", + " else:\n", + " pho = 'not exist'\n", + " 
line = f'*{word}* {pho}'\n", + " print(line)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/1000-hours/sounds-of-american-english/4.2-words.md b/1000-hours/sounds-of-american-english/4.2-words.md index 52fdf2f3..f943cd7e 100644 --- a/1000-hours/sounds-of-american-english/4.2-words.md +++ b/1000-hours/sounds-of-american-english/4.2-words.md @@ -4,11 +4,11 @@ ## 4.2.1. 重音、次重音、非重音、弱音 -当一个词汇由一个以上的音节构成之时,其中的某个音节可能带有重音(*stress*),在音标中使用 ˈ 作为标记。 +重点在于,英文的音节有**重音**、**次重音**、**非重音**的区别,这一点和亚洲语言明显不同。 + +如果一个单词只有一个音节,单独读出的时候,就当作是**重音**(*stress*)音节读出。而一个多音节词汇中**有且只有一个**重音音节(在音标中使用 ˈ 作为标记),但,可能还有另外一些音节是**次重音**(*secondary stress*),在音标中使用 ˌ 作为标记。比如,*serendipity* ˌserənˈdɪpət̬i,有一个**重音**和一个**次重音**。而 *[Pneumonoultramicroscopicsilicovolcanoconiosis](https://en.wikipedia.org/wiki/Pneumonoultramicroscopicsilicovolcanoconiosis)* 总计有 19 个音节,其中 7 个是次重音,唯一的重音是 …… -**一个多音节词汇中最多只有一个重音音节**,但,可能还有另外一些音节是**次重音**(*secondary stress*),在音标中使用 ˌ 作为标记。比如,*serendipity* ˌserənˈdɪpət̬i,有一个**重音**和一个**次重音**。而 *[Pneumonoultramicroscopicsilicovolcanoconiosis](https://en.wikipedia.org/wiki/Pneumonoultramicroscopicsilicovolcanoconiosis)* 总计有 19 个音节,其中 7 个是次重音,唯一的重音是 …… -另外一个重点在于,英文的音节有**重音**、**次重音**、**非重音**的区别,这一点也和亚洲语言明显不同。 形象地讲,5 个不分轻重的汉字(或者日文字、韩文字)排在一起大概是这样的: