spellings and examples added.
This commit is contained in:
Binary file not shown.
Binary file not shown.
@@ -51,7 +51,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 1,
|
||||
"id": "603fb48b-2fd5-482c-9c96-38a65fc43824",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -59,7 +59,7 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/var/folders/k_/t1vf2gd95jbc1j3k49xxdfy80000gn/T/ipykernel_5661/2864483681.py:30: DeprecationWarning: Due to a bug, this method doesn't actually stream the response content, `.with_streaming_response.method()` should be used instead\n",
|
||||
"/var/folders/k_/t1vf2gd95jbc1j3k49xxdfy80000gn/T/ipykernel_59088/953934680.py:30: DeprecationWarning: Due to a bug, this method doesn't actually stream the response content, `.with_streaming_response.method()` should be used instead\n",
|
||||
" rspd_audio.stream_to_file(speech_file_path)\n"
|
||||
]
|
||||
}
|
||||
@@ -78,7 +78,7 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"text = \"\"\"\n",
|
||||
"lookout,seekout\n",
|
||||
"adventure,French\n",
|
||||
"\"\"\"\n",
|
||||
"# Create audios for each versions (three was set as default previously.)\n",
|
||||
"\n",
|
||||
|
||||
186
1000-hours/public/jupyter-notebooks/spelling-rules.ipynb
Normal file
186
1000-hours/public/jupyter-notebooks/spelling-rules.ipynb
Normal file
@@ -0,0 +1,186 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"import requests\n",
|
||||
"import json\n",
|
||||
"import vlc\n",
|
||||
"import re\n",
|
||||
"import random\n",
|
||||
"from IPython.display import Audio\n",
|
||||
"\n",
|
||||
"def load_json_database(url):\n",
|
||||
" records = []\n",
|
||||
" try:\n",
|
||||
" response = requests.get(url)\n",
|
||||
" response.raise_for_status() # Raise an error for bad status codes\n",
|
||||
" for line in response.iter_lines(decode_unicode=True):\n",
|
||||
" if line:\n",
|
||||
" try:\n",
|
||||
" record = json.loads(line)\n",
|
||||
" records.append(record)\n",
|
||||
" except json.JSONDecodeError as e:\n",
|
||||
" print(f\"Error parsing JSON: {e}\")\n",
|
||||
" except requests.exceptions.RequestException as e:\n",
|
||||
" print(f\"Error fetching data from URL: {e}\")\n",
|
||||
" return records\n",
|
||||
"\n",
|
||||
"def search_in_json_database(database, search_word, region):\n",
|
||||
" for record in database:\n",
|
||||
" # 检查 word 字段是否匹配\n",
|
||||
" if record.get('word') == search_word:\n",
|
||||
" # 找到匹配项后,获取美式发音信息\n",
|
||||
" pos_items = record.get('pos_items', [])\n",
|
||||
" for pos_item in pos_items:\n",
|
||||
" pronunciations = pos_item.get('pronunciations', [])\n",
|
||||
" for pronunciation in pronunciations:\n",
|
||||
" if pronunciation.get('region') == region:\n",
|
||||
" # 找到美式发音,返回相关信息\n",
|
||||
" return {\n",
|
||||
" 'pronunciation': pronunciation.get('pronunciation'),\n",
|
||||
" 'audio': pronunciation.get('audio')\n",
|
||||
" }\n",
|
||||
" # 如果没有找到匹配的 word 字段,返回 'not exist'\n",
|
||||
" return 'not exist'\n",
|
||||
"\n",
|
||||
"url = \"https://raw.githubusercontent.com/zelic91/camdict/main/cam_dict.refined.json\"\n",
|
||||
"\n",
|
||||
"json_database = load_json_database(url)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 105,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"### <span class=\"pho\">tr</span>\n",
|
||||
"* **tr**\n",
|
||||
"\t- track <span class=\"pho alt\">træk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/track-us-male.mp3\" data-audio-us-female=\"/audios/us/track-us-female.mp3\"></span>\n",
|
||||
" \t- tree <span class=\"pho alt\">triː</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/tree-us-male.mp3\" data-audio-us-female=\"/audios/us/tree-us-female.mp3\"></span>\n",
|
||||
" \t- trick <span class=\"pho alt\">trɪk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/trick-us-male.mp3\" data-audio-us-female=\"/audios/us/trick-us-female.mp3\"></span>\n",
|
||||
"\n",
|
||||
"### <span class=\"pho\">dr</span>\n",
|
||||
"* **dr**\n",
|
||||
"\t- drive <span class=\"pho alt\">draɪv</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/drive-us-male.mp3\" data-audio-us-female=\"/audios/us/drive-us-female.mp3\"></span>\n",
|
||||
" \t- dream <span class=\"pho alt\">driːm</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/dream-us-male.mp3\" data-audio-us-female=\"/audios/us/dream-us-female.mp3\"></span>\n",
|
||||
" \t- drink <span class=\"pho alt\">drɪŋk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/drink-us-male.mp3\" data-audio-us-female=\"/audios/us/drink-us-female.mp3\"></span>\n",
|
||||
"\n",
|
||||
"phonetics_not_exist: \n",
|
||||
"\n",
|
||||
"track,tree,trick,drive,dream,drink,\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text =\"\"\"\n",
|
||||
"### `ts`\n",
|
||||
"* \"ts\" - clients, students, treats\n",
|
||||
"\n",
|
||||
"### `dz`\n",
|
||||
"* \"ds\" - deeds, records, words\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"words = \"\"\n",
|
||||
"phonetics_not_exist = ''\n",
|
||||
"lines = text.split(\"\\n\")\n",
|
||||
"for line in lines:\n",
|
||||
" line = line.replace(\" `\", \" <span class=\\\"pho\\\">\")\n",
|
||||
" line = line.replace(\"`\", \"</span>\")\n",
|
||||
" if '*' in line:\n",
|
||||
" line = line.replace('\"', \"**\")\n",
|
||||
" examples = line.split(\"-\")[1].split(\",\")\n",
|
||||
" examples = [x.strip() for x in examples]\n",
|
||||
" line = line.replace(\" - \", \"\\n\")\n",
|
||||
" # print(examples)\n",
|
||||
" # wrap examples in span,\n",
|
||||
" for e in examples:\n",
|
||||
" # join e in words with ','\n",
|
||||
" words += e + \",\"\n",
|
||||
" entry_us = search_in_json_database(json_database, e, 'us')\n",
|
||||
" if entry_us == 'not exist':\n",
|
||||
" phonetics = entry_us\n",
|
||||
" phonetics_not_exist += f'{e},'\n",
|
||||
" else:\n",
|
||||
" phonetics = entry_us['pronunciation'] \n",
|
||||
" wrapped_e = f'\\t- {e} <span class=\"pho alt\">{phonetics}</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/{e}-us-male.mp3\" data-audio-us-female=\"/audios/us/{e}-us-female.mp3\"></span>\\n'\n",
|
||||
" line = line.replace(e, wrapped_e).replace(',', '').strip()\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" print(line)\n",
|
||||
"print(f'phonetics_not_exist: {phonetics_not_exist}')\n",
|
||||
"print('\\n'+words)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 106,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['track', 'tree', 'trick', 'drive', 'dream', 'drink']\n",
|
||||
"track\n",
|
||||
"tree\n",
|
||||
"trick\n",
|
||||
"drive\n",
|
||||
"dream\n",
|
||||
"drink\n",
|
||||
"Files created!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import asyncio\n",
|
||||
"import edge_tts\n",
|
||||
"import pygame\n",
|
||||
"text = words.rstrip(\",\")\n",
|
||||
"Wordlist = text.split(\",\")\n",
|
||||
"# Wordlist = ['chivvy']\n",
|
||||
"print(Wordlist)\n",
|
||||
"for w in Wordlist:\n",
|
||||
" # for VOICE in ['en-US-GuyNeural', 'en-US-JennyNeural', 'en-GB-RyanNeural', 'en-GB-SoniaNeural']:\n",
|
||||
" for VOICE in ['en-US-GuyNeural', 'en-US-MichelleNeural']:\n",
|
||||
" w = w.strip()\n",
|
||||
" # OUTPUT_FILE = f\"{w}-{VOICE.replace('EricNeural', 'Guy-Male').replace('JennyNeural', 'Jenny-Female').replace('RyanNeural', 'Ryan-Male').replace('SoniaNeural', 'Sonia-Female').lower()}.mp3\"\n",
|
||||
" OUTPUT_FILE = f\"{w}-{VOICE.replace('GuyNeural', 'Male').replace('MichelleNeural', 'Female').replace('en-', '').lower()}.mp3\"\n",
|
||||
" communicate = edge_tts.Communicate(w, VOICE)\n",
|
||||
" await communicate.save(OUTPUT_FILE) \n",
|
||||
" print(w)\n",
|
||||
"print(\"Files created!\") "
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
Reference in New Issue
Block a user