Files
everyone-can-use-english/1000-hours/public/jupyter-notebooks/spelling-rules.ipynb
2024-07-24 15:16:14 +08:00

187 lines
7.2 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"import requests\n",
"import json\n",
"import vlc\n",
"import re\n",
"import random\n",
"from IPython.display import Audio\n",
"\n",
"def load_json_database(url, timeout=30):\n",
"    \"\"\"Download a JSON Lines file and return its records as a list.\n",
"\n",
"    Each non-empty line of the response body is parsed as one JSON document.\n",
"    Lines that fail to parse are reported and skipped. A network or HTTP error\n",
"    is reported and the records parsed so far (possibly none) are returned.\n",
"\n",
"    Args:\n",
"        url: Location of the JSON Lines file.\n",
"        timeout: Seconds to wait for the server before giving up, so that a\n",
"            stalled connection cannot hang the notebook.\n",
"\n",
"    Returns:\n",
"        list: The parsed records, in file order.\n",
"    \"\"\"\n",
"    records = []\n",
"    try:\n",
"        # Stream the body and let the context manager release the connection.\n",
"        with requests.get(url, stream=True, timeout=timeout) as response:\n",
"            response.raise_for_status()  # Raise an error for bad status codes\n",
"            # Iterate raw bytes: json.loads detects the UTF-8/16/32 encoding itself,\n",
"            # so the result does not depend on the charset requests guesses.\n",
"            for line in response.iter_lines():\n",
"                if line:\n",
"                    try:\n",
"                        records.append(json.loads(line))\n",
"                    except (json.JSONDecodeError, UnicodeDecodeError) as e:\n",
"                        print(f\"Error parsing JSON: {e}\")\n",
"    except requests.exceptions.RequestException as e:\n",
"        print(f\"Error fetching data from URL: {e}\")\n",
"    return records\n",
"\n",
"def search_in_json_database(database, search_word, region):\n",
"    \"\"\"Look up the pronunciation of a word for a given region.\n",
"\n",
"    Args:\n",
"        database: Iterable of record dicts.\n",
"        search_word: Word compared for equality with each record's 'word' field.\n",
"        region: Value compared with each pronunciation's 'region' field (e.g. 'us').\n",
"\n",
"    Returns:\n",
"        A dict with the keys 'pronunciation' and 'audio', taken from the first\n",
"        matching pronunciation, or the string 'not exist' when no record for\n",
"        the word carries a pronunciation for that region.\n",
"    \"\"\"\n",
"    for record in database:\n",
"        # Skip records whose 'word' field does not match\n",
"        if record.get('word') != search_word:\n",
"            continue\n",
"        # `or []` also covers keys that are present but hold null\n",
"        for pos_item in record.get('pos_items') or []:\n",
"            for pronunciation in pos_item.get('pronunciations') or []:\n",
"                if pronunciation.get('region') == region:\n",
"                    # Found a pronunciation for the requested region\n",
"                    return {\n",
"                        'pronunciation': pronunciation.get('pronunciation'),\n",
"                        'audio': pronunciation.get('audio')\n",
"                    }\n",
"    # No record matched both the word and the region\n",
"    return 'not exist'\n",
"\n",
"url = \"https://raw.githubusercontent.com/zelic91/camdict/main/cam_dict.refined.json\"\n",
"\n",
"json_database = load_json_database(url)\n",
"# load_json_database reports download errors and returns what it has (possibly\n",
"# nothing) instead of raising, so flag an empty result here; otherwise every\n",
"# later lookup would silently come back as 'not exist'.\n",
"if not json_database:\n",
"    print(\"Warning: the dictionary database is empty; every lookup will return 'not exist'.\")\n"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"### <span class=\"pho\">tr</span>\n",
"* **tr**\n",
"\t- track <span class=\"pho alt\">træk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/track-us-male.mp3\" data-audio-us-female=\"/audios/us/track-us-female.mp3\"></span>\n",
" \t- tree <span class=\"pho alt\">triː</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/tree-us-male.mp3\" data-audio-us-female=\"/audios/us/tree-us-female.mp3\"></span>\n",
" \t- trick <span class=\"pho alt\">trɪk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/trick-us-male.mp3\" data-audio-us-female=\"/audios/us/trick-us-female.mp3\"></span>\n",
"\n",
"### <span class=\"pho\">dr</span>\n",
"* **dr**\n",
"\t- drive <span class=\"pho alt\">draɪv</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/drive-us-male.mp3\" data-audio-us-female=\"/audios/us/drive-us-female.mp3\"></span>\n",
" \t- dream <span class=\"pho alt\">driːm</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/dream-us-male.mp3\" data-audio-us-female=\"/audios/us/dream-us-female.mp3\"></span>\n",
" \t- drink <span class=\"pho alt\">drɪŋk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/drink-us-male.mp3\" data-audio-us-female=\"/audios/us/drink-us-female.mp3\"></span>\n",
"\n",
"phonetics_not_exist: \n",
"\n",
"track,tree,trick,drive,dream,drink,\n"
]
}
],
"source": [
"text =\"\"\"\n",
"### `ts`\n",
"* \"ts\" - clients, students, treats\n",
"\n",
"### `dz`\n",
"* \"ds\" - deeds, records, words\n",
"\"\"\"\n",
"\n",
"\n",
"def wrap_example(word, phonetics):\n",
"    \"\"\"Return the markdown list item for one example word.\n",
"\n",
"    The item holds the word, its phonetic transcription, and a span whose data\n",
"    attributes carry the paths of the word's US male and female mp3 files.\n",
"    \"\"\"\n",
"    return f'\\t- {word} <span class=\"pho alt\">{phonetics}</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/{word}-us-male.mp3\" data-audio-us-female=\"/audios/us/{word}-us-female.mp3\"></span>'\n",
"\n",
"\n",
"all_words = []          # every example word, in order of appearance\n",
"missing_phonetics = []  # examples with no 'us' pronunciation in the database\n",
"lines = text.split(\"\\n\")\n",
"for line in lines:\n",
"    # Backtick-quoted phonemes become <span class=\"pho\">...</span>\n",
"    line = line.replace(\" `\", \" <span class=\\\"pho\\\">\")\n",
"    line = line.replace(\"`\", \"</span>\")\n",
"    if '*' in line:\n",
"        line = line.replace('\"', \"**\")\n",
"        # Split on the first \" - \" only, so a hyphen inside the spelling pattern or\n",
"        # inside an example (e.g. \"well-known\") cannot truncate the example list.\n",
"        heading, separator, example_part = line.partition(\" - \")\n",
"        if separator:\n",
"            # Ignore blanks left behind by stray or trailing commas.\n",
"            examples = [x.strip() for x in example_part.split(\",\") if x.strip()]\n",
"            rendered = [heading.strip()]\n",
"            for e in examples:\n",
"                all_words.append(e)\n",
"                entry_us = search_in_json_database(json_database, e, 'us')\n",
"                if entry_us == 'not exist':\n",
"                    phonetics = entry_us\n",
"                    missing_phonetics.append(e)\n",
"                else:\n",
"                    phonetics = entry_us['pronunciation']\n",
"                rendered.append(wrap_example(e, phonetics))\n",
"            # Assemble the block from its parts. Substituting each example inside\n",
"            # the line would also rewrite matches in markup inserted earlier.\n",
"            line = \"\\n\".join(rendered)\n",
"    print(line)\n",
"\n",
"# Comma-terminated strings such as \"clients,students,\".\n",
"words = \"\".join(f\"{w},\" for w in all_words)\n",
"phonetics_not_exist = \"\".join(f\"{w},\" for w in missing_phonetics)\n",
"print(f'phonetics_not_exist: {phonetics_not_exist}')\n",
"print('\\n'+words)"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['track', 'tree', 'trick', 'drive', 'dream', 'drink']\n",
"track\n",
"tree\n",
"trick\n",
"drive\n",
"dream\n",
"drink\n",
"Files created!\n"
]
}
],
"source": [
"import asyncio\n",
"import edge_tts\n",
"import pygame\n",
"# edge-tts voice -> suffix of the generated file name\n",
"# (\"<word>-us-male.mp3\" / \"<word>-us-female.mp3\").\n",
"VOICE_SUFFIXES = {\n",
"    'en-US-GuyNeural': 'us-male',\n",
"    'en-US-MichelleNeural': 'us-female',\n",
"}\n",
"\n",
"# `words` is a comma-separated string. Drop blanks (an empty `words` would\n",
"# otherwise yield ['']) and duplicates, keeping first-seen order.\n",
"Wordlist = list(dict.fromkeys(w.strip() for w in words.split(\",\") if w.strip()))\n",
"print(Wordlist)\n",
"for w in Wordlist:\n",
"    for VOICE, suffix in VOICE_SUFFIXES.items():\n",
"        OUTPUT_FILE = f\"{w}-{suffix}.mp3\"\n",
"        communicate = edge_tts.Communicate(w, VOICE)\n",
"        # Top-level await: relies on the notebook kernel's autoawait support.\n",
"        await communicate.save(OUTPUT_FILE)\n",
"    print(w)\n",
"print(\"Files created!\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}