189 lines
6.5 KiB
Plaintext
189 lines
6.5 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"%pip install python-vlc"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import requests\n",
|
||
"import json\n",
|
||
"import vlc\n",
|
||
"import re\n",
|
||
"import random\n",
|
||
"from IPython.display import Audio\n",
|
||
"\n",
|
||
"import json\n",
|
||
"import requests\n",
|
||
"\n",
|
||
"def load_json_database(source, timeout=30):\n",
"    \"\"\"Load a JSON Lines database from an http(s) URL or a local file.\n",
"\n",
"    Every non-blank line of the source is parsed as one JSON document.\n",
"    A line that fails to parse is reported on stdout and skipped.\n",
"\n",
"    Args:\n",
"        source: A URL starting with http:// or https://; anything else is\n",
"            opened as a UTF-8 text file.\n",
"        timeout: Seconds to wait on the HTTP request (URL sources only).\n",
"\n",
"    Returns:\n",
"        list: The parsed records. Fetch and open errors are printed, not\n",
"        raised, so the list can be empty or partial.\n",
"    \"\"\"\n",
"    records = []\n",
"\n",
"    def parse_json_lines(lines):\n",
"        for line in lines:\n",
"            # Lines read from a file keep their trailing newline, so strip\n",
"            # before the emptiness test; otherwise blank lines reach\n",
"            # json.loads and are reported as parse errors.\n",
"            line = line.strip()\n",
"            if not line:\n",
"                continue\n",
"            try:\n",
"                records.append(json.loads(line))\n",
"            except json.JSONDecodeError as e:\n",
"                print(f\"Error parsing JSON: {e}\")\n",
"\n",
"    try:\n",
"        location = str(source)\n",
"        if location.startswith(('http://', 'https://')):\n",
"            # Without a timeout, requests waits indefinitely on a stalled server.\n",
"            response = requests.get(location, timeout=timeout)\n",
"            response.raise_for_status()  # Raise an error for bad status codes\n",
"            parse_json_lines(response.iter_lines(decode_unicode=True))\n",
"        else:\n",
"            with open(source, 'r', encoding='utf-8') as file:\n",
"                parse_json_lines(file)\n",
"    except requests.exceptions.RequestException as e:\n",
"        print(f\"Error fetching data from URL: {e}\")\n",
"    except FileNotFoundError as e:\n",
"        print(f\"Error opening file: {e}\")\n",
"    except Exception as e:\n",
"        # Deliberate best effort: report the problem and return what was parsed.\n",
"        print(f\"An unexpected error occurred: {e}\")\n",
"\n",
"    return records\n",
|
||
"\n",
|
||
"url = \"https://raw.githubusercontent.com/zelic91/camdict/main/cam_dict.refined.json\"\n",
"json_database = load_json_database(url)\n",
"\n",
"# load_json_database reports failures by printing and returning what it has,\n",
"# so stop here with a clear message instead of failing later in random.choice.\n",
"if not json_database:\n",
"    raise RuntimeError(f'No records could be loaded from {url}')\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def search_in_json_database(database, search_word, region):\n",
"    \"\"\"Look up the pronunciation of a word for one region.\n",
"\n",
"    Scans the records in order and returns the first pronunciation whose\n",
"    'region' equals the requested one, among records whose 'word' equals\n",
"    search_word.\n",
"\n",
"    Returns:\n",
"        A dict with 'pronunciation' and 'audio' keys, or the string\n",
"        'not exist' when no record supplies a pronunciation for that region.\n",
"    \"\"\"\n",
"    for entry in database:\n",
"        # Skip records for other headwords.\n",
"        if entry.get('word') != search_word:\n",
"            continue\n",
"        for part_of_speech in entry.get('pos_items', []):\n",
"            for variant in part_of_speech.get('pronunciations', []):\n",
"                if variant.get('region') != region:\n",
"                    continue\n",
"                # First pronunciation for the requested region wins.\n",
"                return {\n",
"                    'pronunciation': variant.get('pronunciation'),\n",
"                    'audio': variant.get('audio'),\n",
"                }\n",
"    # No record had both the word and a pronunciation for this region.\n",
"    return 'not exist'\n",
|
||
"\n",
|
||
"def replace_with_underscores(match):\n",
"    \"\"\"Return one underscore per character of the matched text (for re.sub).\"\"\"\n",
"    matched_text = match.group()\n",
"    return ''.join('_' for _ in matched_text)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"broadband\n",
|
||
"https://dictionary.cambridge.org/media/english/us_pron/c/cus/cus00/cus00276.mp3\n",
|
||
"Fill vowels in blanks: ˈbr__d.b_nd\n",
|
||
"Fill in consonants in blanks: ˈ__ɑː_._æ__\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"\n",
|
||
" <audio controls=\"controls\" >\n",
|
||
" <source src=\"https://dictionary.cambridge.org/media/english/us_pron/c/cus/cus00/cus00276.mp3\" type=\"audio/mpeg\" />\n",
|
||
" Your browser does not support the audio element.\n",
|
||
" </audio>\n",
|
||
" "
|
||
],
|
||
"text/plain": [
|
||
"<IPython.lib.display.Audio object>"
|
||
]
|
||
},
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Quiz cell: pick a random headword, blank out the vowels / consonants of its\n",
"# US transcription, then play and display the recording.\n",
"\n",
"def _compile_phonetic_pattern(alternatives):\n",
"    \"\"\"Compile a '|'-separated list of IPA symbols into a regex.\n",
"\n",
"    Empty and duplicate entries are dropped, and longer symbols are tried\n",
"    first: regex alternation takes the first alternative that matches, so a\n",
"    short symbol listed ahead of a longer one that starts with it would keep\n",
"    the longer one from ever matching as a unit.\n",
"    \"\"\"\n",
"    symbols = {symbol for symbol in alternatives.split('|') if symbol}\n",
"    ordered = sorted(symbols, key=lambda symbol: (-len(symbol), symbol))\n",
"    return re.compile('|'.join(re.escape(symbol) for symbol in ordered))\n",
"\n",
"vowel_phonetics = _compile_phonetic_pattern('ɑː|ɑːr|ʌ|iː|ɪ|i|ɪr|ʊ|ʊr|uː|e|er|æ|ə|ɚ|ɝː|ɒ|ɔː|ɔːr|ɔɪ|aɪ|aɪr|eɪ|aʊ|aʊr|oʊ')\n",
"consonant_phonetics = _compile_phonetic_pattern('p|b|t|d|k|ɡ|f|v|θ|ð|s|z|ʃ|ʒ|tʃ|dʒ|r|h|l|t̬|j|w|ŋ|n|m|tr|dr|ts|dz|br|pr|fr|ɡr|θr|ʃr|kr|bl|kl|ɡl|fl|pl|sl|sp|st|sk|sm|sn|sw|str|spr|skr|spl|sfr|skw|skl')\n",
"\n",
"# Words with these endings (e.g. 'es', 'ed', 'ing') are skipped and another word is drawn.\n",
"excluded_suffixes = ('ed', 'ing', 'es', 'ts', 'ks', 'ds', 'ps', 'bs', 'gs', 'ls', 'rs', 'ms', 'ns', 'er', 'est')\n",
"# Upper bound on redraws so the cell cannot loop forever on unsuitable data.\n",
"max_attempts = 1000\n",
"\n",
"if not json_database:\n",
"    raise RuntimeError('json_database is empty; run the loading cell first')\n",
"\n",
"for _ in range(max_attempts):\n",
"    random_word = random.choice(json_database)\n",
"    headword = random_word.get('word')\n",
"    if not headword or headword.endswith(excluded_suffixes):\n",
"        continue\n",
"    # search_in_json_database returns the string 'not exist' when the word has\n",
"    # no pronunciation for the region, so accept only a complete dict result.\n",
"    random_word_us = search_in_json_database(json_database, headword, 'us')\n",
"    if isinstance(random_word_us, dict) and random_word_us.get('pronunciation') and random_word_us.get('audio'):\n",
"        break\n",
"else:\n",
"    raise LookupError(f'No suitable word with a US pronunciation found in {max_attempts} draws')\n",
"\n",
"# the chosen word\n",
"random_word_entry = random_word['word']\n",
"print(random_word_entry)\n",
"\n",
"# its US phonetic transcription\n",
"random_word_phonetics = random_word_us['pronunciation']\n",
"\n",
"# the audio url of the word\n",
"random_word_us_audio_url = random_word_us['audio']\n",
"print(random_word_us_audio_url)\n",
"\n",
"blank_vowel_phonetics = vowel_phonetics.sub(replace_with_underscores, random_word_phonetics)\n",
"blank_consonant_phonetics = consonant_phonetics.sub(replace_with_underscores, random_word_phonetics)\n",
"\n",
"# fill vowels in blanks\n",
"print(f'Fill vowels in blanks: {blank_vowel_phonetics}')\n",
"\n",
"# fill consonants in blanks\n",
"print(f'Fill in consonants in blanks: {blank_consonant_phonetics}')\n",
"\n",
"# play the audio\n",
"player = vlc.MediaPlayer(random_word_us_audio_url)\n",
"player.play()\n",
"\n",
"# display the audio\n",
"Audio(url=random_word_us_audio_url)"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "base",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.12.4"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|