Files
everyone-can-use-english/1000-hours/public/jupyter-notebooks/phonetics-fill-in-exercise.ipynb
2024-08-25 12:23:34 +08:00

189 lines
6.5 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install python-vlc"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import json\n",
"import vlc\n",
"import re\n",
"import random\n",
"from IPython.display import Audio\n",
"\n",
"import json\n",
"import requests\n",
"\n",
"def load_json_database(source, timeout=30):\n",
"    \"\"\"Load a JSON Lines database from a URL or a local file.\n",
"\n",
"    Every non-blank line of the source is parsed as one JSON value.\n",
"    Loading is best-effort: a line that is not valid JSON is reported and\n",
"    skipped, and a download or file error is reported, after which whatever\n",
"    was parsed so far is returned.\n",
"\n",
"    Args:\n",
"        source: An ``http://`` / ``https://`` URL, or the path of a local\n",
"            UTF-8 text file.\n",
"        timeout: Seconds to wait for the server before giving up (only used\n",
"            when ``source`` is a URL).\n",
"\n",
"    Returns:\n",
"        A list holding the parsed value of every valid line, in source order.\n",
"    \"\"\"\n",
"    records = []\n",
"\n",
"    def parse_json_lines(lines):\n",
"        for raw_line in lines:\n",
"            # Iterating a file keeps the trailing newline, so a visually\n",
"            # empty line is still truthy; strip before deciding to parse.\n",
"            line = raw_line.strip()\n",
"            if not line:\n",
"                continue\n",
"            try:\n",
"                records.append(json.loads(line))\n",
"            except json.JSONDecodeError as e:\n",
"                print(f\"Error parsing JSON: {e}\")\n",
"\n",
"    try:\n",
"        if source.startswith(('http://', 'https://')):\n",
"            # Handle as URL. Without a timeout a stalled server would block\n",
"            # this cell indefinitely.\n",
"            response = requests.get(source, timeout=timeout)\n",
"            response.raise_for_status()  # Raise an error for bad status codes\n",
"            parse_json_lines(response.iter_lines(decode_unicode=True))\n",
"        else:\n",
"            # Handle as file\n",
"            with open(source, 'r', encoding='utf-8') as file:\n",
"                parse_json_lines(file)\n",
"    except requests.exceptions.RequestException as e:\n",
"        print(f\"Error fetching data from URL: {e}\")\n",
"    except FileNotFoundError as e:\n",
"        print(f\"Error opening file: {e}\")\n",
"    except Exception as e:\n",
"        print(f\"An unexpected error occurred: {e}\")\n",
"\n",
"    return records\n",
"\n",
"# Remote word database; the loader downloads it and parses it line by line,\n",
"# one JSON record per line. `json_database` is read by the cells below.\n",
"url = \"https://raw.githubusercontent.com/zelic91/camdict/main/cam_dict.refined.json\"\n",
"json_database = load_json_database(url)\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def search_in_json_database(database, search_word, region):\n",
"    \"\"\"Look up the pronunciation of a word for one region.\n",
"\n",
"    Args:\n",
"        database: Iterable of dict records, each with a 'word' key and an\n",
"            optional 'pos_items' list whose items hold 'pronunciations'.\n",
"        search_word: Headword to find (exact match on the 'word' field).\n",
"        region: Value compared against each pronunciation's 'region' field,\n",
"            e.g. 'us'.\n",
"\n",
"    Returns:\n",
"        A dict with the keys 'pronunciation' and 'audio' taken from the first\n",
"        matching pronunciation, or the string 'not exist' when no record for\n",
"        the word carries a pronunciation for that region.\n",
"    \"\"\"\n",
"    for record in database:\n",
"        # Skip records that belong to other words.\n",
"        if record.get('word') != search_word:\n",
"            continue\n",
"        # `or []` also covers keys that are present but null in the JSON.\n",
"        for pos_item in record.get('pos_items') or []:\n",
"            for pronunciation in pos_item.get('pronunciations') or []:\n",
"                if pronunciation.get('region') == region:\n",
"                    # First pronunciation for the requested region wins.\n",
"                    return {\n",
"                        'pronunciation': pronunciation.get('pronunciation'),\n",
"                        'audio': pronunciation.get('audio')\n",
"                    }\n",
"    # No record supplied a pronunciation for the requested region.\n",
"    return 'not exist'\n",
"\n",
"def replace_with_underscores(match):\n",
"    \"\"\"Return one underscore per character of the matched text.\n",
"\n",
"    Meant to be passed as the replacement callback of `re.sub`.\n",
"    \"\"\"\n",
"    start, end = match.span()\n",
"    return '_' * (end - start)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"broadband\n",
"https://dictionary.cambridge.org/media/english/us_pron/c/cus/cus00/cus00276.mp3\n",
"Fill vowels in blanks: ˈbr__d.b_nd\n",
"Fill in consonants in blanks: ˈ__ɑː_._æ__\n"
]
},
{
"data": {
"text/html": [
"\n",
" <audio controls=\"controls\" >\n",
" <source src=\"https://dictionary.cambridge.org/media/english/us_pron/c/cus/cus00/cus00276.mp3\" type=\"audio/mpeg\" />\n",
" Your browser does not support the audio element.\n",
" </audio>\n",
" "
],
"text/plain": [
"<IPython.lib.display.Audio object>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Pick a random word and turn its 'us' phonetics into two fill-in-the-blank\n",
"# exercises: one with the vowels blanked, one with the consonants blanked.\n",
"\n",
"# Headwords with certain endings such as 'es', 'ed', 'ing' are skipped.\n",
"EXCLUDED_SUFFIXES = (\n",
"    'ed', 'ing', 'es', 'ts', 'ks', 'ds', 'ps', 'bs', 'gs', 'ls', 'rs', 'ms',\n",
"    'ns', 'er', 'est',\n",
")\n",
"# Upper bound on random draws, so a database without usable entries cannot\n",
"# make this cell loop forever.\n",
"MAX_DRAWS = 1000\n",
"\n",
"# Symbol inventories. Never put an empty string here: an empty regex\n",
"# alternative matches at every position and hides the symbols listed after it.\n",
"# TODO(review): confirm that both inventories are complete.\n",
"VOWEL_SYMBOLS = (\n",
"    'ɑː', 'ɑːr', 'ʌ', 'iː', 'ɪ', 'i', 'ɪr', 'ʊ', 'ʊr', 'uː', 'e', 'er', 'æ',\n",
"    'ə', 'ɚ', 'ɝː', 'ɒ', 'ɔː', 'ɔːr', 'ɔɪ', 'aɪ', 'aɪr', 'eɪ', 'aʊ', 'aʊr',\n",
"    'oʊ',\n",
")\n",
"CONSONANT_SYMBOLS = (\n",
"    'p', 'b', 't', 'd', 'k', 'ɡ', 'f', 'v', 'θ', 'ð', 's', 'z', 'ʃ', 'ʒ',\n",
"    'tʃ', 'dʒ', 'r', 'h', 'l', 't̬', 'j', 'w', 'ŋ', 'n', 'm', 'tr', 'dr',\n",
"    'ts', 'dz', 'br', 'pr', 'fr', 'ɡr', 'θr', 'ʃr', 'kr', 'bl', 'kl', 'ɡl',\n",
"    'fl', 'pl', 'sl', 'sp', 'st', 'sk', 'sm', 'sn', 'sw', 'str', 'spr',\n",
"    'skr', 'spl', 'sfr', 'skw', 'skl',\n",
")\n",
"\n",
"\n",
"def compile_symbol_pattern(symbols):\n",
"    \"\"\"Compile a regex matching any of `symbols`, longest symbol first.\n",
"\n",
"    Regex alternation takes the first alternative that matches, so a short\n",
"    symbol listed before a longer one that starts with it (e.g. 'ɑː' before\n",
"    'ɑːr') would keep the longer one from ever matching. Duplicates and\n",
"    empty strings are dropped.\n",
"    \"\"\"\n",
"    unique = {symbol for symbol in symbols if symbol}\n",
"    ordered = sorted(unique, key=lambda symbol: (-len(symbol), symbol))\n",
"    return re.compile('|'.join(re.escape(symbol) for symbol in ordered))\n",
"\n",
"\n",
"def pick_random_entry(database, region, max_draws=MAX_DRAWS):\n",
"    \"\"\"Draw random records until one has a usable pronunciation.\n",
"\n",
"    Returns:\n",
"        A `(record, info)` pair, where `info` is the dict returned by\n",
"        `search_in_json_database` with non-empty 'pronunciation' and 'audio'.\n",
"\n",
"    Raises:\n",
"        ValueError: If `database` is empty.\n",
"        LookupError: If no usable word turns up within `max_draws` draws.\n",
"    \"\"\"\n",
"    if not database:\n",
"        raise ValueError('The word database is empty; re-run the loading cell and check its error output.')\n",
"    for _ in range(max_draws):\n",
"        candidate = random.choice(database)\n",
"        word = candidate.get('word') if isinstance(candidate, dict) else None\n",
"        if not isinstance(word, str) or word.endswith(EXCLUDED_SUFFIXES):\n",
"            continue\n",
"        info = search_in_json_database(database, word, region)\n",
"        # search_in_json_database returns the string 'not exist' on a miss.\n",
"        if isinstance(info, dict) and info.get('pronunciation') and info.get('audio'):\n",
"            return candidate, info\n",
"    raise LookupError(f'No word with a {region!r} pronunciation found in {max_draws} random draws.')\n",
"\n",
"\n",
"vowel_phonetics = compile_symbol_pattern(VOWEL_SYMBOLS)\n",
"consonant_phonetics = compile_symbol_pattern(CONSONANT_SYMBOLS)\n",
"\n",
"# get a random word together with its pronunciation for region 'us'\n",
"random_word, random_word_us = pick_random_entry(json_database, 'us')\n",
"\n",
"# show the word itself\n",
"random_word_entry = random_word['word']\n",
"print(random_word_entry)\n",
"\n",
"# get the word's phonetics\n",
"random_word_phonetics = random_word_us['pronunciation']\n",
"\n",
"# get the audio url of the word\n",
"random_word_us_audio_url = random_word_us['audio']\n",
"print(random_word_us_audio_url)\n",
"\n",
"blank_vowel_phonetics = re.sub(vowel_phonetics, replace_with_underscores, random_word_phonetics)\n",
"blank_consonant_phonetics = re.sub(consonant_phonetics, replace_with_underscores, random_word_phonetics)\n",
"\n",
"# fill vowels in blanks\n",
"print(f'Fill vowels in blanks: {blank_vowel_phonetics}')\n",
"\n",
"# fill consonants in blanks\n",
"print(f'Fill in consonants in blanks: {blank_consonant_phonetics}')\n",
"\n",
"# play the audio\n",
"player = vlc.MediaPlayer(random_word_us_audio_url)\n",
"player.play()\n",
"\n",
"# display the audio\n",
"Audio(url=random_word_us_audio_url)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}