everyone-can-use-english/1000-hours/public/jupyter-notebooks/phonetics.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The history saving thread hit an unexpected error (DatabaseError('database disk image is malformed')).History will not be written to the database.\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "import vlc\n",
    "import re\n",
    "\n",
    "# 假设你的 JSON 数据库是一个 JSON 文件，我们将从文件中加载数据\n",
    "# 如果 JSON 数据在内存中或其他格式，你可能需要修改这部分代码\n",
    "def load_json_database(file_path):\n",
    "    records = []\n",
    "    with open(file_path, 'r') as file:\n",
    "        for line in file:\n",
    "            try:\n",
    "                record = json.loads(line)\n",
    "                records.append(record)\n",
    "            except json.JSONDecodeError as e:\n",
    "                print(f\"Error parsing JSON: {e}\")\n",
    "    return records\n",
    "\n",
    "# The rest of the code remains the same...\n",
    "\n",
    "# 在 JSON 数据库中检索 word\n",
    "def search_in_json_database(database, search_word, region):\n",
    "    for record in database:\n",
    "        # 检查 word 字段是否匹配\n",
    "        if record.get('word') == search_word:\n",
    "            # 找到匹配项后，获取美式发音信息\n",
    "            pos_items = record.get('pos_items', [])\n",
    "            for pos_item in pos_items:\n",
    "                pronunciations = pos_item.get('pronunciations', [])\n",
    "                for pronunciation in pronunciations:\n",
    "                    if pronunciation.get('region') == region:\n",
    "                        # 找到美式发音，返回相关信息\n",
    "                        return {\n",
    "                            'pronunciation': pronunciation.get('pronunciation'),\n",
    "                            'audio': pronunciation.get('audio')\n",
    "                        }\n",
    "    # 如果没有找到匹配的 word 字段，返回 'not exist'\n",
    "    return 'not exist'\n",
    "\n",
    "def search_pronunciation(database, pattern):\n",
    "    # Compile the regex pattern\n",
    "    regex = re.compile(pattern)\n",
    "    results = []\n",
    "    # Search in the database\n",
    "    for record in database:\n",
    "        for pos_item in record[\"pos_items\"]:\n",
    "            for pronunciation in pos_item[\"pronunciations\"]:\n",
    "                if regex.search(pronunciation[\"pronunciation\"]):\n",
    "                    if pronunciation.get('region') == 'us':\n",
    "                        results.append((record[\"word\"], pronunciation[\"pronunciation\"].replace(\".\", \"\"))) # record[\"_id\"][\"$oid\"],\n",
    "    # Return None if no match found\n",
    "    return results\n",
    "\n",
    "# 用于测试的 JSON 数据库文件路径\n",
    "json_db_file_path = '/Users/joker/github/camdict/cam_dict.refined.json'\n",
    "\n",
    "json_database = load_json_database(json_db_file_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "*balls* <span class=\"pho alt\">bɑːlz</span><span class=\"speak-word-inline\" data-audio-us-male=\"/audios/ballss-us-male.mp3\" data-audio-us-female=\"/audios/ballss-us-female.mp3\"></span>\n",
      "*cards* <span class=\"pho alt\">kɑrdz</span><span class=\"speak-word-inline\" data-audio-us-male=\"/audios/cardss-us-male.mp3\" data-audio-us-female=\"/audios/cardss-us-female.mp3\"></span>\n"
     ]
    }
   ],
   "source": [
    "list = \"\"\"\n",
    "balls,cards\n",
    "\"\"\"\n",
    "\n",
    "for word in list.split(\",\"):\n",
    "    word = word.strip().lower()\n",
    "    result = search_in_json_database(json_database, word, 'us')\n",
    "    if result != 'not exist':\n",
    "        pho = result['pronunciation']\n",
    "    else:\n",
    "        pho = 'not exist'\n",
    "    line = f'*{word}* <span class=\"pho alt\">{pho}</span><span class=\"speak-word-inline\" data-audio-us-male=\"/audios/{word}s-us-male.mp3\" data-audio-us-female=\"/audios/{word}s-us-female.mp3\"></span>'\n",
    "    print(line)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}