08-20-1635, chapter 4.2

2024-08-20 16:54:07 +08:00
parent d16300fa0a
commit 83ecdf2ceb
3 changed files with 141 additions and 80 deletions
--- a/1000-hours/public/jupyter-notebooks/phonetics.ipynb
+++ b/1000-hours/public/jupyter-notebooks/phonetics.ipynb
@@ -0,0 +1,127 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The history saving thread hit an unexpected error (DatabaseError('database disk image is malformed')).History will not be written to the database.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import json\n",
+    "import vlc\n",
+    "import re\n",
+    "\n",
+    "# 假设你的 JSON 数据库是一个 JSON 文件，我们将从文件中加载数据\n",
+    "# 如果 JSON 数据在内存中或其他格式，你可能需要修改这部分代码\n",
+    "def load_json_database(file_path):\n",
+    "    records = []\n",
+    "    with open(file_path, 'r') as file:\n",
+    "        for line in file:\n",
+    "            try:\n",
+    "                record = json.loads(line)\n",
+    "                records.append(record)\n",
+    "            except json.JSONDecodeError as e:\n",
+    "                print(f\"Error parsing JSON: {e}\")\n",
+    "    return records\n",
+    "\n",
+    "# The rest of the code remains the same...\n",
+    "\n",
+    "# 在 JSON 数据库中检索 word\n",
+    "def search_in_json_database(database, search_word, region):\n",
+    "    for record in database:\n",
+    "        # 检查 word 字段是否匹配\n",
+    "        if record.get('word') == search_word:\n",
+    "            # 找到匹配项后，获取美式发音信息\n",
+    "            pos_items = record.get('pos_items', [])\n",
+    "            for pos_item in pos_items:\n",
+    "                pronunciations = pos_item.get('pronunciations', [])\n",
+    "                for pronunciation in pronunciations:\n",
+    "                    if pronunciation.get('region') == region:\n",
+    "                        # 找到美式发音，返回相关信息\n",
+    "                        return {\n",
+    "                            'pronunciation': pronunciation.get('pronunciation'),\n",
+    "                            'audio': pronunciation.get('audio')\n",
+    "                        }\n",
+    "    # 如果没有找到匹配的 word 字段，返回 'not exist'\n",
+    "    return 'not exist'\n",
+    "\n",
+    "def search_pronunciation(database, pattern):\n",
+    "    # Compile the regex pattern\n",
+    "    regex = re.compile(pattern)\n",
+    "    results = []\n",
+    "    # Search in the database\n",
+    "    for record in database:\n",
+    "        for pos_item in record[\"pos_items\"]:\n",
+    "            for pronunciation in pos_item[\"pronunciations\"]:\n",
+    "                if regex.search(pronunciation[\"pronunciation\"]):\n",
+    "                    if pronunciation.get('region') == 'us':\n",
+    "                        results.append((record[\"word\"], pronunciation[\"pronunciation\"].replace(\".\", \"\"))) # record[\"_id\"][\"$oid\"],\n",
+    "    # Return None if no match found\n",
+    "    return results\n",
+    "\n",
+    "# 用于测试的 JSON 数据库文件路径\n",
+    "json_db_file_path = '/Users/joker/github/camdict/cam_dict.refined.json'\n",
+    "\n",
+    "json_database = load_json_database(json_db_file_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "*balls* <span class=\"pho alt\">bɑːlz</span><span class=\"speak-word-inline\" data-audio-us-male=\"/audios/ballss-us-male.mp3\" data-audio-us-female=\"/audios/ballss-us-female.mp3\"></span>\n",
+      "*cards* <span class=\"pho alt\">kɑrdz</span><span class=\"speak-word-inline\" data-audio-us-male=\"/audios/cardss-us-male.mp3\" data-audio-us-female=\"/audios/cardss-us-female.mp3\"></span>\n"
+     ]
+    }
+   ],
+   "source": [
+    "list = \"\"\"\n",
+    "balls,cards\n",
+    "\"\"\"\n",
+    "\n",
+    "for word in list.split(\",\"):\n",
+    "    word = word.strip().lower()\n",
+    "    result = search_in_json_database(json_database, word, 'us')\n",
+    "    if result != 'not exist':\n",
+    "        pho = result['pronunciation']\n",
+    "    else:\n",
+    "        pho = 'not exist'\n",
+    "    line = f'*{word}* <span class=\"pho alt\">{pho}</span><span class=\"speak-word-inline\" data-audio-us-male=\"/audios/{word}s-us-male.mp3\" data-audio-us-female=\"/audios/{word}s-us-female.mp3\"></span>'\n",
+    "    print(line)\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}