{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The history saving thread hit an unexpected error (DatabaseError('database disk image is malformed')).History will not be written to the database.\n" ] } ], "source": [ "import json\n", "import vlc\n", "import re\n", "\n", "# 假设你的 JSON 数据库是一个 JSON 文件,我们将从文件中加载数据\n", "# 如果 JSON 数据在内存中或其他格式,你可能需要修改这部分代码\n", "def load_json_database(file_path):\n", " records = []\n", " with open(file_path, 'r') as file:\n", " for line in file:\n", " try:\n", " record = json.loads(line)\n", " records.append(record)\n", " except json.JSONDecodeError as e:\n", " print(f\"Error parsing JSON: {e}\")\n", " return records\n", "\n", "# The rest of the code remains the same...\n", "\n", "# 在 JSON 数据库中检索 word\n", "def search_in_json_database(database, search_word, region):\n", " for record in database:\n", " # 检查 word 字段是否匹配\n", " if record.get('word') == search_word:\n", " # 找到匹配项后,获取美式发音信息\n", " pos_items = record.get('pos_items', [])\n", " for pos_item in pos_items:\n", " pronunciations = pos_item.get('pronunciations', [])\n", " for pronunciation in pronunciations:\n", " if pronunciation.get('region') == region:\n", " # 找到美式发音,返回相关信息\n", " return {\n", " 'pronunciation': pronunciation.get('pronunciation'),\n", " 'audio': pronunciation.get('audio')\n", " }\n", " # 如果没有找到匹配的 word 字段,返回 'not exist'\n", " return 'not exist'\n", "\n", "def search_pronunciation(database, pattern):\n", " # Compile the regex pattern\n", " regex = re.compile(pattern)\n", " results = []\n", " # Search in the database\n", " for record in database:\n", " for pos_item in record[\"pos_items\"]:\n", " for pronunciation in pos_item[\"pronunciations\"]:\n", " if regex.search(pronunciation[\"pronunciation\"]):\n", " if pronunciation.get('region') == 'us':\n", " results.append((record[\"word\"], pronunciation[\"pronunciation\"].replace(\".\", \"\"))) # record[\"_id\"][\"$oid\"],\n", " # Return None if no match found\n", " return results\n", "\n", "# 用于测试的 JSON 数据库文件路径\n", "json_db_file_path = '/Users/joker/github/camdict/cam_dict.refined.json'\n", "\n", "json_database = load_json_database(json_db_file_path)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "*balls* bɑːlz\n", "*cards* kɑrdz\n" ] } ], "source": [ "list = \"\"\"\n", "balls,cards\n", "\"\"\"\n", "\n", "for word in list.split(\",\"):\n", " word = word.strip().lower()\n", " result = search_in_json_database(json_database, word, 'us')\n", " if result != 'not exist':\n", " pho = result['pronunciation']\n", " else:\n", " pho = 'not exist'\n", " line = f'*{word}* {pho}'\n", " print(line)\n" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 2 }