08-20-1635, chapter 4.2
This commit is contained in:
@@ -0,0 +1,127 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The history saving thread hit an unexpected error (DatabaseError('database disk image is malformed')).History will not be written to the database.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"import vlc\n",
|
||||
"import re\n",
|
||||
"\n",
|
||||
"# 假设你的 JSON 数据库是一个 JSON 文件,我们将从文件中加载数据\n",
|
||||
"# 如果 JSON 数据在内存中或其他格式,你可能需要修改这部分代码\n",
|
||||
"def load_json_database(file_path):\n",
|
||||
" records = []\n",
|
||||
" with open(file_path, 'r') as file:\n",
|
||||
" for line in file:\n",
|
||||
" try:\n",
|
||||
" record = json.loads(line)\n",
|
||||
" records.append(record)\n",
|
||||
" except json.JSONDecodeError as e:\n",
|
||||
" print(f\"Error parsing JSON: {e}\")\n",
|
||||
" return records\n",
|
||||
"\n",
|
||||
"# The rest of the code remains the same...\n",
|
||||
"\n",
|
||||
"# 在 JSON 数据库中检索 word\n",
|
||||
"def search_in_json_database(database, search_word, region):\n",
|
||||
" for record in database:\n",
|
||||
" # 检查 word 字段是否匹配\n",
|
||||
" if record.get('word') == search_word:\n",
|
||||
" # 找到匹配项后,获取美式发音信息\n",
|
||||
" pos_items = record.get('pos_items', [])\n",
|
||||
" for pos_item in pos_items:\n",
|
||||
" pronunciations = pos_item.get('pronunciations', [])\n",
|
||||
" for pronunciation in pronunciations:\n",
|
||||
" if pronunciation.get('region') == region:\n",
|
||||
" # 找到美式发音,返回相关信息\n",
|
||||
" return {\n",
|
||||
" 'pronunciation': pronunciation.get('pronunciation'),\n",
|
||||
" 'audio': pronunciation.get('audio')\n",
|
||||
" }\n",
|
||||
" # 如果没有找到匹配的 word 字段,返回 'not exist'\n",
|
||||
" return 'not exist'\n",
|
||||
"\n",
|
||||
"def search_pronunciation(database, pattern):\n",
|
||||
" # Compile the regex pattern\n",
|
||||
" regex = re.compile(pattern)\n",
|
||||
" results = []\n",
|
||||
" # Search in the database\n",
|
||||
" for record in database:\n",
|
||||
" for pos_item in record[\"pos_items\"]:\n",
|
||||
" for pronunciation in pos_item[\"pronunciations\"]:\n",
|
||||
" if regex.search(pronunciation[\"pronunciation\"]):\n",
|
||||
" if pronunciation.get('region') == 'us':\n",
|
||||
" results.append((record[\"word\"], pronunciation[\"pronunciation\"].replace(\".\", \"\"))) # record[\"_id\"][\"$oid\"],\n",
|
||||
" # Return None if no match found\n",
|
||||
" return results\n",
|
||||
"\n",
|
||||
"# 用于测试的 JSON 数据库文件路径\n",
|
||||
"json_db_file_path = '/Users/joker/github/camdict/cam_dict.refined.json'\n",
|
||||
"\n",
|
||||
"json_database = load_json_database(json_db_file_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"*balls* <span class=\"pho alt\">bɑːlz</span><span class=\"speak-word-inline\" data-audio-us-male=\"/audios/ballss-us-male.mp3\" data-audio-us-female=\"/audios/ballss-us-female.mp3\"></span>\n",
|
||||
"*cards* <span class=\"pho alt\">kɑrdz</span><span class=\"speak-word-inline\" data-audio-us-male=\"/audios/cardss-us-male.mp3\" data-audio-us-female=\"/audios/cardss-us-female.mp3\"></span>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"list = \"\"\"\n",
|
||||
"balls,cards\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"for word in list.split(\",\"):\n",
|
||||
" word = word.strip().lower()\n",
|
||||
" result = search_in_json_database(json_database, word, 'us')\n",
|
||||
" if result != 'not exist':\n",
|
||||
" pho = result['pronunciation']\n",
|
||||
" else:\n",
|
||||
" pho = 'not exist'\n",
|
||||
" line = f'*{word}* <span class=\"pho alt\">{pho}</span><span class=\"speak-word-inline\" data-audio-us-male=\"/audios/{word}s-us-male.mp3\" data-audio-us-female=\"/audios/{word}s-us-female.mp3\"></span>'\n",
|
||||
" print(line)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user