spellings and examples added.

2024-07-24 15:16:14 +08:00
parent 1fd6498f7a
commit b289738632
844 changed files with 998 additions and 17 deletions
--- a/1000-hours/public/jupyter-notebooks/French-us-female.mp3
+++ b/1000-hours/public/jupyter-notebooks/French-us-female.mp3
--- a/1000-hours/public/jupyter-notebooks/French-us-male.mp3
+++ b/1000-hours/public/jupyter-notebooks/French-us-male.mp3
--- a/1000-hours/public/jupyter-notebooks/sentence-openai.ipynb
+++ b/1000-hours/public/jupyter-notebooks/sentence-openai.ipynb
@@ -51,7 +51,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 1,
   "id": "603fb48b-2fd5-482c-9c96-38a65fc43824",
   "metadata": {},
   "outputs": [
@@ -59,7 +59,7 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "/var/folders/k_/t1vf2gd95jbc1j3k49xxdfy80000gn/T/ipykernel_5661/2864483681.py:30: DeprecationWarning: Due to a bug, this method doesn't actually stream the response content, `.with_streaming_response.method()` should be used instead\n",
+      "/var/folders/k_/t1vf2gd95jbc1j3k49xxdfy80000gn/T/ipykernel_59088/953934680.py:30: DeprecationWarning: Due to a bug, this method doesn't actually stream the response content, `.with_streaming_response.method()` should be used instead\n",
      "  rspd_audio.stream_to_file(speech_file_path)\n"
     ]
    }
@@ -78,7 +78,7 @@
    "\n",
    "\n",
    "text = \"\"\"\n",
-    "lookout,seekout\n",
+    "adventure,French\n",
    "\"\"\"\n",
    "# Create audios for each versions (three was set as default previously.)\n",
    "\n",
--- a/1000-hours/public/jupyter-notebooks/spelling-rules.ipynb
+++ b/1000-hours/public/jupyter-notebooks/spelling-rules.ipynb
@@ -0,0 +1,186 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "import requests\n",
+    "import json\n",
+    "import vlc\n",
+    "import re\n",
+    "import random\n",
+    "from IPython.display import Audio\n",
+    "\n",
+    "def load_json_database(url):\n",
+    "    records = []\n",
+    "    try:\n",
+    "        response = requests.get(url)\n",
+    "        response.raise_for_status()  # Raise an error for bad status codes\n",
+    "        for line in response.iter_lines(decode_unicode=True):\n",
+    "            if line:\n",
+    "                try:\n",
+    "                    record = json.loads(line)\n",
+    "                    records.append(record)\n",
+    "                except json.JSONDecodeError as e:\n",
+    "                    print(f\"Error parsing JSON: {e}\")\n",
+    "    except requests.exceptions.RequestException as e:\n",
+    "        print(f\"Error fetching data from URL: {e}\")\n",
+    "    return records\n",
+    "\n",
+    "def search_in_json_database(database, search_word, region):\n",
+    "    for record in database:\n",
+    "        # Check whether this record's 'word' field matches the search word\n",
+    "        if record.get('word') == search_word:\n",
+    "            # Match found: look through its pronunciations for the requested region\n",
+    "            pos_items = record.get('pos_items', [])\n",
+    "            for pos_item in pos_items:\n",
+    "                pronunciations = pos_item.get('pronunciations', [])\n",
+    "                for pronunciation in pronunciations:\n",
+    "                    if pronunciation.get('region') == region:\n",
+    "                        # Found a pronunciation for the requested region; return its details\n",
+    "                        return {\n",
+    "                            'pronunciation': pronunciation.get('pronunciation'),\n",
+    "                            'audio': pronunciation.get('audio')\n",
+    "                        }\n",
+    "    # No record matched the word with a pronunciation for the region; return 'not exist'\n",
+    "    return 'not exist'\n",
+    "\n",
+    "url = \"https://raw.githubusercontent.com/zelic91/camdict/main/cam_dict.refined.json\"\n",
+    "\n",
+    "json_database = load_json_database(url)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 105,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "### <span class=\"pho\">tr</span>\n",
+      "* **tr**\n",
+      "\t- track <span class=\"pho alt\">træk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/track-us-male.mp3\" data-audio-us-female=\"/audios/us/track-us-female.mp3\"></span>\n",
+      " \t- tree <span class=\"pho alt\">triː</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/tree-us-male.mp3\" data-audio-us-female=\"/audios/us/tree-us-female.mp3\"></span>\n",
+      " \t- trick <span class=\"pho alt\">trɪk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/trick-us-male.mp3\" data-audio-us-female=\"/audios/us/trick-us-female.mp3\"></span>\n",
+      "\n",
+      "### <span class=\"pho\">dr</span>\n",
+      "* **dr**\n",
+      "\t- drive <span class=\"pho alt\">draɪv</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/drive-us-male.mp3\" data-audio-us-female=\"/audios/us/drive-us-female.mp3\"></span>\n",
+      " \t- dream <span class=\"pho alt\">driːm</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/dream-us-male.mp3\" data-audio-us-female=\"/audios/us/dream-us-female.mp3\"></span>\n",
+      " \t- drink <span class=\"pho alt\">drɪŋk</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/drink-us-male.mp3\" data-audio-us-female=\"/audios/us/drink-us-female.mp3\"></span>\n",
+      "\n",
+      "phonetics_not_exist: \n",
+      "\n",
+      "track,tree,trick,drive,dream,drink,\n"
+     ]
+    }
+   ],
+   "source": [
+    "text =\"\"\"\n",
+    "### `ts`\n",
+    "* \"ts\" - clients, students, treats\n",
+    "\n",
+    "### `dz`\n",
+    "* \"ds\" - deeds, records, words\n",
+    "\"\"\"\n",
+    "\n",
+    "words = \"\"\n",
+    "phonetics_not_exist = ''\n",
+    "lines = text.split(\"\\n\")\n",
+    "for line in lines:\n",
+    "    line = line.replace(\" `\", \" <span class=\\\"pho\\\">\")\n",
+    "    line = line.replace(\"`\", \"</span>\")\n",
+    "    if '*' in line:\n",
+    "        line = line.replace('\"', \"**\")\n",
+    "        examples = line.split(\"-\")[1].split(\",\")\n",
+    "        examples = [x.strip() for x in examples]\n",
+    "        line = line.replace(\" - \", \"\\n\")\n",
+    "        # print(examples)\n",
+    "        # Wrap each example word in phonetic and audio <span> markup.\n",
+    "        for e in examples:\n",
+    "            # Append e to the comma-separated words string.\n",
+    "            words += e + \",\"\n",
+    "            entry_us = search_in_json_database(json_database, e, 'us')\n",
+    "            if entry_us == 'not exist':\n",
+    "                phonetics = entry_us\n",
+    "                phonetics_not_exist += f'{e},'\n",
+    "            else:\n",
+    "                phonetics = entry_us['pronunciation']    \n",
+    "            wrapped_e = f'\\t- {e} <span class=\"pho alt\">{phonetics}</span> <span class=\"speak-word-inline\" data-audio-us-male=\"/audios/us/{e}-us-male.mp3\" data-audio-us-female=\"/audios/us/{e}-us-female.mp3\"></span>\\n'\n",
+    "            line = line.replace(e, wrapped_e).replace(',', '').strip()\n",
+    "            \n",
+    "\n",
+    "    print(line)\n",
+    "print(f'phonetics_not_exist: {phonetics_not_exist}')\n",
+    "print('\\n'+words)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 106,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['track', 'tree', 'trick', 'drive', 'dream', 'drink']\n",
+      "track\n",
+      "tree\n",
+      "trick\n",
+      "drive\n",
+      "dream\n",
+      "drink\n",
+      "Files created!\n"
+     ]
+    }
+   ],
+   "source": [
+    "import asyncio\n",
+    "import edge_tts\n",
+    "import pygame\n",
+    "text = words.rstrip(\",\")\n",
+    "Wordlist = text.split(\",\")\n",
+    "# Wordlist = ['chivvy']\n",
+    "print(Wordlist)\n",
+    "for w in Wordlist:\n",
+    "  # for VOICE in ['en-US-GuyNeural', 'en-US-JennyNeural', 'en-GB-RyanNeural', 'en-GB-SoniaNeural']:\n",
+    "  for VOICE in ['en-US-GuyNeural', 'en-US-MichelleNeural']:\n",
+    "    w = w.strip()\n",
+    "    # OUTPUT_FILE = f\"{w}-{VOICE.replace('EricNeural', 'Guy-Male').replace('JennyNeural', 'Jenny-Female').replace('RyanNeural', 'Ryan-Male').replace('SoniaNeural', 'Sonia-Female').lower()}.mp3\"\n",
+    "    OUTPUT_FILE = f\"{w}-{VOICE.replace('GuyNeural', 'Male').replace('MichelleNeural', 'Female').replace('en-', '').lower()}.mp3\"\n",
+    "    communicate = edge_tts.Communicate(w, VOICE)\n",
+    "    await communicate.save(OUTPUT_FILE) \n",
+    "  print(w)\n",
+    "print(\"Files created!\") "
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}