08-20-1434, chapter 4.2

2024-08-20 14:34:23 +08:00
parent 1d5a90b082
commit fbce021c5f
223 changed files with 263 additions and 2170 deletions
--- a/1000-hours/public/jupyter-notebooks/edge-tts-valcab-pronounciation.ipynb
+++ b/1000-hours/public/jupyter-notebooks/edge-tts-valcab-pronounciation.ipynb
@@ -84,10 +84,21 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 84,
   "id": "4146f92e",
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['en-US-GuyNeural', 'en-US-AriaNeural']\n",
+      "important\n",
+      "../audios/important-us-male.mp3 created\n",
+      "../audios/important-us-female.mp3 created\n"
+     ]
+    }
+   ],
   "source": [
    "\n",
    "voices = [\"en-US-GuyNeural\", \"en-US-AriaNeural\", \"en-GB-RyanNeural\", \"en-GB-LibbyNeural\"]\n",
@@ -101,9 +112,7 @@
    "    print(voices)\n",
    "\n",
    "words = \"\"\"\n",
-    "hello,\n",
-    "heat,\n",
-    "high,\n",
+    "important\n",
    "\"\"\"\n",
    "\n",
    "for word in words.strip().split(','):\n",
@@ -117,7 +126,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 63,
+   "execution_count": 69,
   "id": "2d46cde4",
   "metadata": {},
   "outputs": [],
@@ -137,17 +146,18 @@
    "        ) as response:\n",
    "            response.stream_to_file(path)\n",
    "        \n",
-    "sentence = \"The explanation you gave was clear but I need a more detailed explanation.\"\n",
+    "sentence = \"It's a very important aspect\"\n",
    "\n",
    "# remove all punctuation at the end of sentence,\n",
    "# replace all spaces and punctuations in the sentence with dash\n",
-    "audio_filename_openai = sentence.translate(str.maketrans(' ,.?!', '-----')).strip().replace(\"--\", \"-\") + '_openai.mp3'\n",
-    "audio_filename_msedge = sentence.translate(str.maketrans(' ,.?!', '-----')).strip().replace(\"--\", \"-\") + '_msedge.mp3'\n",
+    "# audio_filename_openai = sentence.translate(str.maketrans(' ,.?!', '-----')).strip().replace(\"--\", \"-\") + '_openai.mp3'\n",
+    "# audio_filename_msedge = sentence.translate(str.maketrans(' ,.?!', '-----')).strip().replace(\"--\", \"-\") + '_msedge.mp3'\n",
+    "audio_filename_openai = sentence.rstrip(\",.?!\").translate(str.maketrans(' ,.?!', '-----')).strip().replace(\"--\", \"-\")\n",
+    "audio_filename_msedge = sentence.rstrip(\",.?!\").translate(str.maketrans(' ,.?!', '-----')).strip().replace(\"--\", \"-\")\n",
    "# get_openai_tts_audio(sentence, audio_filename_openai, performer='alloy')\n",
    "# await generate_edge_tts_audio(sentence, audio_filename_msedge, voice=\"en-US-GuyNeural\", verbose=True, overwrite=True, play=True)\n",
-    "\n",
    "for voice in [\"alloy\", \"nova\"]:\n",
-    "    get_openai_tts_audio(sentence, f'../audios/{sentence.replace(\" \", \"-\")}-{voice}.mp3', performer=voice)\n"
+    "    get_openai_tts_audio(sentence, f'../audios/{audio_filename_openai}-{voice}.mp3', performer=voice)\n"
   ]
  },
  {
@@ -261,6 +271,62 @@
    "* voice = \"en-CA-ClaraNeural\" (Female)\n",
    "* voice = \"en-CA-LiamNeural\" (Male)"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "id": "215d423d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<_io.BufferedRandom name='../audios/The-art-of-focus-in-our-whirlwind-existence-can-sometimes-feel-like-searching-for-a-needle-in-a-haystack-all-strong.mp3'>"
+      ]
+     },
+     "execution_count": 79,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Build the all-strong clip: synthesize each word on its own, then join the clips with 200 ms pauses.\n",
+    "text = \"\"\"\n",
+    "The art of focus in our whirlwind existence can sometimes feel like searching for a needle in a haystack\n",
+    "\"\"\"\n",
+    "\n",
+    "# 1 second silence with pydub\n",
+    "from pydub import AudioSegment\n",
+    "sentence = AudioSegment.silent(duration=1000)\n",
+    "\n",
+    "for word in text.strip().split(' '):\n",
+    "    w = word.strip().lower()\n",
+    "    if w == \"a\":  # NOTE(review): presumably respelled so the TTS voices the bare article as a word -- confirm\n",
+    "        w = \"uh\"\n",
+    "    if len(w) > 0:  # consecutive spaces produce empty tokens; skip them\n",
+    "        filename = f'../audios/temp-{w.replace(\" \", \"-\")}-alloy.mp3'  # word + voice only; no names from other cells\n",
+    "        get_openai_tts_audio(w, filename, performer=\"alloy\")\n",
+    "        sentence += AudioSegment.from_file(filename) + AudioSegment.silent(duration=200)\n",
+    "        # remove the temp file\n",
+    "        os.remove(filename)\n",
+    "sentence += AudioSegment.silent(duration=1000)\n",
+    "# save the sentence as a single audio file\n",
+    "sentence.export(f'../audios/{text.strip().replace(\" \",\"-\")}-all-strong.mp3', format='mp3')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 77,
+   "id": "5a718cf9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Natural-delivery clip: the whole sentence is passed to the TTS helper in a single call.\n",
+    "# The output file name is the stripped sentence with spaces turned into dashes.\n",
+    "text = \"\\nThe art of focus in our whirlwind existence can sometimes feel like searching for a needle in a haystack\\n\"\n",
+    "filename = f'../audios/{text.strip().replace(\" \",\"-\")}-natural.mp3'\n",
+    "get_openai_tts_audio(text, filename, performer=\"alloy\")"
+   ]
  }
 ],
 "metadata": {