08-20-0951, chapter 4
This commit is contained in:
@@ -29,7 +29,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"execution_count": 49,
|
||||
"id": "71d35cd9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -38,81 +38,17 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['en-US-GuyNeural', 'en-US-AriaNeural']\n",
|
||||
"ma\n",
|
||||
"../audios/ma-us-male.mp3 created\n",
|
||||
"../audios/ma-us-female.mp3 created\n",
|
||||
"hello\n",
|
||||
"../audios/hello-us-male.mp3 created\n",
|
||||
"../audios/hello-us-female.mp3 created\n",
|
||||
"\n",
|
||||
"room\n",
|
||||
"../audios/room-us-male.mp3 created\n",
|
||||
"../audios/room-us-female.mp3 created\n",
|
||||
"heat\n",
|
||||
"../audios/heat-us-male.mp3 created\n",
|
||||
"../audios/heat-us-female.mp3 created\n",
|
||||
"\n",
|
||||
"conversation\n",
|
||||
"../audios/conversation-us-male.mp3 created\n",
|
||||
"../audios/conversation-us-female.mp3 created\n",
|
||||
"\n",
|
||||
"army\n",
|
||||
"../audios/army-us-male.mp3 created\n",
|
||||
"../audios/army-us-female.mp3 created\n",
|
||||
"\n",
|
||||
"mob\n",
|
||||
"../audios/mob-us-male.mp3 created\n",
|
||||
"../audios/mob-us-female.mp3 created\n",
|
||||
"\n",
|
||||
"mom\n",
|
||||
"../audios/mom-us-male.mp3 created\n",
|
||||
"../audios/mom-us-female.mp3 created\n",
|
||||
"\n",
|
||||
"mind\n",
|
||||
"../audios/mind-us-male.mp3 created\n",
|
||||
"../audios/mind-us-female.mp3 created\n",
|
||||
"\n",
|
||||
"night\n",
|
||||
"../audios/night-us-male.mp3 created\n",
|
||||
"../audios/night-us-female.mp3 created\n",
|
||||
"\n",
|
||||
"nine\n",
|
||||
"../audios/nine-us-male.mp3 created\n",
|
||||
"../audios/nine-us-female.mp3 created\n",
|
||||
"\n",
|
||||
"know\n",
|
||||
"../audios/know-us-male.mp3 created\n",
|
||||
"../audios/know-us-female.mp3 created\n",
|
||||
"\n",
|
||||
"knight\n",
|
||||
"../audios/knight-us-male.mp3 created\n",
|
||||
"../audios/knight-us-female.mp3 created\n",
|
||||
"\n",
|
||||
"gnaw\n",
|
||||
"../audios/gnaw-us-male.mp3 created\n",
|
||||
"../audios/gnaw-us-female.mp3 created\n",
|
||||
"\n",
|
||||
"gnome\n",
|
||||
"../audios/gnome-us-male.mp3 created\n",
|
||||
"../audios/gnome-us-female.mp3 created\n",
|
||||
"\n",
|
||||
"anchor\n",
|
||||
"../audios/anchor-us-male.mp3 created\n",
|
||||
"../audios/anchor-us-female.mp3 created\n",
|
||||
"\n",
|
||||
"bank\n",
|
||||
"../audios/bank-us-male.mp3 created\n",
|
||||
"../audios/bank-us-female.mp3 created\n",
|
||||
"\n",
|
||||
"thank\n",
|
||||
"../audios/thank-us-male.mp3 exists, skipping...\n",
|
||||
"../audios/thank-us-female.mp3 exists, skipping...\n",
|
||||
"\n",
|
||||
"bang\n",
|
||||
"../audios/bang-us-male.mp3 created\n",
|
||||
"../audios/bang-us-female.mp3 created\n",
|
||||
"\n",
|
||||
"long\n",
|
||||
"../audios/long-us-male.mp3 created\n",
|
||||
"../audios/long-us-female.mp3 created\n",
|
||||
"\n",
|
||||
"sing\n",
|
||||
"../audios/sing-us-male.mp3 created\n",
|
||||
"../audios/sing-us-female.mp3 created\n",
|
||||
"high\n",
|
||||
"../audios/high-us-male.mp3 created\n",
|
||||
"../audios/high-us-female.mp3 created\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
@@ -143,9 +79,16 @@
|
||||
" if verbose:\n",
|
||||
" print(f'{file_name} created')\n",
|
||||
" \n",
|
||||
" time.sleep(1.5)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" time.sleep(1.5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4146f92e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"voices = [\"en-US-GuyNeural\", \"en-US-AriaNeural\", \"en-GB-RyanNeural\", \"en-GB-LibbyNeural\"]\n",
|
||||
"regions = ['us', 'us', 'uk', 'uk']\n",
|
||||
@@ -158,7 +101,9 @@
|
||||
" print(voices)\n",
|
||||
"\n",
|
||||
"words = \"\"\"\n",
|
||||
"sam\n",
|
||||
"hello,\n",
|
||||
"heat,\n",
|
||||
"high,\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"for word in words.strip().split(','):\n",
|
||||
@@ -170,6 +115,121 @@
|
||||
" await generate_edge_tts_audio(w, filename, voice=voice, verbose=True, overwrite=False, play=True)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 63,
|
||||
"id": "2d46cde4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_openai_tts_audio(text, path, performer='alloy'):\n",
|
||||
" \n",
|
||||
" from openai import OpenAI\n",
|
||||
" from dotenv import load_dotenv\n",
|
||||
" load_dotenv()\n",
|
||||
" client = OpenAI(\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" with client.audio.speech.with_streaming_response.create(\n",
|
||||
" model=\"tts-1\",\n",
|
||||
" voice=performer,\n",
|
||||
" input=text.strip()\n",
|
||||
" ) as response:\n",
|
||||
" response.stream_to_file(path)\n",
|
||||
" \n",
|
||||
"sentence = \"The explanation you gave was clear but I need a more detailed explanation.\"\n",
|
||||
"\n",
|
||||
"# remove all punctuation at the end of sentence,\n",
|
||||
"# replace all spaces and punctuations in the sentence with dash\n",
|
||||
"audio_filename_openai = sentence.translate(str.maketrans(' ,.?!', '-----')).strip().replace(\"--\", \"-\") + '_openai.mp3'\n",
|
||||
"audio_filename_msedge = sentence.translate(str.maketrans(' ,.?!', '-----')).strip().replace(\"--\", \"-\") + '_msedge.mp3'\n",
|
||||
"# get_openai_tts_audio(sentence, audio_filename_openai, performer='alloy')\n",
|
||||
"# await generate_edge_tts_audio(sentence, audio_filename_msedge, voice=\"en-US-GuyNeural\", verbose=True, overwrite=True, play=True)\n",
|
||||
"\n",
|
||||
"for voice in [\"alloy\", \"nova\"]:\n",
|
||||
" get_openai_tts_audio(sentence, f'../audios/{sentence.replace(\" \", \"-\")}-{voice}.mp3', performer=voice)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 54,
|
||||
"id": "7f219eb1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from openai import OpenAI\n",
|
||||
"import os\n",
|
||||
"import IPython\n",
|
||||
"from datetime import datetime\n",
|
||||
"from mutagen.mp3 import MP3\n",
|
||||
"from mutagen.id3 import ID3, APIC, TPE1, TALB, TCON\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from pydub import AudioSegment\n",
|
||||
"\n",
|
||||
"load_dotenv()\n",
|
||||
"client = OpenAI(\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"def get_openai_tts_audio(text, filename, performer=\"alloy\"):\n",
|
||||
"\n",
|
||||
" # check artwork.png and ending.mp3 files exist\n",
|
||||
" if not os.path.isfile('Artwork.png') or not os.path.isfile('ending.mp3'):\n",
|
||||
" print(\"Either Artwork.png or ending.mp3 file not found.\")\n",
|
||||
" return\n",
|
||||
"\n",
|
||||
" # split the text into lines\n",
|
||||
" text = markdown_to_text(text).split(\"\\n\")\n",
|
||||
" # remove empty lines\n",
|
||||
" text = [t for t in text if t]\n",
|
||||
"\n",
|
||||
" for t in text:\n",
|
||||
" speech_file_path = f'temp-{text.index(t)}.mp3'\n",
|
||||
" rspd_audio = client.audio.speech.create(\n",
|
||||
" model=\"tts-1\",\n",
|
||||
" voice=performer,\n",
|
||||
" input=t.strip()\n",
|
||||
" ) \n",
|
||||
" rspd_audio.stream_to_file(speech_file_path)\n",
|
||||
" # output a progress percentage \n",
|
||||
" # keep updating within a line\n",
|
||||
" print(f\"\\rprocessing: {round((text.index(t)+1)/len(text)*100)}%\", end='...')\n",
|
||||
" print(\"\\n\")\n",
|
||||
"\n",
|
||||
" # create an audio of 1 second of silence\n",
|
||||
" temp_audio = AudioSegment.silent(duration=1000)\n",
|
||||
" for t in text:\n",
|
||||
" seg = AudioSegment.from_file(f'temp-{text.index(t)}.mp3')\n",
|
||||
" temp_audio += seg + AudioSegment.silent(duration=1500)\n",
|
||||
" # delete the temp file\n",
|
||||
" os.remove(f'temp-{text.index(t)}.mp3')\n",
|
||||
" temp_audio.export('~temp.mp3', format='mp3')\n",
|
||||
" speech = AudioSegment.from_file('~temp.mp3')\n",
|
||||
" ending = AudioSegment.from_file('ending.mp3')\n",
|
||||
" combined = speech + ending\n",
|
||||
" os.remove('~temp.mp3')\n",
|
||||
" if filename:\n",
|
||||
" # if filename has no extension, add .mp3\n",
|
||||
" if filename.endswith('.mp3'):\n",
|
||||
" speech_file_path = filename\n",
|
||||
" else:\n",
|
||||
" speech_file_path = f'{filename}.mp3' \n",
|
||||
" else:\n",
|
||||
" speech_file_path = f'{datetime.now().strftime(\"%Y%m%d_%H%M%S\")}_{performer}.mp3'\n",
|
||||
" combined.export(speech_file_path, format='mp3')\n",
|
||||
" print(f\"Audio file saved as {speech_file_path}\")\n",
|
||||
"\n",
|
||||
" image_file = 'Artwork.png'\n",
|
||||
" artist = 'tts'\n",
|
||||
" album = 'Daily Speech Training'\n",
|
||||
" genre = 'SPEECH'\n",
|
||||
"\n",
|
||||
" add_metadata(speech_file_path, image_file, artist, album, genre)\n",
|
||||
" IPython.display.Audio(speech_file_path)\n",
|
||||
"\n",
|
||||
" return f'{speech_file_path} created successfully.'\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2df59a42",
|
||||
|
||||
Reference in New Issue
Block a user