{ "cells": [ { "cell_type": "markdown", "id": "b94f103d-ac43-4d13-83cb-eb5090220881", "metadata": {}, "source": [ "# EdgeTTS\n", "\n", "https://github.com/rany2/edge-tts\n", "\n", "edge-tts is a Python module that allows you to use Microsoft Edge's online text-to-speech service from within your Python code or using the provided edge-tts or edge-playback command." ] }, { "cell_type": "code", "execution_count": 2, "id": "77deb08f-fec3-4327-b2f9-1c893aacaddc", "metadata": { "collapsed": true, "jupyter": { "outputs_hidden": true } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: edge-tts in /opt/homebrew/Caskroom/miniconda/base/lib/python3.12/site-packages (6.1.10)\n", "Requirement already satisfied: aiohttp>=3.8.0 in /opt/homebrew/Caskroom/miniconda/base/lib/python3.12/site-packages (from edge-tts) (3.9.3)\n", "Requirement already satisfied: certifi>=2023.11.17 in /opt/homebrew/Caskroom/miniconda/base/lib/python3.12/site-packages (from edge-tts) (2024.2.2)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /opt/homebrew/Caskroom/miniconda/base/lib/python3.12/site-packages (from aiohttp>=3.8.0->edge-tts) (1.3.1)\n", "Requirement already satisfied: attrs>=17.3.0 in /opt/homebrew/Caskroom/miniconda/base/lib/python3.12/site-packages (from aiohttp>=3.8.0->edge-tts) (23.2.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /opt/homebrew/Caskroom/miniconda/base/lib/python3.12/site-packages (from aiohttp>=3.8.0->edge-tts) (1.4.1)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/homebrew/Caskroom/miniconda/base/lib/python3.12/site-packages (from aiohttp>=3.8.0->edge-tts) (6.0.5)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/homebrew/Caskroom/miniconda/base/lib/python3.12/site-packages (from aiohttp>=3.8.0->edge-tts) (1.9.4)\n", "Requirement already satisfied: idna>=2.0 in /opt/homebrew/Caskroom/miniconda/base/lib/python3.12/site-packages (from 
yarl<2.0,>=1.0->aiohttp>=3.8.0->edge-tts) (3.4)\n",
      "Requirement already satisfied: pygame in /opt/homebrew/Caskroom/miniconda/base/lib/python3.12/site-packages (2.5.2)\n"
     ]
    }
   ],
   "source": [
    "%pip install edge-tts pygame"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2df59a42",
   "metadata": {},
   "source": [
    "# English Voices\n",
    "\n",
    "* voice = \"en-US-GuyNeural\" (Male)\n",
    "* voice = \"en-US-AnaNeural\" (Female)\n",
    "* voice = \"en-US-AndrewNeural\" (Male)\n",
    "* voice = \"en-US-AriaNeural\" (Female)\n",
    "* voice = \"en-US-AvaNeural\" (Female)\n",
    "* voice = \"en-US-BrianNeural\" (Male)\n",
    "* voice = \"en-US-ChristopherNeural\" (Male)\n",
    "* voice = \"en-US-EmmaNeural\" (Female)\n",
    "* voice = \"en-US-EricNeural\" (Male)\n",
    "* voice = \"en-US-GuyNeural\" (Male)\n",
    "* voice = \"en-US-JennyNeural\" (Female)\n",
    "* voice = \"en-US-MichelleNeural\" (Female)\n",
    "* voice = \"en-US-RogerNeural\" (Male)\n",
    "* voice = \"en-US-SteffanNeural\" (Male)\n",
    "* voice = \"en-GB-LibbyNeural\" (Female)\n",
    "* voice = \"en-GB-MaisieNeural\" (Female)\n",
    "* voice = \"en-GB-RyanNeural\" (Male)\n",
    "* voice = \"en-GB-SoniaNeural\" (Female)\n",
    "* voice = \"en-GB-ThomasNeural\" (Male)\n",
    "* voice = \"en-AU-NatashaNeural\" (Female)\n",
    "* voice = \"en-AU-WilliamNeural\" (Male)\n",
    "* voice = \"en-CA-ClaraNeural\" (Female)\n",
    "* voice = \"en-CA-LiamNeural\" (Male)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71d35cd9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import asyncio\n",
    "import os\n",
    "import time\n",
    "\n",
    "import edge_tts\n",
    "import pygame\n",
    "\n",
    "# Pause after each clip is written, in seconds.\n",
    "PAUSE_SECONDS = 1.5\n",
    "\n",
    "\n",
    "async def generate_edge_tts_audio(text, file_name, voice='en-US-GuyNeural', style='newscast-formal', verbose=False, play=False, overwrite=False):\n",
    "    \"\"\"Synthesize `text` with edge-tts and save it as an MP3 file.\n",
    "\n",
    "    Args:\n",
    "        text: Text to speak.\n",
    "        file_name: Path of the MP3 file to write.\n",
    "        voice: edge-tts voice name, e.g. 'en-US-GuyNeural'.\n",
    "        style: Not used by this function; accepted so existing calls that\n",
    "            pass it keep working.\n",
    "        verbose: Print whether the file was created, skipped or overwritten.\n",
    "        play: Play the clip with pygame after it has been written.\n",
    "        overwrite: Regenerate the file even if it already exists.\n",
    "\n",
    "    Returns:\n",
    "        None. An existing file is left untouched unless `overwrite` is set.\n",
    "    \"\"\"\n",
    "    if os.path.exists(file_name):\n",
    "        if not overwrite:\n",
    "            if verbose:\n",
    "                print(f'{file_name} exists, skipping...')\n",
    "            return\n",
    "        if verbose:\n",
    "            print(f'{file_name} exists, overwriting...')\n",
    "\n",
    "    # Make sure the target folder exists before writing into it.\n",
    "    folder = os.path.dirname(file_name)\n",
    "    if folder:\n",
    "        os.makedirs(folder, exist_ok=True)\n",
    "\n",
    "    # Build the request only once we know the file will actually be written.\n",
    "    communicate = edge_tts.Communicate(text, voice)\n",
    "    await communicate.save(file_name)\n",
    "    if verbose:\n",
    "        print(f'{file_name} created')\n",
    "\n",
    "    if play:\n",
    "        if not pygame.mixer.get_init():\n",
    "            pygame.mixer.init()\n",
    "        pygame.mixer.music.load(file_name)\n",
    "        pygame.mixer.music.play()\n",
    "\n",
    "    # asyncio.sleep rather than time.sleep: the pause must not block the\n",
    "    # kernel's event loop while this coroutine is waiting.\n",
    "    await asyncio.sleep(PAUSE_SECONDS)\n",
    "    # Clips longer than the pause: wait for playback to finish so the next\n",
    "    # load() does not cut the current clip off.\n",
    "    while play and pygame.mixer.music.get_busy():\n",
    "        await asyncio.sleep(0.1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c9e5b7a",
   "metadata": {},
   "outputs": [],
   "source": [
    "AUDIO_DIR = '../audios'\n",
    "\n",
    "# (voice, region tag, gender tag); the tags become part of the file name.\n",
    "VOICE_PROFILES = [\n",
    "    ('en-US-GuyNeural', 'us', 'male'),\n",
    "    ('en-US-AriaNeural', 'us', 'female'),\n",
    "    ('en-GB-RyanNeural', 'uk', 'male'),\n",
    "    ('en-GB-LibbyNeural', 'uk', 'female'),\n",
    "]\n",
    "\n",
    "only_us = True  # set to False to also generate the British voices\n",
    "profiles = [p for p in VOICE_PROFILES if not only_us or p[1] == 'us']\n",
    "print([voice for voice, _, _ in profiles])\n",
    "\n",
    "words = \"\"\"\n",
    "bed,\n",
    "guess,\n",
    "pet,\n",
    "bread,\n",
    "dead,\n",
    "lead,\n",
    "any,\n",
    "many,\n",
    "says,\n",
    "again,\n",
    "said,\n",
    "friend,\n",
    "apple,\n",
    "cat,\n",
    "glass,\n",
    "calf,\n",
    "half,\n",
    "laugh,\n",
    "draught,\n",
    "\"\"\"\n",
    "\n",
    "# Normalise once: strip the newlines around each entry and drop the empty\n",
    "# entry left behind by the trailing comma.\n",
    "word_list = [part.strip().lower() for part in words.split(',') if part.strip()]\n",
    "\n",
    "generated_files = []\n",
    "for word in word_list:\n",
    "    print(word)\n",
    "    for voice, region, gender in profiles:\n",
    "        filename = f'{AUDIO_DIR}/{word}-{region}-{gender}.mp3'\n",
    "        await generate_edge_tts_audio(word, filename, voice=voice, verbose=True, overwrite=False, play=True)\n",
    "        generated_files.append(filename)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "862f0948",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replay the most recent clip from the loop above.\n",
    "if generated_files:\n",
    "    pygame.mixer.init()\n",
    "    pygame.mixer.music.load(generated_files[-1])\n",
    "    pygame.mixer.music.play()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "00939422-fbf9-4842-b82a-b6106624c075",
   "metadata": {},
   "outputs": [],
   "source": [
    "# American words, saved to the current directory\n",
    "\n",
    "TEXT = 'hands'\n",
    "# Explicit voice -> tag mapping so every voice gets a '<region>-<gender>' file name.\n",
    "US_WORD_VOICES = {'en-US-GuyNeural': 'us-male', 'en-US-MichelleNeural': 'us-female'}\n",
    "\n",
    "for word in [part.strip() for part in TEXT.split(',')]:\n",
    "    if not word:\n",
    "        continue\n",
    "    for voice, tag in US_WORD_VOICES.items():\n",
    "        await generate_edge_tts_audio(word, f'{word}-{tag}.mp3', voice=voice, overwrite=True)\n",
    "\n",
    "print('Files created!')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cecefa85-b37d-457e-9e99-e582dabdfb9f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# American sentences, saved to the current directory\n",
    "\n",
    "TEXT = 'cat,foot'\n",
    "US_SENTENCE_VOICES = {'en-US-EricNeural': 'us-male', 'en-US-MichelleNeural': 'us-female'}\n",
    "\n",
    "for sentence in [part.strip() for part in TEXT.split(',')]:\n",
    "    if not sentence:\n",
    "        continue\n",
    "    # Hyphenate only the file name; the voice must read the original text.\n",
    "    slug = sentence.replace(' ', '-')\n",
    "    for voice, tag in US_SENTENCE_VOICES.items():\n",
    "        await generate_edge_tts_audio(sentence, f'{slug}-{tag}.mp3', voice=voice, overwrite=True)\n",
    "\n",
    "print('Files created!')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "93f68a91-7e6b-45ef-932e-533b695e4ac1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ru-RU-DmitryNeural\n",
    "# ko-KR-InJoonNeural"
   ]
  },
  {
   "cell_type": "code",
"execution_count": 51, "id": "474c3f39-11ed-4d0a-b039-63df8b270044", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Files created!\n" ] } ], "source": [ "# British\n", "\n", "import asyncio\n", "import edge_tts\n", "import pygame\n", "\n", "TEXT = \"speak, student, school, frustrate\"\n", "Wordlist = TEXT.split(\",\")\n", "\n", "for w in Wordlist:\n", " # for VOICE in ['en-US-GuyNeural', 'en-US-JennyNeural', 'en-GB-RyanNeural', 'en-GB-SoniaNeural']:\n", " for VOICE in ['en-GB-RyanNeural', 'en-GB-SoniaNeural']:\n", " w = w.strip()\n", " # OUTPUT_FILE = f\"{w}-{VOICE.replace('EricNeural', 'Guy-Male').replace('JennyNeural', 'Jenny-Female').replace('RyanNeural', 'Ryan-Male').replace('SoniaNeural', 'Sonia-Female').lower()}.mp3\"\n", " OUTPUT_FILE = f\"{w}-{VOICE.replace('RyanNeural', 'Male').replace('SoniaNeural', 'Female').replace('en-GB-', 'uk-').lower()}.mp3\"\n", " communicate = edge_tts.Communicate(w, VOICE)\n", " await communicate.save(OUTPUT_FILE) \n", "\n", "print(\"Files created!\") " ] }, { "cell_type": "code", "execution_count": 49, "id": "42ecfe3f-f3ac-4fb7-92d0-795d51972051", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Files created!\n" ] } ], "source": [ "#Korean\n", "\n", "import asyncio\n", "import edge_tts\n", "import pygame\n", "\n", "TEXT = \"photo, video\"\n", "Wordlist = TEXT.split(\",\")\n", "\n", "for w in Wordlist:\n", " # for VOICE in ['en-US-GuyNeural', 'en-US-JennyNeural', 'en-GB-RyanNeural', 'en-GB-SoniaNeural']:\n", " for VOICE in ['ko-KR-InJoonNeural']:\n", " w = w.strip()\n", " # OUTPUT_FILE = f\"{w}-{VOICE.replace('EricNeural', 'Guy-Male').replace('JennyNeural', 'Jenny-Female').replace('RyanNeural', 'Ryan-Male').replace('SoniaNeural', 'Sonia-Female').lower()}.mp3\"\n", " OUTPUT_FILE = f\"{w}-{VOICE.replace('InJoonNeural', 'kr').replace('SoniaNeural', 'Female').replace('ko-KR', 'ko').lower()}.mp3\"\n", " communicate = edge_tts.Communicate(w, VOICE)\n", " await 
communicate.save(OUTPUT_FILE) \n", "\n", "print(\"Files created!\") " ] }, { "cell_type": "code", "execution_count": 55, "id": "95b12e63-f92d-49b4-bd51-a2b9d059fd52", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Files created!\n" ] } ], "source": [ "#Chinese\n", "\n", "import asyncio\n", "import edge_tts\n", "import pygame\n", "\n", "TEXT = \"room\"\n", "Wordlist = TEXT.split(\",\")\n", "\n", "for w in Wordlist:\n", " # for VOICE in ['en-US-GuyNeural', 'en-US-JennyNeural', 'en-GB-RyanNeural', 'en-GB-SoniaNeural']:\n", " for VOICE in ['zh-CN-liaoning-XiaobeiNeural']:\n", " w = w.strip()\n", " # OUTPUT_FILE = f\"{w}-{VOICE.replace('EricNeural', 'Guy-Male').replace('JennyNeural', 'Jenny-Female').replace('RyanNeural', 'Ryan-Male').replace('SoniaNeural', 'Sonia-Female').lower()}.mp3\"\n", " OUTPUT_FILE = f\"{w}-{VOICE.replace('liaoning-XiaobeiNeural', 'zh').replace('SoniaNeural', 'Female').replace('zh-CN', 'cn').lower()}.mp3\"\n", " communicate = edge_tts.Communicate(w, VOICE)\n", " await communicate.save(OUTPUT_FILE) \n", "\n", "print(\"Files created!\") " ] }, { "cell_type": "code", "execution_count": null, "id": "2128587f-e6c8-488d-8c40-6c958b9c735e", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 5 }