Update to edge-tts 4.0.0

This commit is contained in:
rany
2021-12-07 22:09:43 +02:00
parent 756766fe6e
commit 4fcecddaf0
16 changed files with 207 additions and 101 deletions

3
build.sh Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/sh
# Build the source distribution and the wheel for this package.
# Trace every command so the log shows exactly what was run.
set -x
# exec replaces this shell with the build process, so the exit status of
# the script is the exit status of setup.py.
exec python3 setup.py sdist bdist_wheel

View File

@@ -1,6 +1,12 @@
#!/bin/sh #!/bin/sh
set -e
rm -rf build dist src/*.egg-info set -ex
python3 setup.py sdist bdist_wheel
twine upload dist/* ./clean.sh
rm -rf build dist src/*.egg-info
./build.sh
./publish.sh
./clean.sh
exit 0

3
clean.sh Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/sh
# Remove build artifacts: the build and dist directories and any
# egg-info metadata directly under src.
# Trace every command so the log shows exactly what was removed.
set -x
# -f keeps rm quiet when a path does not exist; exec makes rm's exit
# status the exit status of the script.
exec rm -rf build dist src/*.egg-info

View File

@@ -1,21 +1,27 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# Example Python script that shows how to use edge-tts as a module """
Example Python script that shows how to use edge-tts as a module
"""
import asyncio import asyncio
import tempfile import tempfile
from playsound import playsound from playsound import playsound
import edgeTTS import edge_tts
async def main():
    """
    Ask the user for text, synthesize it and play the result.

    Every item yielded by Communicate.run() whose third element is not
    None has that element written to a temporary file, which is then
    handed to playsound by file name.
    """
    communicate = edge_tts.Communicate()
    ask = input("What do you want TTS to say? ")
    with tempfile.NamedTemporaryFile() as temporary_file:
        async for i in communicate.run(ask):
            # Items without a payload in position 2 are skipped here.
            if i[2] is not None:
                temporary_file.write(i[2])
        # playsound reopens the file by name, so anything still sitting in
        # the write buffer must reach the file before playback starts.
        temporary_file.flush()
        playsound(temporary_file.name)
if __name__ == "__main__": if __name__ == "__main__":

3
publish.sh Executable file
View File

@@ -0,0 +1,3 @@
#!/bin/sh
# Upload every file in dist/ with twine.
# Trace every command so the log shows exactly what was uploaded.
set -x
# exec makes twine's exit status the exit status of the script.
exec twine upload dist/*

View File

@@ -1,6 +1,6 @@
[metadata] [metadata]
name = edge-tts name = edge-tts
version = 3.0.2 version = 4.0.0
author = rany author = rany
author_email = ranygh@riseup.net author_email = ranygh@riseup.net
description = Microsoft Edge's TTS description = Microsoft Edge's TTS
@@ -27,5 +27,5 @@ where=src
[options.entry_points] [options.entry_points]
console_scripts = console_scripts =
edge-tts = edgeTTS.__main__:main edge-tts = edge_tts.__main__:main
edge-playback = edgePlayback.__init__:main edge-playback = edge_playback.__init__:main

View File

@@ -1,6 +0,0 @@
#!/usr/bin/env python3
from edgePlayback.__init__ import *
if __name__ == "__main__":
main()

View File

@@ -1,5 +1,9 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
"""
Playback TTS with subtitles using edge-tts and mpv.
"""
import subprocess import subprocess
import sys import sys
import tempfile import tempfile
@@ -7,13 +11,16 @@ from shutil import which
def main(): def main():
"""
Main function.
"""
if which("mpv") and which("edge-tts"): if which("mpv") and which("edge-tts"):
with tempfile.NamedTemporaryFile() as media: with tempfile.NamedTemporaryFile() as media:
with tempfile.NamedTemporaryFile() as subtitle: with tempfile.NamedTemporaryFile() as subtitle:
print() print()
print("Media file %s" % media.name) print(f"Media file {media.name}")
print("Subtitle file %s\n" % subtitle.name) print(f"Subtitle file {subtitle.name}\n")
p = subprocess.Popen( with subprocess.Popen(
[ [
"edge-tts", "edge-tts",
"-w", "-w",
@@ -23,17 +30,18 @@ def main():
subtitle.name, subtitle.name,
] ]
+ sys.argv[1:] + sys.argv[1:]
) ) as process:
p.communicate() process.communicate()
p = subprocess.Popen(
with subprocess.Popen(
[ [
"mpv", "mpv",
"--keep-open=yes", "--keep-open=yes",
"--sub-file=" + subtitle.name, f"--sub-file={subtitle.name}",
media.name, media.name,
] ]
) ) as process:
p.communicate() process.communicate()
else: else:
print("This script requires mpv and edge-tts.") print("This script requires mpv and edge-tts.")

View File

@@ -0,0 +1,10 @@
#!/usr/bin/env python3
"""
This is the main file for the edge_playback package.
"""
from edge_playback.__init__ import main
if __name__ == "__main__":
main()

View File

@@ -1,3 +1,7 @@
"""
__init__ for edge_tts
"""
from .communicate import Communicate from .communicate import Communicate
from .list_voices import list_voices from .list_voices import list_voices
from .submaker import SubMaker from .submaker import SubMaker

View File

@@ -1,3 +1,7 @@
"""
__main__ for edge_tts.
"""
from .util import main from .util import main
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -53,17 +53,14 @@ def remove_incompatible_characters(string):
if isinstance(string, bytes): if isinstance(string, bytes):
string = string.decode("utf-8") string = string.decode("utf-8")
cleaned_string = "" string = list(string)
for character in string:
character_code = ord(character) for idx in range(len(string)): # pylint: disable=consider-using-enumerate
if ( code = ord(string[idx])
(0 <= character_code <= 8) if (0 <= code <= 8) or (11 <= code <= 12) or (14 <= code <= 31):
or (11 <= character_code <= 12) string[idx] = " "
or (14 <= character_code <= 31)
): return "".join(string)
character = " "
cleaned_string += character
return cleaned_string
def connect_id(): def connect_id():
@@ -144,7 +141,8 @@ def mkssml(text, voice, pitch, rate, volume):
ssml = ( ssml = (
"<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>" "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>"
f"<voice name='{voice}'><prosody pitch='{pitch}' rate='{rate}' volume='{volume}'>{text}</prosody></voice></speak>" f"<voice name='{voice}'><prosody pitch='{pitch}' rate='{rate}' volume='{volume}'>"
f"{text}</prosody></voice></speak>"
) )
return ssml return ssml
@@ -192,7 +190,7 @@ def ssml_headers_plus_data(request_id, timestamp, ssml):
) )
class Communicate: class Communicate: # pylint: disable=too-few-public-methods
""" """
Class for communicating with the service. Class for communicating with the service.
""" """
@@ -214,7 +212,7 @@ class Communicate:
rate="+0%", rate="+0%",
volume="+0%", volume="+0%",
customspeak=False, customspeak=False,
): ): # pylint: disable=too-many-arguments, too-many-locals
""" """
Runs the Communicate class. Runs the Communicate class.
@@ -266,7 +264,8 @@ class Communicate:
"Origin": "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold", "Origin": "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
"Accept-Encoding": "gzip, deflate, br", "Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "en-US,en;q=0.9", "Accept-Language": "en-US,en;q=0.9",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
" (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
}, },
) as websocket: ) as websocket:
for message in messages: for message in messages:
@@ -275,18 +274,22 @@ class Communicate:
# Prepare the request to be sent to the service. # Prepare the request to be sent to the service.
# #
# Note that sentenceBoundaryEnabled and wordBoundaryEnabled are actually supposed # Note sentenceBoundaryEnabled and wordBoundaryEnabled are actually supposed
# to be booleans, but Edge Browser seems to send them as strings and not booleans. # to be booleans, but Edge Browser seems to send them as strings.
# This is a bug in Edge Browser as Azure Cognitive Services actually sends them as #
# booleans and not strings. For now I will send them as booleans unless it causes # This is a bug in Edge as Azure Cognitive Services actually sends them as
# bool and not string. For now I will send them as bool unless it causes
# any problems. # any problems.
# #
# Also pay close attention to double { } in request (escape for Python .format()). # Also pay close attention to double { } in request (escape for f-string).
request = ( request = (
f"X-Timestamp:{self.date}\r\n" f"X-Timestamp:{self.date}\r\n"
"Content-Type:application/json; charset=utf-8\r\n" "Content-Type:application/json; charset=utf-8\r\n"
"Path:speech.config\r\n\r\n" "Path:speech.config\r\n\r\n"
f'{{"context":{{"synthesis":{{"audio":{{"metadataoptions":{{"sentenceBoundaryEnabled":{sentence_boundary},"wordBoundaryEnabled":{word_boundary}}},"outputFormat":"{codec}"}}}}}}}}\r\n' '{"context":{"synthesis":{"audio":{"metadataoptions":{'
f'"sentenceBoundaryEnabled":{sentence_boundary},'
f'"wordBoundaryEnabled":{word_boundary}}},"outputFormat":"{codec}"'
"}}}}\r\n"
) )
# Send the request to the service. # Send the request to the service.
await websocket.send_str(request) await websocket.send_str(request)

View File

@@ -15,9 +15,7 @@ async def list_voices():
This pulls data from the URL used by Microsoft Edge to return a list of This pulls data from the URL used by Microsoft Edge to return a list of
all available voices. However many more experimental voices are available all available voices. However many more experimental voices are available
than are listed here. than are listed here. (See https://aka.ms/csspeech/voicenames)
(See
https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support)
Returns: Returns:
dict: A dictionary of voice attributes. dict: A dictionary of voice attributes.
@@ -29,7 +27,8 @@ async def list_voices():
"Authority": "speech.platform.bing.com", "Authority": "speech.platform.bing.com",
"Sec-CH-UA": '" Not;A Brand";v="99", "Microsoft Edge";v="91", "Chromium";v="91"', "Sec-CH-UA": '" Not;A Brand";v="99", "Microsoft Edge";v="91", "Chromium";v="91"',
"Sec-CH-UA-Mobile": "?0", "Sec-CH-UA-Mobile": "?0",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
"Accept": "*/*", "Accept": "*/*",
"Sec-Fetch-Site": "none", "Sec-Fetch-Site": "none",
"Sec-Fetch-Mode": "cors", "Sec-Fetch-Mode": "cors",

View File

@@ -1,29 +1,68 @@
"""
SubMaker package for the Edge TTS project.
SubMaker is a package that makes the process of creating subtitles with
information provided by the service easier.
"""
import math import math
from xml.sax.saxutils import escape from xml.sax.saxutils import escape
def formatter(offset1, offset2, subdata):
    """
    Build one subtitle cue: a timing line followed by the cue text.

    Args:
        offset1: Start offset, converted with mktimestamp.
        offset2: End offset, converted with mktimestamp.
        subdata (str): Cue text; it is XML-escaped before being emitted.

    Returns:
        str: "<start> --> <end>", the escaped text, and a blank line,
        all terminated with CRLF.
    """
    start = mktimestamp(offset1)
    end = mktimestamp(offset2)
    text = escape(subdata)
    return f"{start} --> {end}\r\n{text}\r\n\r\n"
def mktimestamp(time_unit):
    """
    mktimestamp returns the timecode of the subtitle.

    The timecode is in the format of 00:00:00.000.

    Args:
        time_unit (int or float): Offset counted in units of which there
            are 10 ** 7 per second.

    Returns:
        str: The timecode of the subtitle.
    """
    # Round to whole milliseconds *before* splitting into fields. Formatting
    # a float seconds value with three decimals can round 59.9996 up to
    # "60.000" without carrying into the minute, which is not a valid
    # timecode.
    total_milliseconds = math.floor(time_unit / 10 ** 4 + 0.5)
    total_seconds, milliseconds = divmod(total_milliseconds, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hour, minute = divmod(total_minutes, 60)
    return f"{hour:02d}:{minute:02d}:{seconds:02d}.{milliseconds:03d}"
class SubMaker: class SubMaker:
"""
SubMaker class
"""
def __init__(self, overlapping=5): def __init__(self, overlapping=5):
"""
SubMaker constructor.
Args:
overlapping (int): The amount of time in seconds that the
subtitles should overlap.
"""
self.subs_and_offset = [] self.subs_and_offset = []
self.broken_offset = [] self.broken_offset = []
self.overlapping = overlapping * (10 ** 7) self.overlapping = overlapping * (10 ** 7)
def create_sub(self, timestamp, text): def create_sub(self, timestamp, text):
"""
create_sub creates a subtitle with the given timestamp and text
and adds it to the list of subtitles
Args:
timestamp (int): The timestamp of the subtitle.
text (str): The text of the subtitle.
Returns:
None
"""
if len(self.subs_and_offset) >= 2: if len(self.subs_and_offset) >= 2:
if self.subs_and_offset[-2] >= timestamp + sum(self.broken_offset): if self.subs_and_offset[-2] >= timestamp + sum(self.broken_offset):
self.broken_offset.append(self.subs_and_offset[-2]) self.broken_offset.append(self.subs_and_offset[-2])
@@ -33,6 +72,12 @@ class SubMaker:
self.subs_and_offset.append(text) self.subs_and_offset.append(text)
def generate_subs(self): def generate_subs(self):
"""
generate_subs generates the complete subtitle file.
Returns:
str: The complete subtitle file.
"""
if len(self.subs_and_offset) >= 2: if len(self.subs_and_offset) >= 2:
data = "WEBVTT\r\n\r\n" data = "WEBVTT\r\n\r\n"
old_time_stamp = None old_time_stamp = None

View File

@@ -7,7 +7,54 @@ import argparse
import asyncio import asyncio
import sys import sys
from edgeTTS import Communicate, SubMaker, list_voices from edge_tts import Communicate, SubMaker, list_voices
async def _list_voices():
    """
    Print the attributes of every voice returned by list_voices().

    Each attribute is printed as "key: value" on its own line and
    consecutive voices are separated by one blank line. The
    SuggestedCodec, FriendlyName and Status attributes are not printed.
    """
    hidden = ("SuggestedCodec", "FriendlyName", "Status")
    voices = await list_voices()
    for position, entry in enumerate(voices):
        # Blank separator line before every voice except the first.
        if position > 0:
            print()
        for name, value in entry.items():
            if name not in hidden:
                print(f"{name}: {value}")
async def _tts(args):
    """
    Run text-to-speech for the parsed command line arguments.

    Media data is written to args.write_media when that is set, otherwise
    to stdout. Subtitles are written to args.write_subtitles when that is
    set, otherwise to stderr.

    Args:
        args (argparse.Namespace): Parsed arguments. This function reads
            text, overlapping, write_media, write_subtitles,
            enable_sentence_boundary, enable_word_boundary, codec, voice,
            pitch, rate, volume and custom_ssml.
    """
    tts = Communicate()
    subs = SubMaker(args.overlapping)
    media_file = None
    if args.write_media:
        media_file = open(args.write_media, "wb")  # pylint: disable=consider-using-with
    try:
        async for i in tts.run(
            args.text,
            args.enable_sentence_boundary,
            args.enable_word_boundary,
            args.codec,
            args.voice,
            args.pitch,
            args.rate,
            args.volume,
            customspeak=args.custom_ssml,
        ):
            if i[2] is not None:
                # Third element present: media payload.
                if media_file is None:
                    sys.stdout.buffer.write(i[2])
                else:
                    media_file.write(i[2])
            elif i[0] is not None and i[1] is not None:
                # First two elements present: passed to SubMaker.create_sub
                # as (timestamp, text).
                subs.create_sub(i[0], i[1])
    finally:
        # Close the media file even when the service call fails part-way,
        # so the handle is never leaked.
        if media_file is not None:
            media_file.close()

    if not args.write_subtitles:
        sys.stderr.write(subs.generate_subs())
    else:
        with open(args.write_subtitles, "w", encoding="utf-8") as file:
            file.write(subs.generate_subs())
async def _main(): async def _main():
@@ -24,19 +71,23 @@ async def _main():
parser.add_argument( parser.add_argument(
"-v", "-v",
"--voice", "--voice",
help="voice for TTS. Default: Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)", help="voice for TTS. "
"Default: Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)",
default="Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)", default="Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)",
) )
parser.add_argument( parser.add_argument(
"-c", "-c",
"--codec", "--codec",
help="codec format. Default: audio-24khz-48kbitrate-mono-mp3. Another choice is webm-24khz-16bit-mono-opus. For more info check https://bit.ly/2T33h6S", help="codec format. Default: audio-24khz-48kbitrate-mono-mp3. "
"Another choice is webm-24khz-16bit-mono-opus. "
"For more info check https://bit.ly/2T33h6S",
default="audio-24khz-48kbitrate-mono-mp3", default="audio-24khz-48kbitrate-mono-mp3",
) )
group.add_argument( group.add_argument(
"-l", "-l",
"--list-voices", "--list-voices",
help="lists available voices. Edge's list is incomplete so check https://bit.ly/2SFq1d3", help="lists available voices. "
"Edge's list is incomplete so check https://bit.ly/2SFq1d3",
action="store_true", action="store_true",
) )
parser.add_argument( parser.add_argument(
@@ -85,6 +136,10 @@ async def _main():
) )
args = parser.parse_args() args = parser.parse_args()
if args.list_voices:
await _list_voices()
sys.exit(0)
if args.text is not None or args.file is not None: if args.text is not None or args.file is not None:
if args.file is not None: if args.file is not None:
# we need to use sys.stdin.read() because some devices # we need to use sys.stdin.read() because some devices
@@ -96,45 +151,8 @@ async def _main():
# logger.debug("reading from %s" % args.file) # logger.debug("reading from %s" % args.file)
with open(args.file, "r", encoding="utf-8") as file: with open(args.file, "r", encoding="utf-8") as file:
args.text = file.read() args.text = file.read()
tts = Communicate()
subs = SubMaker(args.overlapping)
if args.write_media:
media_file = open(args.write_media, "wb")
async for i in tts.run(
args.text,
args.enable_sentence_boundary,
args.enable_word_boundary,
args.codec,
args.voice,
args.pitch,
args.rate,
args.volume,
customspeak=args.custom_ssml,
):
if i[2] is not None:
if not args.write_media:
sys.stdout.buffer.write(i[2])
else:
media_file.write(i[2])
elif i[0] is not None and i[1] is not None:
subs.create_sub(i[0], i[1])
if args.write_media:
media_file.close()
if not args.write_subtitles:
sys.stderr.write(subs.generate_subs())
else:
with open(args.write_subtitles, "w", encoding="utf-8") as file:
file.write(subs.generate_subs())
elif args.list_voices:
for idx, voice in enumerate(await list_voices()):
if idx != 0:
print()
for key in voice.keys(): await _tts(args)
if key in ["SuggestedCodec", "FriendlyName", "Status"]:
continue
# print ("%s: %s" % ("Name" if key == "ShortName" else key, voice[key]))
print(f"{key}: {voice[key]}")
def main(): def main():