From 465f5cdb0fdf630913d3ae46b86a9c8cd5851c49 Mon Sep 17 00:00:00 2001 From: rany Date: Sat, 22 May 2021 20:02:09 +0300 Subject: [PATCH] Improvements --- edge-tts.py | 96 +++++++++++++++++------------------------------------ 1 file changed, 31 insertions(+), 65 deletions(-) diff --git a/edge-tts.py b/edge-tts.py index edb7c05..b6da75a 100755 --- a/edge-tts.py +++ b/edge-tts.py @@ -5,58 +5,25 @@ import uuid import signal import argparse import urllib.request -import websocket # pip install websocket-client +import asyncio +import ssl +import websockets +import unicodedata from email.utils import formatdate from xml.sax.saxutils import escape -try: - import thread -except ImportError: - import _thread as thread trustedClientToken = '6A5AA1D4EAFF4E9FB37E23D68491D6F4' +ssl_context = ssl.create_default_context() voiceList = 'https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=' + trustedClientToken wsUrl = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=' + trustedClientToken def debug(msg, fd=sys.stderr): - if DEBUG: - print(msg, file=fd) - -def terminator(signo, stack_frame): - sys.exit() + if DEBUG: print(msg, file=fd) +def terminator(signo, stack_frame): sys.exit() signal.signal(signal.SIGINT, terminator) signal.signal(signal.SIGTERM, terminator) - -def removeIncompatibleControlChars(text): - return text.replace(chr(9), " ").replace(chr(13), " ").replace(chr(32), " ") - -def connectId(): - return str(uuid.uuid4()).replace("-", "") - -def on_message(ws, m): - m = m.encode() if type(m) is not bytes else m - debug("Received %s" % m) - if b'turn.end' in m: - ws.close() - elif b'Path:audio\r\n' in m: - sys.stdout.buffer.write(m.split(b'Path:audio\r\n')[1]) - """ - elif b'"Type": "WordBoundary",\n' in m: - print(m, file=sys.stderr) - """ - -def on_open(ws): - def run(*args): - message='X-Timestamp:'+formatdate()+'\r\nContent-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n' - message+='{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"'+sentenceBoundaryEnabled+'","wordBoundaryEnabled":"'+wordBoundaryEnabled+'"},"outputFormat":"' + codec + '"}}}}\r\n' - ws.send(message) - debug("Sent %s" % message) - message='X-RequestId:'+connectId()+'\r\nContent-Type:application/ssml+xml\r\n' - message+='X-Timestamp:'+formatdate()+'Z\r\nPath:ssml\r\n\r\n' - message+="" - message+="" + "" + escape(text) + '' - ws.send(message) - debug("Sent %s" % message) - thread.start_new_thread(run, ()) +def connectId(): return str(uuid.uuid4()).replace("-", "") +def removeIncompatibleControlChars(s): return "".join(ch for ch in s if unicodedata.category(ch)[0]!="C") def list_voices(): with urllib.request.urlopen(voiceList) as url: @@ -73,11 +40,26 @@ def list_voices(): print("%s: %s" % ("Name" if key == "ShortName" else key, voice[key])) print() -def run_tts(): - ws = websocket.WebSocketApp(wsUrl, - on_open = on_open, - on_message = on_message) - ws.run_forever() +async def run_tts(): + async with websockets.connect(wsUrl, ssl=ssl_context) as ws: + message='X-Timestamp:'+formatdate()+'\r\nContent-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n' + message+='{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"'+sentenceBoundaryEnabled+'","wordBoundaryEnabled":"'+wordBoundaryEnabled+'"},"outputFormat":"' + codec + '"}}}}\r\n' + await ws.send(message) + debug("> %s" % message) + message='X-RequestId:'+connectId()+'\r\nContent-Type:application/ssml+xml\r\n' + message+='X-Timestamp:'+formatdate()+'Z\r\nPath:ssml\r\n\r\n' + message+="" + message+="" + "" + escape(text) + '' + await ws.send(message) + debug("> %s" % message) + while True: + recv = await ws.recv() + recv = recv.encode() if type(recv) is not bytes else recv + debug("< %s" % recv) + if b'turn.end' in recv: + break + elif b'Path:audio\r\n' in recv: + sys.stdout.buffer.write(recv.split(b'Path:audio\r\n')[1]) if __name__ == "__main__": parser = argparse.ArgumentParser(description="Microsoft Edge's Online TTS Reader") @@ -92,7 +74,6 @@ if __name__ == "__main__": parser.add_argument('-V', '--volume', help="set TTS volume. Default +0%%. For more info check https://bit.ly/3eAE5Nx", default="+0%") parser.add_argument('-s', '--enable-sentence-boundary', help="enable sentence boundary (not implemented but set)", action='store_true') parser.add_argument('-w', '--disable-word-boundary', help="disable word boundary (not implemented but set)", action='store_false') - parser.add_argument('-S', '--dont-split-sentences', help="sends entire text as is (careful because limit is unknown)", action='store_true') parser.add_argument('-D', '--debug', help="some debugging", action='store_true') args = parser.parse_args() DEBUG = args.debug @@ -115,22 +96,7 @@ if __name__ == "__main__": volumeString = args.volume sentenceBoundaryEnabled = 'true' if args.enable_sentence_boundary else 'false' wordBoundaryEnabled = 'true' if args.disable_word_boundary else 'false' - if not args.dont_split_sentences: - try: - from nltk.tokenize import sent_tokenize - debug("Was able to load nltk module") - except Exception as e: - print("You need nltk for sentence splitting.", file=sys.stderr) - print("If you can't install it you could use the --dont-split-sentences flag.", file=sys.stderr) - debug("Exception was %s %s" % (e.message, e.args)) - sys.exit(1) - debug("Starting!") - for text in sent_tokenize(removeIncompatibleControlChars(args.text)): - debug(text) - run_tts() - else: - debug("Split sentences disabled, sending text without splitting of any kind") - text = removeIncompatibleControlChars(args.text) - run_tts() + text = removeIncompatibleControlChars(args.text) + asyncio.get_event_loop().run_until_complete(run_tts()) elif args.list_voices: list_voices()