From 107d635e9220c13d62fc18c8264d2667cf03f767 Mon Sep 17 00:00:00 2001
From: rany
Date: Tue, 11 May 2021 12:16:36 +0300
Subject: [PATCH] update

---
Reviewer notes (text between the "---" line and the diffstat is not part of
the commit message):

* nltk fallback: Python 3 exceptions have no `.message` attribute, and the
  argument is evaluated even when debugging is off, so the handler raised
  AttributeError before reaching sys.exit(1). The debug line now reports
  the exception class name and its args.
* The `if args.text is not None or args.file is not None:` guard is kept
  unchanged (now a context line). `(args.text or args.file) is not None`
  mixes truthiness with a None check and silently skips `--text ""`.
* removeIncompatibleControlChars(): dropped `.replace(chr(32), " ")`, which
  replaced a space with a space.
* NOTE(review): line breaks, indentation and blank context lines were
  reconstructed from the hunk counts; confirm the context against the
  target tree before applying. The `index` line is carried over unchanged,
  so its post-image blob id no longer describes the result.

 edge-tts.py | 63 +++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 49 insertions(+), 14 deletions(-)

diff --git a/edge-tts.py b/edge-tts.py
index 0526080..a6b6019 100755
--- a/edge-tts.py
+++ b/edge-tts.py
@@ -6,7 +6,7 @@ import signal
 import argparse
 import urllib.request
 import websocket # pip install websocket-client
-from nltk.tokenize import sent_tokenize
+from email.utils import formatdate
 from xml.sax.saxutils import quoteattr as escape
 try:
     import thread
@@ -17,46 +17,63 @@ trustedClientToken = '6A5AA1D4EAFF4E9FB37E23D68491D6F4'
 voiceList = 'https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=' + trustedClientToken
 wsUrl = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=' + trustedClientToken
 
+def debug(msg, fd=sys.stderr):
+    if DEBUG:
+        print(msg, file=fd)
+
 def terminator(signo, stack_frame):
     sys.exit()
 signal.signal(signal.SIGINT, terminator)
 signal.signal(signal.SIGTERM, terminator)
 
+def removeIncompatibleControlChars(text):
+    return text.replace(chr(9), " ").replace(chr(13), " ")
+
 def connectId():
     return str(uuid.uuid4()).replace("-", "")
 
 def on_message(ws, m):
     m = m.encode() if type(m) is str else m
+    debug("Received %s" % m)
     if b'turn.end' in m:
         ws.close()
     elif b'Path:audio\r\n' in m:
         sys.stdout.buffer.write(m.split(b'Path:audio\r\n')[1])
+    """
+    elif b'"Type": "WordBoundary",\n' in m:
+        print(m, file=sys.stderr)
+    """
 
 def on_open(ws):
-    # TODO: add X-Timestamp header with value being javascript Date().toString() in US locale
     def run(*args):
-        message='Content-Type:application/json; charset=utf-8\r\n\r\nPath:speech.config\r\n\r\n{"context":{"synthesis":{"audio":{"metadataoptions":'
-        {"sentenceBoundaryEnabled":"'+sentenceBoundaryEnabled+'","wordBoundaryEnabled":"'+wordBoundaryEnabled+'"},"outputFormat":"' + codec + '"}}}}\r\n'
+        message='X-Timestamp:'+formatdate()+'\r\nContent-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n'
+        message+='{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"'+sentenceBoundaryEnabled+'","wordBoundaryEnabled":"'+wordBoundaryEnabled+'"},"outputFormat":"' + codec + '"}}}}\r\n'
         ws.send(message)
-        message='X-RequestId:'+connectId()+'\r\nContent-Type:application/ssml+xml\r\nPath:ssml\r\n\r\n'
+        debug("Sent %s" % message)
+        message='X-RequestId:'+connectId()+'\r\nContent-Type:application/ssml+xml\r\n'
+        message+='X-Timestamp:'+formatdate()+'Z\r\nPath:ssml\r\n\r\n'
         message+=""
         message+="" + "" + escape(text) + ''
         ws.send(message)
+        debug("Sent %s" % message)
     thread.start_new_thread(run, ())
 
 def list_voices():
     with urllib.request.urlopen(voiceList) as url:
+        debug("Loading json from %s" % voiceList)
         data = json.loads(url.read().decode())
+        debug("JSON Loaded")
         for voice in data:
             print()
             for key in voice.keys():
-                if key == "Name" or key == "SuggestedCodec" \
-                or key == "FriendlyName" or key == "Status":
+                debug("Processing key %s" % key)
+                if key in ["Name", "SuggestedCodec", "FriendlyName", "Status"]:
+                    debug("Key %s skipped" % key)
                     continue
-                print("%s: %s" % (key, voice[key]))
+                print("%s: %s" % ("Name" if key == "ShortName" else key, voice[key]))
+    print()
 
 def run_tts():
-    #websocket.enableTrace(1)
     ws = websocket.WebSocketApp(wsUrl,
             on_open = on_open,
             on_message = on_message)
@@ -68,16 +85,19 @@ if __name__ == "__main__":
     group.add_argument('-t', '--text', help='what TTS will say')
     group.add_argument('-f', '--file', help='same as --text but read from file')
     parser.add_argument('-v', '--voice', help='voice for TTS. Default: en-US-AriaNeural', default='en-US-AriaNeural')
-    parser.add_argument('-c', '--codec', help="codec format. Default: audio-24khz-48kbitrate-mono-mp3. webm-24khz-16bit-mono-opus doesn't work", default='audio-24khz-48kbitrate-mono-mp3')
+    parser.add_argument('-c', '--codec', help="codec format. Default: audio-24khz-48kbitrate-mono-mp3. Another choice is webm-24khz-16bit-mono-opus", default='audio-24khz-48kbitrate-mono-mp3')
     group.add_argument('-l', '--list-voices', help="lists available voices. Edge's list is incomplete so check https://bit.ly/2SFq1d3", action='store_true')
     parser.add_argument('-p', '--pitch', help="set TTS pitch. Default +0Hz, For more info check https://bit.ly/3eAE5Nx", default="+0Hz")
     parser.add_argument('-r', '--rate', help="set TTS rate. Default +0%%. For more info check https://bit.ly/3eAE5Nx", default="+0%")
     parser.add_argument('-V', '--volume', help="set TTS volume. Default +0%%. For more info check https://bit.ly/3eAE5Nx", default="+0%")
     parser.add_argument('-s', '--enable-sentence-boundary', help="enable sentence boundary (not implemented but set)", action='store_true')
     parser.add_argument('-w', '--disable-word-boundary', help="disable word boundary (not implemented but set)", action='store_false')
+    parser.add_argument('-S', '--dont-split-sentences', help="sends entire text as is (careful because limit is unknown)", action='store_true')
+    parser.add_argument('-D', '--debug', help="some debugging", action='store_true')
     args = parser.parse_args()
+    DEBUG = args.debug
 
     if args.text is not None or args.file is not None:
         if args.file is not None:
             with open(args.file, 'r') as file:
                 args.text = file.read()
@@ -86,9 +106,24 @@ if __name__ == "__main__":
         pitchString = args.pitch
         rateString = args.rate
         volumeString = args.volume
-        sentenceBoundaryEnabled = 'True' if args.enable_sentence_boundary else 'False'
-        wordBoundaryEnabled = 'True' if args.disable_word_boundary else 'False'
-        for text in sent_tokenize(args.text.replace(chr(9), " ").replace(chr(13), " ").replace(chr(32), " ")):
+        sentenceBoundaryEnabled = 'true' if args.enable_sentence_boundary else 'false'
+        wordBoundaryEnabled = 'true' if args.disable_word_boundary else 'false'
+        if not args.dont_split_sentences:
+            try:
+                from nltk.tokenize import sent_tokenize
+                debug("Was able to load nltk module")
+            except Exception as e:
+                print("You need nltk for sentence splitting.", file=sys.stderr)
+                print("If you can't install it you could use the --dont-split-sentences flag.", file=sys.stderr)
+                debug("Exception was %s %s" % (type(e).__name__, e.args))
+                sys.exit(1)
+            debug("Starting!")
+            for text in sent_tokenize(removeIncompatibleControlChars(args.text)):
+                debug(text)
+                run_tts()
+        else:
+            debug("Split sentences disabled, sending text without splitting of any kind")
+            text = removeIncompatibleControlChars(args.text)
             run_tts()
-    elif args.list_voices is True:
+    elif args.list_voices:
         list_voices()