#!/usr/bin/env python3
"""Command-line reader for Microsoft Edge's online TTS service.

With ``--text`` the script opens a websocket to the read-aloud endpoint, sends
a configuration message followed by an SSML request, and writes the audio it
receives to standard output as raw bytes.  With ``--list-voices`` it downloads
the voice list and prints it instead.

Recovered from the patch "first commit"
(3a4509a942b6484a4b4f7d58bd65c870a4c6d767, rany, Mon, 10 May 2021 21:55:56
+0300), which created this file as ``edge-tts.py``.
"""
import argparse
import json
import sys
import threading
import urllib.request
import uuid
from xml.sax.saxutils import escape, quoteattr

trustedClientToken = '6A5AA1D4EAFF4E9FB37E23D68491D6F4'
voiceList = 'https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=' + trustedClientToken
wsUrl = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=' + trustedClientToken

# Header line that precedes the audio bytes in an incoming message.
AUDIO_PATH_HEADER = b'Path:audio\r\n'

# Keys of a voice entry that list_voices() does not print.
HIDDEN_VOICE_KEYS = frozenset(("Name", "SuggestedCodec", "FriendlyName", "Status"))


def connectId():
    """Return a fresh request id: a random UUID4 as 32 hex digits, no dashes."""
    return uuid.uuid4().hex


def audio_payload(frame):
    """Return everything in *frame* after the first ``Path:audio`` header line.

    Only the first occurrence of the header is treated as the separator, so
    audio data that happens to contain the same byte sequence is returned
    intact.  Returns ``b''`` when the header is absent.
    """
    return frame.partition(AUDIO_PATH_HEADER)[2]


def build_config_message(codec, sentence_boundary, word_boundary):
    """Return the ``speech.config`` message sent right after connecting.

    Args:
        codec: value for the ``outputFormat`` field.
        sentence_boundary: value for ``sentenceBoundaryEnabled`` (the script
            passes the strings ``'True'`` / ``'False'``).
        word_boundary: value for ``wordBoundaryEnabled`` (same convention).

    The JSON body is serialized with ``json.dumps`` so the values are escaped
    correctly; compact separators keep the output free of extra whitespace.
    """
    config = {
        "context": {
            "synthesis": {
                "audio": {
                    "metadataoptions": {
                        "sentenceBoundaryEnabled": sentence_boundary,
                        "wordBoundaryEnabled": word_boundary,
                    },
                    "outputFormat": codec,
                },
            },
        },
    }
    header = 'Content-Type:application/json; charset=utf-8\r\n\r\nPath:speech.config\r\n\r\n'
    return header + json.dumps(config, separators=(',', ':')) + '\r\n'


def build_ssml_message(request_id, voice, pitch, rate, volume, text):
    """Return the ``ssml`` request message asking the service to speak *text*.

    Args:
        request_id: value for the ``X-RequestId`` header.
        voice: voice name placed in the ``voice`` element.
        pitch, rate, volume: values for the ``prosody`` attributes.
        text: the text to speak; XML-escaped as element content.

    NOTE(review): the element markup was missing from the recovered source
    (only empty string literals surrounded the text, leaving voice, pitch,
    rate and volume unused).  It is rebuilt here as a standard SSML document
    with a fixed ``xml:lang='en-US'`` -- confirm against the service.
    """
    header = 'X-RequestId:' + request_id + '\r\nContent-Type:application/ssml+xml\r\nPath:ssml\r\n\r\n'
    # quoteattr() both quotes and escapes attribute values; escape() is the
    # right tool for element text because it adds no surrounding quotes.
    ssml = ''.join([
        "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>",
        '<voice name=', quoteattr(voice), '>',
        '<prosody pitch=', quoteattr(pitch),
        ' rate=', quoteattr(rate),
        ' volume=', quoteattr(volume), '>',
        escape(text),
        '</prosody></voice></speak>',
    ])
    return header + ssml


def on_message(ws, m):
    """Handle one incoming websocket message.

    A message containing ``turn.end`` closes the socket.  Otherwise, a message
    containing the ``Path:audio`` header line has everything after that header
    written to standard output as bytes.  Other messages are ignored.
    """
    frame = m.encode() if isinstance(m, str) else m
    if b'turn.end' in frame:
        ws.close()
    elif AUDIO_PATH_HEADER in frame:
        sys.stdout.buffer.write(audio_payload(frame))


def on_open(ws):
    """Send the configuration and SSML request once the socket is open.

    Reads the module-level settings assigned by the ``__main__`` section
    (``codec``, ``voice``, ``pitchString``, ``rateString``, ``volumeString``,
    ``sentenceBoundaryEnabled``, ``wordBoundaryEnabled``, ``text``).
    """
    # TODO: add X-Timestamp header with value being javascript Date().toString() in US locale
    def run(*args):
        ws.send(build_config_message(codec, sentenceBoundaryEnabled, wordBoundaryEnabled))
        ws.send(build_ssml_message(connectId(), voice, pitchString, rateString, volumeString, text))

    # The sends run on a separate thread, as in the original; daemon=True so
    # this thread never keeps the process alive on its own.
    threading.Thread(target=run, daemon=True).start()


def list_voices():
    """Download the voice list and print each voice's fields.

    Every voice is preceded by a blank line; the keys in HIDDEN_VOICE_KEYS are
    skipped.
    """
    with urllib.request.urlopen(voiceList) as response:
        voices = json.loads(response.read().decode())
    for entry in voices:
        print()
        for key, value in entry.items():
            if key not in HIDDEN_VOICE_KEYS:
                print("%s: %s" % (key, value))


def run_tts():
    """Connect to the TTS websocket and run until the connection closes."""
    # Imported here rather than at module level so that --list-voices works
    # without the third-party package installed.
    import websocket  # pip install websocket-client

    # For protocol debugging, call websocket.enableTrace(1) before connecting.
    ws = websocket.WebSocketApp(wsUrl,
                                on_open=on_open,
                                on_message=on_message)
    ws.run_forever()


def build_parser():
    """Return the argument parser for the command line.

    Exactly one of ``--text`` and ``--list-voices`` is required.
    """
    parser = argparse.ArgumentParser(description="Microsoft Edge's Online TTS Reader")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-t', '--text', help='what TTS will say')
    parser.add_argument('-v', '--voice', help='voice for TTS. Default: en-US-AriaNeural', default='en-US-AriaNeural')
    parser.add_argument('-c', '--codec', help="codec format. Default: audio-24khz-48kbitrate-mono-mp3. webm-24khz-16bit-mono-opus doesn't work", default='audio-24khz-48kbitrate-mono-mp3')
    group.add_argument('-l', '--list-voices', help="lists available voices. Edge's list is incomplete so check https://bit.ly/2SFq1d3", action='store_true')
    parser.add_argument('-p', '--pitch', help="set TTS pitch. Default +0Hz", default="+0Hz")
    parser.add_argument('-r', '--rate', help="set TTS rate. Default +0%%", default="+0%")
    parser.add_argument('-V', '--volume', help="set TTS volume. Default +0%%", default="+0%")
    parser.add_argument('-s', '--enable-sentence-boundary', help="enable sentence boundary", action='store_true')
    parser.add_argument('-w', '--disable-word-boundary', help="disable word boundary", action='store_false')
    return parser


if __name__ == "__main__":
    args = build_parser().parse_args()

    if args.text is not None:
        # These module-level names are read by on_open().
        codec = args.codec
        voice = args.voice
        pitchString = args.pitch
        rateString = args.rate
        volumeString = args.volume
        sentenceBoundaryEnabled = 'True' if args.enable_sentence_boundary else 'False'
        # store_false: the attribute is True unless -w was given, so it
        # directly means "word boundary enabled".
        wordBoundaryEnabled = 'True' if args.disable_word_boundary else 'False'
        text = args.text
        run_tts()
    elif args.list_voices:
        list_voices()