diff --git a/edge-tts.py b/edge-tts.py index a6e8dc1..a5c49e7 100755 --- a/edge-tts.py +++ b/edge-tts.py @@ -50,11 +50,14 @@ def list_voices(): print("%s: %s" % ("Name" if key == "ShortName" else key, voice[key])) print() -def mkssmlmsg(text=""): +def mkssmlmsg(text="", customspeak=False): message='X-RequestId:'+connectId()+'\r\nContent-Type:application/ssml+xml\r\n' message+='X-Timestamp:'+formatdate()+'Z\r\nPath:ssml\r\n\r\n' - message+="" - message+="" + "" + text + '' + if customspeak: + message+=text + else: + message+="" + message+="" + "" + text + '' return message async def run_tts(msg): @@ -120,6 +123,7 @@ if __name__ == "__main__": group = parser.add_mutually_exclusive_group(required=True) group.add_argument('-t', '--text', help='what TTS will say') group.add_argument('-f', '--file', help='same as --text but read from file') + parser.add_argument('-z', '--custom-ssml', help='treat text as ssml to send. For more info check https://bit.ly/3fIq13S', action='store_true') parser.add_argument('-v', '--voice', help='voice for TTS. Default: en-US-AriaNeural', default='en-US-AriaNeural') parser.add_argument('-c', '--codec', help="codec format. Default: audio-24khz-48kbitrate-mono-mp3. Another choice is webm-24khz-16bit-mono-opus", default='audio-24khz-48kbitrate-mono-mp3') group.add_argument('-l', '--list-voices', help="lists available voices. Edge's list is incomplete so check https://bit.ly/2SFq1d3", action='store_true') @@ -152,9 +156,12 @@ if __name__ == "__main__": wordBoundaryEnabled = 'true' if args.enable_word_boundary else 'false' # https://hpbn.co/websocket/ says client must also send a masking key, # which adds an extra 4 bytes to the header, resulting in 6–14 bytes over overhead - overhead = len(mkssmlmsg()) + 14 - wsmax = 65536 - overhead - for text in _minimize(escape(removeIncompatibleControlChars(args.text)), " ", wsmax): - asyncio.get_event_loop().run_until_complete(run_tts(mkssmlmsg(text))) + if args.custom_ssml: + asyncio.get_event_loop().run_until_complete(run_tts(mkssmlmsg(text=args.text, customspeak=True))) + else: + overhead = len(mkssmlmsg()) + 14 + wsmax = 65536 - overhead + for text in _minimize(escape(removeIncompatibleControlChars(args.text)), " ", wsmax): + asyncio.get_event_loop().run_until_complete(run_tts(mkssmlmsg(text))) elif args.list_voices: list_voices()