From 465f5cdb0fdf630913d3ae46b86a9c8cd5851c49 Mon Sep 17 00:00:00 2001
From: rany <ranygh@riseup.net>
Date: Sat, 22 May 2021 20:02:09 +0300
Subject: [PATCH] Improvements

---
 edge-tts.py | 96 +++++++++++++++++------------------------------------
 1 file changed, 31 insertions(+), 65 deletions(-)
diff --git a/edge-tts.py b/edge-tts.py
index edb7c05..b6da75a 100755
--- a/edge-tts.py
+++ b/edge-tts.py
@@ -5,58 +5,25 @@ import uuid
 import signal
 import argparse
 import urllib.request
-import websocket # pip install websocket-client
+import asyncio
+import ssl
+import websockets
+import unicodedata
 from email.utils import formatdate
 from xml.sax.saxutils import escape
-try:
-	import thread
-except ImportError:
-	import _thread as thread
 
 trustedClientToken = '6A5AA1D4EAFF4E9FB37E23D68491D6F4'
+ssl_context = ssl.create_default_context()
 voiceList = 'https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=' + trustedClientToken
 wsUrl = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=' + trustedClientToken
 
 def debug(msg, fd=sys.stderr):
-	if DEBUG:
-		print(msg, file=fd)
-
-def terminator(signo, stack_frame):
-	sys.exit()
+	if DEBUG: print(msg, file=fd)
+def terminator(signo, stack_frame): sys.exit()
 signal.signal(signal.SIGINT, terminator)
 signal.signal(signal.SIGTERM, terminator)
-
-def removeIncompatibleControlChars(text):
-	return text.replace(chr(9), " ").replace(chr(13), " ").replace(chr(32), " ")
-
-def connectId():
-	return str(uuid.uuid4()).replace("-", "")
-
-def on_message(ws, m):
-	m = m.encode() if type(m) is not bytes else m
-	debug("Received %s" % m)
-	if b'turn.end' in m:
-		ws.close()
-	elif b'Path:audio\r\n' in m:
-		sys.stdout.buffer.write(m.split(b'Path:audio\r\n')[1])
-	"""
-	elif b'"Type": "WordBoundary",\n' in m:
-		print(m, file=sys.stderr)
-	"""
-
-def on_open(ws):
-	def run(*args):
-		message='X-Timestamp:'+formatdate()+'\r\nContent-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n'
-		message+='{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"'+sentenceBoundaryEnabled+'","wordBoundaryEnabled":"'+wordBoundaryEnabled+'"},"outputFormat":"' + codec + '"}}}}\r\n'
-		ws.send(message)
-		debug("Sent %s" % message)
-		message='X-RequestId:'+connectId()+'\r\nContent-Type:application/ssml+xml\r\n'
-		message+='X-Timestamp:'+formatdate()+'Z\r\nPath:ssml\r\n\r\n'
-		message+="<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>"
-		message+="<voice  name='" + voice + "'>" + "<prosody pitch='" + pitchString + "' rate ='" + rateString + "' volume='" + volumeString + "'>" + escape(text) + '</prosody></voice></speak>'
-		ws.send(message)
-		debug("Sent %s" % message)
-	thread.start_new_thread(run, ())
+def connectId(): return str(uuid.uuid4()).replace("-", "")
+def removeIncompatibleControlChars(s): return "".join(ch for ch in s if unicodedata.category(ch)[0]!="C")
 
 def list_voices():
 	with urllib.request.urlopen(voiceList) as url:
@@ -73,11 +40,26 @@ def list_voices():
 				print("%s: %s" % ("Name" if key == "ShortName" else key, voice[key]))
 	print()
 
-def run_tts():
-	ws = websocket.WebSocketApp(wsUrl,
-		on_open = on_open,
-		on_message = on_message)
-	ws.run_forever()
+async def run_tts():
+	async with websockets.connect(wsUrl, ssl=ssl_context) as ws:
+		message='X-Timestamp:'+formatdate()+'\r\nContent-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n'
+		message+='{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"'+sentenceBoundaryEnabled+'","wordBoundaryEnabled":"'+wordBoundaryEnabled+'"},"outputFormat":"' + codec + '"}}}}\r\n'
+		await ws.send(message)
+		debug("> %s" % message)
+		message='X-RequestId:'+connectId()+'\r\nContent-Type:application/ssml+xml\r\n'
+		message+='X-Timestamp:'+formatdate()+'Z\r\nPath:ssml\r\n\r\n'
+		message+="<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>"
+		message+="<voice  name='" + voice + "'>" + "<prosody pitch='" + pitchString + "' rate ='" + rateString + "' volume='" + volumeString + "'>" + escape(text) + '</prosody></voice></speak>'
+		await ws.send(message)
+		debug("> %s" % message)
+		while True:
+			recv = await ws.recv()
+			recv = recv.encode() if type(recv) is not bytes else recv
+			debug("< %s" % recv)
+			if b'turn.end' in recv:
+				break
+			elif b'Path:audio\r\n' in recv:
+				sys.stdout.buffer.write(recv.split(b'Path:audio\r\n')[1])
 
 if __name__ == "__main__":
 	parser = argparse.ArgumentParser(description="Microsoft Edge's Online TTS Reader")
@@ -92,7 +74,6 @@ if __name__ == "__main__":
 	parser.add_argument('-V', '--volume', help="set TTS volume. Default +0%%. For more info check https://bit.ly/3eAE5Nx", default="+0%")
 	parser.add_argument('-s', '--enable-sentence-boundary', help="enable sentence boundary (not implemented but set)", action='store_true')
 	parser.add_argument('-w', '--disable-word-boundary', help="disable word boundary (not implemented but set)", action='store_false')
-	parser.add_argument('-S', '--dont-split-sentences', help="sends entire text as is (careful because limit is unknown)", action='store_true')
 	parser.add_argument('-D', '--debug', help="some debugging", action='store_true')
 	args = parser.parse_args()
 	DEBUG = args.debug
@@ -115,22 +96,7 @@ if __name__ == "__main__":
 		volumeString = args.volume
 		sentenceBoundaryEnabled = 'true' if args.enable_sentence_boundary else 'false'
 		wordBoundaryEnabled = 'true' if args.disable_word_boundary else 'false'
-		if not args.dont_split_sentences:
-			try:
-				from nltk.tokenize import sent_tokenize
-				debug("Was able to load nltk module")
-			except Exception as e:
-				print("You need nltk for sentence splitting.", file=sys.stderr)
-				print("If you can't install it you could use the --dont-split-sentences flag.", file=sys.stderr)
-				debug("Exception was %s %s" % (e.message, e.args))
-				sys.exit(1)
-			debug("Starting!")
-			for text in sent_tokenize(removeIncompatibleControlChars(args.text)):
-				debug(text)
-				run_tts()
-		else:
-			debug("Split sentences disabled, sending text without splitting of any kind")
-			text = removeIncompatibleControlChars(args.text)
-			run_tts()
+		text = removeIncompatibleControlChars(args.text)
+		asyncio.get_event_loop().run_until_complete(run_tts())
 	elif args.list_voices:
 		list_voices()