From 4d924320c9aebd3a42e25b9354a7e2138cccbb45 Mon Sep 17 00:00:00 2001 From: rany Date: Tue, 25 May 2021 16:01:05 +0300 Subject: [PATCH] Fix message splitting based on bytes not chars count --- edge-tts.py | 43 ++++++++++++------------------------------- 1 file changed, 12 insertions(+), 31 deletions(-) diff --git a/edge-tts.py b/edge-tts.py index a5c49e7..36ad326 100755 --- a/edge-tts.py +++ b/edge-tts.py @@ -38,7 +38,7 @@ def removeIncompatibleControlChars(s): def list_voices(): with urllib.request.urlopen(voiceList) as url: debug("Loading json from %s" % voiceList) - data = json.loads(url.read().decode()) + data = json.loads(url.read().decode('utf-8')) debug("JSON Loaded") for voice in data: print() @@ -69,37 +69,20 @@ async def run_tts(msg): debug("> %s" % message) await ws.send(msg) debug("> %s" % msg) - while True: - recv = await ws.recv() - recv = recv.encode() if type(recv) is not bytes else recv + async for recv in ws: + recv = recv.encode('utf-8') if type(recv) is not bytes else recv debug("< %s" % recv) if b'turn.end' in recv: - break + await ws.close() elif b'Path:audio\r\n' in recv: sys.stdout.buffer.write(recv.split(b'Path:audio\r\n')[1]) -# From https://github.com/pndurette/gTTS/blob/6d9309f05b3ad26ca356654732f3b5b9c3bec538/gtts/utils.py#L13-L54 +# Based on https://github.com/pndurette/gTTS/blob/6d9309f05b3ad26ca356654732f3b5b9c3bec538/gtts/utils.py#L13-L54 +# Modified to measure based on bytes rather than number of characters def _minimize(the_string, delim, max_size): - """Recursively split a string in the largest chunks - possible from the highest position of a delimiter all the way - to a maximum size - Args: - the_string (string): The string to split. - delim (string): The delimiter to split on. - max_size (int): The maximum size of a chunk. - Returns: - list: the minimized string in tokens - Every chunk size will be at minimum ``the_string[0:idx]`` where ``idx`` - is the highest index of ``delim`` found in ``the_string``; and at maximum - ``the_string[0:max_size]`` if no ``delim`` was found in ``the_string``. - In the latter case, the split will occur at ``the_string[max_size]`` - which can be any character. The function runs itself again on the rest of - ``the_string`` (``the_string[idx:]``) until no chunk is larger than - ``max_size``. - """ - # Remove `delim` from start of `the_string` - # i.e. prevent a recursive infinite loop on `the_string[0:0]` - # if `the_string` starts with `delim` and is larger than `max_size` + # Make sure we are measuring based on bytes + the_string = the_string.encode('utf-8') if type(the_string) is str else the_string + if the_string.startswith(delim): the_string = the_string[len(delim):] @@ -154,14 +137,12 @@ if __name__ == "__main__": volumeString = args.volume sentenceBoundaryEnabled = 'true' if args.enable_sentence_boundary else 'false' wordBoundaryEnabled = 'true' if args.enable_word_boundary else 'false' - # https://hpbn.co/websocket/ says client must also send a masking key, - # which adds an extra 4 bytes to the header, resulting in 6–14 bytes over overhead if args.custom_ssml: asyncio.get_event_loop().run_until_complete(run_tts(mkssmlmsg(text=args.text, customspeak=True))) else: - overhead = len(mkssmlmsg()) + 14 + overhead = len(mkssmlmsg().encode('utf-8')) wsmax = 65536 - overhead - for text in _minimize(escape(removeIncompatibleControlChars(args.text)), " ", wsmax): - asyncio.get_event_loop().run_until_complete(run_tts(mkssmlmsg(text))) + for text in _minimize(escape(removeIncompatibleControlChars(args.text)), b" ", wsmax): + asyncio.get_event_loop().run_until_complete(run_tts(mkssmlmsg(text.decode('utf-8')))) elif args.list_voices: list_voices()