Improve message handling

2021-06-06 20:12:49 +03:00
parent 65cb204f97
commit 9f8a965a1f
2 changed files with 15 additions and 10 deletions
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = edge-tts
-version = 1.1.3
+version = 1.1.4
 author = rany
 author_email = ranygh@riseup.net
 description = Microsoft Edge's TTS
--- a/src/edgeTTS/init.py
+++ b/src/edgeTTS/init.py
@@ -63,7 +63,6 @@ def bool_to_lower_str(x): return 'true' if x else 'false'
 async def run_tts(msg, sentenceBoundary=False, wordBoundary=False, codec="audio-24khz-48kbitrate-mono-mp3"):
    sentenceBoundary = bool_to_lower_str(sentenceBoundary)
    wordBoundary = bool_to_lower_str(wordBoundary)
    logging.debug("Doing %s!" % msg)
    # yes, the connectid() in websockets.connect is different
    async with websockets.connect(
        wssUrl + "&ConnectionId=" + connectId(),
@@ -80,16 +79,22 @@ async def run_tts(msg, sentenceBoundary=False, wordBoundary=False, codec="audio-
        message='X-Timestamp:'+formatdate()+'\r\nContent-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n'
        message+='{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"'+sentenceBoundary+'","wordBoundaryEnabled":"'+wordBoundary+'"},"outputFormat":"' + codec + '"}}}}\r\n'
        await ws.send(message)
        logging.debug("> %s" % message)
        await ws.send(msg)
-        logging.debug("> %s" % msg)
+        download = False
        async for recv in ws:
-            recv = recv.encode('utf-8') if type(recv) is not bytes else recv
+            if type(recv) is str:
-            logging.debug("< %s" % recv)
+                if 'turn.start' in recv:
-            if b'turn.end' in recv:
+                    download = True
-                await ws.close()
+                elif 'turn.end' in recv:
-            elif b'Path:audio\r\n' in recv:
+                    download = False
-                yield b"".join(recv.split(b'Path:audio\r\n')[1:])
+                    await ws.close()
                # TODO: add some sort of captioning based on audio:metadata. It's just JSON with offset.
                # WordBoundary is the only thing supported. SentenceBoundary does nothing.
                #elif 'audio.metadata' in recv:
                #    print("".join(recv.split('Path:audio.metadata\r\n\r\n')[1:]), file=sys.stderr)
            elif type(recv) is bytes:
                if download:
                    yield b"".join(recv.split(b'Path:audio\r\n')[1:])
 # Based on https://github.com/pndurette/gTTS/blob/6d9309f05b3ad26ca356654732f3b5b9c3bec538/gtts/utils.py#L13-L54
 # Modified to measure based on bytes rather than number of characters