diff --git a/setup.cfg b/setup.cfg
index 5285fab..b470dbe 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = edge-tts
-version = 1.1.3
+version = 1.1.4
 author = rany
 author_email = ranygh@riseup.net
 description = Microsoft Edge's TTS
diff --git a/src/edgeTTS/__init__.py b/src/edgeTTS/__init__.py
index ccfbdee..bc58236 100755
--- a/src/edgeTTS/__init__.py
+++ b/src/edgeTTS/__init__.py
@@ -63,7 +63,6 @@ def bool_to_lower_str(x): return 'true' if x else 'false'
 async def run_tts(msg, sentenceBoundary=False, wordBoundary=False, codec="audio-24khz-48kbitrate-mono-mp3"):
     sentenceBoundary = bool_to_lower_str(sentenceBoundary)
     wordBoundary = bool_to_lower_str(wordBoundary)
-    logging.debug("Doing %s!" % msg)
     # yes, the connectid() in websockets.connect is different
     async with websockets.connect(
         wssUrl + "&ConnectionId=" + connectId(),
@@ -80,16 +79,22 @@ async def run_tts(msg, sentenceBoundary=False, wordBoundary=False, codec="audio-
         message='X-Timestamp:'+formatdate()+'\r\nContent-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n'
         message+='{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"'+sentenceBoundary+'","wordBoundaryEnabled":"'+wordBoundary+'"},"outputFormat":"' + codec + '"}}}}\r\n'
         await ws.send(message)
-        logging.debug("> %s" % message)
         await ws.send(msg)
-        logging.debug("> %s" % msg)
+        download = False
         async for recv in ws:
-            recv = recv.encode('utf-8') if type(recv) is not bytes else recv
-            logging.debug("< %s" % recv)
-            if b'turn.end' in recv:
-                await ws.close()
-            elif b'Path:audio\r\n' in recv:
-                yield b"".join(recv.split(b'Path:audio\r\n')[1:])
+            if type(recv) is str:
+                if 'turn.start' in recv:
+                    download = True
+                elif 'turn.end' in recv:
+                    download = False
+                    await ws.close()
+                # TODO: add some sort of captioning based on audio:metadata. It's just JSON with offset.
+                # WordBoundary is the only thing supported. SentenceBoundary does nothing.
+                #elif 'audio.metadata' in recv:
+                #    print("".join(recv.split('Path:audio.metadata\r\n\r\n')[1:]), file=sys.stderr)
+            elif type(recv) is bytes:
+                if download:
+                    yield b"".join(recv.split(b'Path:audio\r\n')[1:])
 
 # Based on https://github.com/pndurette/gTTS/blob/6d9309f05b3ad26ca356654732f3b5b9c3bec538/gtts/utils.py#L13-L54
 # Modified to measure based on bytes rather than number of characters