diff --git a/src/edge_tts/communicate.py b/src/edge_tts/communicate.py index 7d1561c..98aca84 100644 --- a/src/edge_tts/communicate.py +++ b/src/edge_tts/communicate.py @@ -5,6 +5,7 @@ Communicate package. import json import re +import struct import time import uuid from contextlib import nullcontext @@ -414,12 +415,13 @@ class Communicate: "We received a binary message, but we are not expecting one." ) - yield { - "type": "audio", - "data": received.data[ - received.data.find(b"Path:audio\r\n") + 12 : - ], - } + # See: https://github.com/microsoft/cognitive-services-speech-sdk-js/blob/d071d11d1e9f34d6f79d0ab6114c90eecb02ba1f/src/common.speech/WebsocketMessageFormatter.ts#L46 + header_length = struct.unpack(">H", received.data[:2])[0] + if len(received.data) > header_length + 2: + yield { + "type": "audio", + "data": received.data[header_length + 2 :], + } audio_was_received = True elif received.type == aiohttp.WSMsgType.ERROR: raise WebSocketError(