Update edge-tts to version 4.0.11

* Add pylint check to lint.sh * Change the overlapping default from 5 seconds to 1 second for SubMaker and util.py * Default to WordBoundary for edge-playback (previously SentenceBoundary) * Drop SentenceBoundary support (it never worked properly and required too many hacks) - It has not actually been supported by Azure's official SDK for a few years already * Stop attempting to correct broken offsets sent back to us by Azure - Fixes never work properly because the AI voice sometimes takes odd pauses at the start and sometimes doesn't. It's never predictable and cannot be fixed on the library's end. - The solution is for Microsoft to fix the integer overflow bug they are facing in the {Word,Sentence}Boundary offsets. It doesn't affect us until we reach 30-minute-long TTS anyway. * Have edge-tts --list-voices use the configured HTTP proxy * More misc changes and fixes
2022-05-29 18:08:16 +03:00
parent 9a20f1ca90
commit 797d04f182
7 changed files with 53 additions and 67 deletions
--- a/src/edge_tts/communicate.py
+++ b/src/edge_tts/communicate.py
@@ -55,8 +55,8 @@ def remove_incompatible_characters(string):

    string = list(string)

-    for idx in range(len(string)):  # pylint: disable=consider-using-enumerate
-        code = ord(string[idx])
+    for idx, char in enumerate(string):
+        code = ord(char)
        if (0 <= code <= 8) or (11 <= code <= 12) or (14 <= code <= 31):
            string[idx] = " "

@@ -193,7 +193,7 @@ def ssml_headers_plus_data(request_id, timestamp, ssml):
    )


-class Communicate:  # pylint: disable=too-few-public-methods
+class Communicate:
    """
    Class for communicating with the service.
    """
@@ -215,7 +215,7 @@ class Communicate:  # pylint: disable=too-few-public-methods
        volume="+0%",
        customspeak=False,
        proxy=None,
-    ):  # pylint: disable=too-many-arguments, too-many-locals
+    ):
        """
        Runs the Communicate class.

@@ -234,14 +234,14 @@ class Communicate:  # pylint: disable=too-few-public-methods
        """

        word_boundary = False
-        sentence_boundary = False

        if boundary_type > 0:
            word_boundary = True
        if boundary_type > 1:
-            sentence_boundary = True
+            raise ValueError(
+                "Invalid boundary type. SentenceBoundary is no longer supported."
+            )

-        sentence_boundary = str(sentence_boundary).lower()
        word_boundary = str(word_boundary).lower()

        if not customspeak:
@@ -262,12 +262,8 @@ class Communicate:  # pylint: disable=too-few-public-methods
            if isinstance(messages, str):
                messages = [messages]

-
        # Variables for the loop
        download = False
-        current_subtitle = ""
-        first_offset = None
-        last_offset = None
        async with aiohttp.ClientSession(trust_env=True) as session:
            async with session.ws_connect(
                f"{WSS_URL}&ConnectionId={connect_id()}",
@@ -304,7 +300,7 @@ class Communicate:  # pylint: disable=too-few-public-methods
                        "Content-Type:application/json; charset=utf-8\r\n"
                        "Path:speech.config\r\n\r\n"
                        '{"context":{"synthesis":{"audio":{"metadataoptions":{'
-                        f'"sentenceBoundaryEnabled":{sentence_boundary},'
+                        f'"sentenceBoundaryEnabled":false,'
                        f'"wordBoundaryEnabled":{word_boundary}}},"outputFormat":"{codec}"'
                        "}}}}\r\n"
                    )
@@ -326,12 +322,6 @@ class Communicate:  # pylint: disable=too-few-public-methods

                    # Begin listening for the response.
                    async for received in websocket:
-                        if received.type in (
-                            aiohttp.WSMsgType.CLOSED,
-                            aiohttp.WSMsgType.ERROR,
-                        ):
-                            break
-
                        if received.type == aiohttp.WSMsgType.TEXT:
                            parameters, data = get_headers_and_data(received.data)
                            if (
@@ -358,12 +348,15 @@ class Communicate:  # pylint: disable=too-few-public-methods
                                    metadata_duration = metadata["Metadata"][0]["Data"][
                                        "Duration"
                                    ]
-                                except KeyError:
-                                    metadata_duration = 0
+                                except KeyError as exception:
+                                    raise ValueError(
+                                        "The metadata doesn't contain a Duration field. "
+                                        + "This usually happens when SentenceBoundary metadata type is sent."
+                                    ) from exception
                                metadata_text = metadata["Metadata"][0]["Data"]["text"][
                                    "Text"
                                ]
-                                if boundary_type == 1:
+                                if metadata_type == "WordBoundary":
                                    yield (
                                        [
                                            metadata_offset,
@@ -372,31 +365,32 @@ class Communicate:  # pylint: disable=too-few-public-methods
                                        metadata_text,
                                        None,
                                    )
+                                elif metadata_type == "SentenceBoundary":
+                                    raise NotImplementedError(
+                                        "SentenceBoundary is not supported due to being broken."
+                                    )
                                else:
-                                    if metadata_type == "WordBoundary":
-                                        if current_subtitle:
-                                            current_subtitle += " "
-                                        current_subtitle += metadata_text
-                                        if first_offset is None:
-                                            first_offset = metadata_offset
-                                        last_offset = [
-                                            metadata_offset,
-                                            metadata_duration,
-                                        ]
-                                    elif metadata_type == "SentenceBoundary":
-                                        if current_subtitle:
-                                            yield (
-                                                [
-                                                    first_offset,
-                                                    sum(last_offset) - first_offset,
-                                                ],
-                                                current_subtitle,
-                                                None,
-                                            )
-                                        current_subtitle = ""
-                                        first_offset = None
-                                        last_offset = None
+                                    raise NotImplementedError(
+                                        f"Unknown metadata type: {metadata_type}"
+                                    )
+                            elif (
+                                "Path" in parameters
+                                and parameters["Path"] == "response"
+                            ):
+                                # TODO: implement this:
+                                """
+                                X-RequestId:xxxxxxxxxxxxxxxxxxxxxxxxx
+                                Content-Type:application/json; charset=utf-8
+                                Path:response

+                                {"context":{"serviceTag":"yyyyyyyyyyyyyyyyyyy"},"audio":{"type":"inline","streamId":"zzzzzzzzzzzzzzzzz"}}
+                                """
+                                pass
+                            else:
+                                raise ValueError(
+                                    "The response from the service is not recognized.\n"
+                                    + received.data
+                                )
                        elif received.type == aiohttp.WSMsgType.BINARY:
                            if download:
                                yield (
@@ -406,10 +400,8 @@ class Communicate:  # pylint: disable=too-few-public-methods
                                        received.data.split(b"Path:audio\r\n")[1:]
                                    ),
                                )
-                if current_subtitle:
-                    yield (
-                        [first_offset, sum(last_offset) - first_offset],
-                        current_subtitle,
-                        None,
-                    )
+                            else:
+                                raise ValueError(
+                                    "The service sent a binary message, but we are not expecting one."
+                                )
                await websocket.close()