drop custom SSML support

2022-06-19 21:06:55 +03:00
parent 797d04f182
commit 68a9e191d7
4 changed files with 24 additions and 66 deletions
--- a/README.md
+++ b/README.md
@@ -61,35 +61,11 @@ You must first check the available voices with the `--list-voices` option:
 ### Custom SSML
-It is possible to send Microsoft's text-to-speech servers a custom SSML document which would allow greater customization of the speech. 
+Support for custom SSML was removed in 5.0.0 because Microsoft has taken steps to prevent it from working. Custom SSML can no longer be used.
 Information about the SSML format can be found here on Microsoft's own website: https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-synthesis-markup
 As a short example, suppose you want to apply the following SSML document and play it back using `edge-tts`.
 ```
 <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"
       xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="en-US">
    <voice name="en-US-AriaNeural">
        <mstts:express-as style="cheerful">
            That'd be just amazing!
        </mstts:express-as>
    </voice>
 </speak>
 ```
 It would be easiest to do the following:
 1. Create a file called `custom_ssml.xml` with the above content.
 2. Run the following command:
       $ edge-tts --custom-ssml --file custom_ssml.xml --write-media amazing.mp3
 3. Voila!
 ### Changing pitch, rate, volume, etc.
-It is possible to make minor changes to the generated speech without resorting to custom SSML. However, you must note that you couldn't use the `--custom-ssml` option with the `--pitch`, `--rate`, `--volume`, etc. options.
+It is possible to make minor changes to the generated speech.
    $ edge-tts --pitch=-10Hz --text "Hello, world!" --write-media hello_with_pitch_down.mp3
    $ edge-tts --rate=0.5 --text "Hello, world!" --write-media hello_with_rate_halved.mp3
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = edge-tts
-version = 4.0.11
+version = 5.0.0
 author = rany
 author_email = ranygh@riseup.net
 description = Microsoft Edge's TTS
--- a/src/edge_tts/communicate.py
+++ b/src/edge_tts/communicate.py
@@ -213,7 +213,6 @@ class Communicate:
        pitch="+0Hz",
        rate="+0%",
        volume="+0%",
        customspeak=False,
        proxy=None,
    ):
        """
@@ -223,11 +222,10 @@ class Communicate:
            messages (str or list): A list of SSML strings or a single text.
            boundary_type (int): The type of boundary to use. 0 for none, 1 for word_boundary, 2 for sentence_boundary.
            codec (str): The codec to use.
-            voice (str): The voice to use (only applicable to non-customspeak).
+            voice (str): The voice to use.
-            pitch (str): The pitch to use (only applicable to non-customspeak).
+            pitch (str): The pitch to use.
-            rate (str): The rate to use (only applicable to non-customspeak).
+            rate (str): The rate to use.
-            volume (str): The volume to use (only applicable to non-customspeak).
+            volume (str): The volume to use.
            customspeak (bool): Whether to create the SSML or treat the messages as SSML.
        Yields:
            tuple: The subtitle offset, subtitle, and audio data.
@@ -244,23 +242,19 @@ class Communicate:
        word_boundary = str(word_boundary).lower()
-        if not customspeak:
+        websocket_max_size = 2**16
-            websocket_max_size = 2**16
+        overhead_per_message = (
-            overhead_per_message = (
+            len(
-                len(
+                ssml_headers_plus_data(
-                    ssml_headers_plus_data(
+                    connect_id(), self.date, mkssml("", voice, pitch, rate, volume)
                        connect_id(), self.date, mkssml("", voice, pitch, rate, volume)
                    )
                )
                + 50
            )  # margin of error
            messages = split_text_by_byte_length(
                escape(remove_incompatible_characters(messages)),
                websocket_max_size - overhead_per_message,
            )
-        else:
+            + 50
-            if isinstance(messages, str):
+        )  # margin of error
-                messages = [messages]
+        messages = split_text_by_byte_length(
            escape(remove_incompatible_characters(messages)),
            websocket_max_size - overhead_per_message,
        )
        # Variables for the loop
        download = False
@@ -307,18 +301,13 @@ class Communicate:
                    # Send the request to the service.
                    await websocket.send_str(request)
                    # Send the message itself.
-                    if not customspeak:
+                    await websocket.send_str(
-                        await websocket.send_str(
+                        ssml_headers_plus_data(
-                            ssml_headers_plus_data(
+                            connect_id(),
-                                connect_id(),
+                            self.date,
-                                self.date,
+                            mkssml(message, voice, pitch, rate, volume),
                                mkssml(message, voice, pitch, rate, volume),
                            )
                        )
                    else:
                        await websocket.send_str(
                            ssml_headers_plus_data(connect_id(), self.date, message)
                        )
                    )
                    # Begin listening for the response.
                    async for received in websocket:
--- a/src/edge_tts/util.py
+++ b/src/edge_tts/util.py
@@ -38,7 +38,6 @@ async def _tts(args):
        args.pitch,
        args.rate,
        args.volume,
        customspeak=args.custom_ssml,
        proxy=args.proxy,
    ):
        if i[2] is not None:
@@ -62,12 +61,6 @@ async def _main():
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("-t", "--text", help="what TTS will say")
    group.add_argument("-f", "--file", help="same as --text but read from file")
    parser.add_argument(
        "-z",
        "--custom-ssml",
        help="treat text as ssml to send. For more info check https://bit.ly/3fIq13S",
        action="store_true",
    )
    parser.add_argument(
        "-v",
        "--voice",