Update to edge-tts 4.0.0

2021-12-07 22:09:43 +02:00
parent 756766fe6e
commit 4fcecddaf0
16 changed files with 207 additions and 101 deletions
--- a/build.sh
+++ b/build.sh
@@ -0,0 +1,3 @@
 #!/bin/sh
 # Build the source distribution and the wheel for the package.
 # Echo each command as it is executed.
 set -x
 # exec replaces this shell with the build command, so the script's exit
 # status is the exit status of the build.
 exec python3 setup.py sdist bdist_wheel
--- a/build_and_publish.sh
+++ b/build_and_publish.sh
@@ -1,6 +1,12 @@
 #!/bin/sh
-set -e
+
-rm -rf build dist src/*.egg-info
+set -ex
-python3 setup.py sdist bdist_wheel
+
-twine upload dist/*
+./clean.sh
-rm -rf build dist src/*.egg-info
+
 ./build.sh
 ./publish.sh
 ./clean.sh
 exit 0
--- a/clean.sh
+++ b/clean.sh
@@ -0,0 +1,3 @@
 #!/bin/sh
 # Remove build artifacts: the build and dist entries and anything matching
 # src/*.egg-info. -f keeps rm quiet when an entry does not exist.
 # Echo each command as it is executed.
 set -x
 exec rm -rf build dist src/*.egg-info
--- a/examples/input_example.py
+++ b/examples/input_example.py
@@ -1,21 +1,27 @@
 #!/usr/bin/env python3
-# Example Python script that shows how to use edge-tts as a module
+"""
 Example Python script that shows how to use edge-tts as a module
 """
 import asyncio
 import tempfile
 from playsound import playsound
-import edgeTTS
+import edge_tts
 async def main():
    """
    Ask for a sentence, synthesize it with edge-tts and play the result.

    The data chunks yielded by ``Communicate.run`` are collected in a
    temporary file, which is then handed to ``playsound`` by name. The
    temporary file is removed when the ``with`` block is left.
    """
    communicate = edge_tts.Communicate()
    ask = input("What do you want TTS to say? ")
    with tempfile.NamedTemporaryFile() as temporary_file:
        async for i in communicate.run(ask):
            # Only items whose third element is set carry data to be played.
            if i[2] is not None:
                temporary_file.write(i[2])
        # playsound opens the file by name, so everything still sitting in
        # this handle's write buffer has to reach the file first; otherwise
        # the tail of the audio can be missing.
        temporary_file.flush()
        playsound(temporary_file.name)
 if __name__ == "__main__":
--- a/publish.sh
+++ b/publish.sh
@@ -0,0 +1,3 @@
 #!/bin/sh
 # Upload every file found in dist/ with twine.
 # Echo each command as it is executed.
 set -x
 # exec replaces this shell with twine, so the script's exit status is
 # twine's exit status.
 exec twine upload dist/*
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = edge-tts
-version = 3.0.2
+version = 4.0.0
 author = rany
 author_email = ranygh@riseup.net
 description = Microsoft Edge's TTS
@@ -27,5 +27,5 @@ where=src
 [options.entry_points]
 console_scripts =
-    edge-tts = edgeTTS.__main__:main
+    edge-tts = edge_tts.__main__:main
-    edge-playback = edgePlayback.__init__:main
+    edge-playback = edge_playback.__init__:main
--- a/src/edgePlayback/main.py
+++ b/src/edgePlayback/main.py
@@ -1,6 +0,0 @@
 #!/usr/bin/env python3
 from edgePlayback.__init__ import *
 if __name__ == "__main__":
    main()
--- a/src/edge_playback/init.py
+++ b/src/edge_playback/init.py
@@ -1,5 +1,9 @@
 #!/usr/bin/env python3
 """
 Playback TTS with subtitles using edge-tts and mpv.
 """
 import subprocess
 import sys
 import tempfile
@@ -7,13 +11,16 @@ from shutil import which
 def main():
    """
    Main function.
    """
    if which("mpv") and which("edge-tts"):
        with tempfile.NamedTemporaryFile() as media:
            with tempfile.NamedTemporaryFile() as subtitle:
                print()
-                print("Media file      %s" % media.name)
+                print(f"Media file      {media.name}")
-                print("Subtitle file   %s\n" % subtitle.name)
+                print(f"Subtitle file   {subtitle.name}\n")
-                p = subprocess.Popen(
+                with subprocess.Popen(
                    [
                        "edge-tts",
                        "-w",
@@ -23,17 +30,18 @@ def main():
                        subtitle.name,
                    ]
                    + sys.argv[1:]
-                )
+                ) as process:
-                p.communicate()
+                    process.communicate()
-                p = subprocess.Popen(
+
                with subprocess.Popen(
                    [
                        "mpv",
                        "--keep-open=yes",
-                        "--sub-file=" + subtitle.name,
+                        f"--sub-file={subtitle.name}",
                        media.name,
                    ]
-                )
+                ) as process:
-                p.communicate()
+                    process.communicate()
    else:
        print("This script requires mpv and edge-tts.")
--- a/src/edge_playback/main.py
+++ b/src/edge_playback/main.py
@@ -0,0 +1,10 @@
 #!/usr/bin/env python3
 """
 This is the main file for the edge_playback package.
 """
 from edge_playback.__init__ import main
 if __name__ == "__main__":
    main()
--- a/src/edge_tts/init.py
+++ b/src/edge_tts/init.py
@@ -1,3 +1,7 @@
 """
 __init__ for edge_tts
 """
 from .communicate import Communicate
 from .list_voices import list_voices
 from .submaker import SubMaker
--- a/src/edge_tts/main.py
+++ b/src/edge_tts/main.py
@@ -1,3 +1,7 @@
 """
 __main__ for edge_tts.
 """
 from .util import main
 if __name__ == "__main__":
--- a/src/edge_tts/communicate.py
+++ b/src/edge_tts/communicate.py
@@ -53,17 +53,14 @@ def remove_incompatible_characters(string):
    if isinstance(string, bytes):
        string = string.decode("utf-8")
-    cleaned_string = ""
+    string = list(string)
-    for character in string:
+
-        character_code = ord(character)
+    for idx in range(len(string)):  # pylint: disable=consider-using-enumerate
-        if (
+        code = ord(string[idx])
-            (0 <= character_code <= 8)
+        if (0 <= code <= 8) or (11 <= code <= 12) or (14 <= code <= 31):
-            or (11 <= character_code <= 12)
+            string[idx] = " "
-            or (14 <= character_code <= 31)
+
-        ):
+    return "".join(string)
            character = " "
        cleaned_string += character
    return cleaned_string
 def connect_id():
@@ -144,7 +141,8 @@ def mkssml(text, voice, pitch, rate, volume):
    ssml = (
        "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>"
-        f"<voice name='{voice}'><prosody pitch='{pitch}' rate='{rate}' volume='{volume}'>{text}</prosody></voice></speak>"
+        f"<voice name='{voice}'><prosody pitch='{pitch}' rate='{rate}' volume='{volume}'>"
        f"{text}</prosody></voice></speak>"
    )
    return ssml
@@ -192,7 +190,7 @@ def ssml_headers_plus_data(request_id, timestamp, ssml):
    )
-class Communicate:
+class Communicate:  # pylint: disable=too-few-public-methods
    """
    Class for communicating with the service.
    """
@@ -214,7 +212,7 @@ class Communicate:
        rate="+0%",
        volume="+0%",
        customspeak=False,
-    ):
+    ):  # pylint: disable=too-many-arguments, too-many-locals
        """
        Runs the Communicate class.
@@ -266,7 +264,8 @@ class Communicate:
                    "Origin": "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
                    "Accept-Encoding": "gzip, deflate, br",
                    "Accept-Language": "en-US,en;q=0.9",
-                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
+                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
                    " (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
                },
            ) as websocket:
                for message in messages:
@@ -275,18 +274,22 @@ class Communicate:
                    # Prepare the request to be sent to the service.
                    #
-                    # Note that sentenceBoundaryEnabled and wordBoundaryEnabled are actually supposed
+                    # Note sentenceBoundaryEnabled and wordBoundaryEnabled are actually supposed
-                    # to be booleans, but Edge Browser seems to send them as strings and not booleans.
+                    # to be booleans, but Edge Browser seems to send them as strings.
-                    # This is a bug in Edge Browser as Azure Cognitive Services actually sends them as
+                    #
-                    # booleans and not strings. For now I will send them as booleans unless it causes
+                    # This is a bug in Edge as Azure Cognitive Services actually sends them as
                    # bool and not string. For now I will send them as bool unless it causes
                    # any problems.
                    #
-                    # Also pay close attention to double {  } in request (escape for Python .format()).
+                    # Also pay close attention to double { } in request (escape for f-string).
                    request = (
                        f"X-Timestamp:{self.date}\r\n"
                        "Content-Type:application/json; charset=utf-8\r\n"
                        "Path:speech.config\r\n\r\n"
-                        f'{{"context":{{"synthesis":{{"audio":{{"metadataoptions":{{"sentenceBoundaryEnabled":{sentence_boundary},"wordBoundaryEnabled":{word_boundary}}},"outputFormat":"{codec}"}}}}}}}}\r\n'
+                        '{"context":{"synthesis":{"audio":{"metadataoptions":{'
                        f'"sentenceBoundaryEnabled":{sentence_boundary},'
                        f'"wordBoundaryEnabled":{word_boundary}}},"outputFormat":"{codec}"'
                        "}}}}\r\n"
                    )
                    # Send the request to the service.
                    await websocket.send_str(request)
--- a/src/edge_tts/constants.py
+++ b/src/edge_tts/constants.py
--- a/src/edge_tts/list_voices.py
+++ b/src/edge_tts/list_voices.py
@@ -15,9 +15,7 @@ async def list_voices():
    This pulls data from the URL used by Microsoft Edge to return a list of
    all available voices. However many more experimental voices are available
-    than are listed here.
+    than are listed here. (See https://aka.ms/csspeech/voicenames)
    (See
    https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support)
    Returns:
        dict: A dictionary of voice attributes.
@@ -29,7 +27,8 @@ async def list_voices():
                "Authority": "speech.platform.bing.com",
                "Sec-CH-UA": '" Not;A Brand";v="99", "Microsoft Edge";v="91", "Chromium";v="91"',
                "Sec-CH-UA-Mobile": "?0",
-                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
                "Accept": "*/*",
                "Sec-Fetch-Site": "none",
                "Sec-Fetch-Mode": "cors",
--- a/src/edge_tts/submaker.py
+++ b/src/edge_tts/submaker.py
@@ -1,29 +1,68 @@
 """
 SubMaker package for the Edge TTS project.
 SubMaker is a package that makes the process of creating subtitles with
 information provided by the service easier.
 """
 import math
 from xml.sax.saxutils import escape
 def formatter(offset1, offset2, subdata):
    """
    formatter returns the timecode and the text of the subtitle.

    Args:
        offset1: Start offset of the subtitle, converted with mktimestamp.
        offset2: End offset of the subtitle, converted with mktimestamp.
        subdata (str): The text of the subtitle. It is XML-escaped before
                       it is emitted.

    Returns:
        str: A "start --> end" line and the escaped text, each terminated
             by CRLF, followed by one empty CRLF-terminated line.
    """
    start = mktimestamp(offset1)
    end = mktimestamp(offset2)
    return f"{start} --> {end}\r\n{escape(subdata)}\r\n\r\n"
 def mktimestamp(time_unit):
    """
    mktimestamp returns the timecode of the subtitle.

    The timecode is in the format of 00:00:00.000. The value is rounded to
    whole milliseconds before it is split into fields, so a value just
    below a full minute carries into the minute field instead of being
    rendered with a seconds field of 60.000.

    Args:
        time_unit (int): Offset in units of 10 ** -7 seconds.

    Returns:
        str: The timecode of the subtitle.
    """
    # 10 ** 4 units make one millisecond; adding half a millisecond before
    # the floor division rounds to the nearest millisecond.
    total_millis = int((time_unit + 5 * 10 ** 3) // 10 ** 4)
    total_seconds, millis = divmod(total_millis, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    # The hour field is not wrapped; it simply grows past two digits.
    hour, minute = divmod(total_minutes, 60)
    return f"{hour:02d}:{minute:02d}:{seconds:02d}.{millis:03d}"
 class SubMaker:
    """
    SubMaker class
    """
    def __init__(self, overlapping=5):
        """
        SubMaker constructor.
        Args:
            overlapping (int): The amount of time in seconds that the
                               subtitles should overlap.
        """
        self.subs_and_offset = []
        self.broken_offset = []
        self.overlapping = overlapping * (10 ** 7)
    def create_sub(self, timestamp, text):
        """
        create_sub creates a subtitle with the given timestamp and text
        and adds it to the list of subtitles
        Args:
            timestamp (int): The timestamp of the subtitle.
            text (str): The text of the subtitle.
        Returns:
            None
        """
        if len(self.subs_and_offset) >= 2:
            if self.subs_and_offset[-2] >= timestamp + sum(self.broken_offset):
                self.broken_offset.append(self.subs_and_offset[-2])
@@ -33,6 +72,12 @@ class SubMaker:
        self.subs_and_offset.append(text)
    def generate_subs(self):
        """
        generate_subs generates the complete subtitle file.
        Returns:
            str: The complete subtitle file.
        """
        if len(self.subs_and_offset) >= 2:
            data = "WEBVTT\r\n\r\n"
            old_time_stamp = None
--- a/src/edge_tts/util.py
+++ b/src/edge_tts/util.py
@@ -7,7 +7,54 @@ import argparse
 import asyncio
 import sys
-from edgeTTS import Communicate, SubMaker, list_voices
+from edge_tts import Communicate, SubMaker, list_voices
 async def _list_voices():
    """
    Print the voices returned by list_voices() to stdout.

    Every attribute of a voice is printed as "key: value" on its own line,
    except SuggestedCodec, FriendlyName and Status, which are left out.
    Consecutive voices are separated by one empty line.
    """
    hidden = ["SuggestedCodec", "FriendlyName", "Status"]
    first = True
    for voice in await list_voices():
        # Separator goes before every voice but the first one.
        if not first:
            print()
        first = False
        for key in voice:
            if key not in hidden:
                print(f"{key}: {voice[key]}")
 async def _tts(args):
    """
    Run one text-to-speech request described by the parsed arguments.

    Data chunks (third element of each item yielded by Communicate.run) are
    written to the file named by args.write_media when it is set, and to
    stdout otherwise. Items that carry a timestamp and a text instead are
    collected as subtitles, which are finally written to the file named by
    args.write_subtitles when it is set, and to stderr otherwise.

    Args:
        args: Parsed command line arguments. The attributes read here are
              text, overlapping, write_media, write_subtitles,
              enable_sentence_boundary, enable_word_boundary, codec, voice,
              pitch, rate, volume and custom_ssml.

    Returns:
        None
    """
    tts = Communicate()
    subs = SubMaker(args.overlapping)

    # None means "no media file requested": data goes to stdout instead.
    media_file = None
    if args.write_media:
        media_file = open(args.write_media, "wb")  # pylint: disable=consider-using-with
    try:
        async for i in tts.run(
            args.text,
            args.enable_sentence_boundary,
            args.enable_word_boundary,
            args.codec,
            args.voice,
            args.pitch,
            args.rate,
            args.volume,
            customspeak=args.custom_ssml,
        ):
            if i[2] is not None:
                if media_file is None:
                    sys.stdout.buffer.write(i[2])
                else:
                    media_file.write(i[2])
            elif i[0] is not None and i[1] is not None:
                subs.create_sub(i[0], i[1])
    finally:
        # Close the media file even when the stream raises, so the handle
        # is not leaked and what was received so far is flushed.
        if media_file is not None:
            media_file.close()

    if not args.write_subtitles:
        sys.stderr.write(subs.generate_subs())
    else:
        with open(args.write_subtitles, "w", encoding="utf-8") as file:
            file.write(subs.generate_subs())
 async def _main():
@@ -24,19 +71,23 @@ async def _main():
    parser.add_argument(
        "-v",
        "--voice",
-        help="voice for TTS. Default: Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)",
+        help="voice for TTS. "
        "Default: Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)",
        default="Microsoft Server Speech Text to Speech Voice (en-US, AriaNeural)",
    )
    parser.add_argument(
        "-c",
        "--codec",
-        help="codec format. Default: audio-24khz-48kbitrate-mono-mp3. Another choice is webm-24khz-16bit-mono-opus. For more info check https://bit.ly/2T33h6S",
+        help="codec format. Default: audio-24khz-48kbitrate-mono-mp3. "
        "Another choice is webm-24khz-16bit-mono-opus. "
        "For more info check https://bit.ly/2T33h6S",
        default="audio-24khz-48kbitrate-mono-mp3",
    )
    group.add_argument(
        "-l",
        "--list-voices",
-        help="lists available voices. Edge's list is incomplete so check https://bit.ly/2SFq1d3",
+        help="lists available voices. "
        "Edge's list is incomplete so check https://bit.ly/2SFq1d3",
        action="store_true",
    )
    parser.add_argument(
@@ -85,6 +136,10 @@ async def _main():
    )
    args = parser.parse_args()
    if args.list_voices:
        await _list_voices()
        sys.exit(0)
    if args.text is not None or args.file is not None:
        if args.file is not None:
            # we need to use sys.stdin.read() because some devices
@@ -96,45 +151,8 @@ async def _main():
                # logger.debug("reading from %s" % args.file)
                with open(args.file, "r", encoding="utf-8") as file:
                    args.text = file.read()
        tts = Communicate()
        subs = SubMaker(args.overlapping)
        if args.write_media:
            media_file = open(args.write_media, "wb")
        async for i in tts.run(
            args.text,
            args.enable_sentence_boundary,
            args.enable_word_boundary,
            args.codec,
            args.voice,
            args.pitch,
            args.rate,
            args.volume,
            customspeak=args.custom_ssml,
        ):
            if i[2] is not None:
                if not args.write_media:
                    sys.stdout.buffer.write(i[2])
                else:
                    media_file.write(i[2])
            elif i[0] is not None and i[1] is not None:
                subs.create_sub(i[0], i[1])
        if args.write_media:
            media_file.close()
        if not args.write_subtitles:
            sys.stderr.write(subs.generate_subs())
        else:
            with open(args.write_subtitles, "w", encoding="utf-8") as file:
                file.write(subs.generate_subs())
    elif args.list_voices:
        for idx, voice in enumerate(await list_voices()):
            if idx != 0:
                print()
-            for key in voice.keys():
+        await _tts(args)
                if key in ["SuggestedCodec", "FriendlyName", "Status"]:
                    continue
                # print ("%s: %s" % ("Name" if key == "ShortName" else key, voice[key]))
                print(f"{key}: {voice[key]}")
 def main():