diff --git a/lint.sh b/lint.sh index c2c19a9..b80309d 100755 --- a/lint.sh +++ b/lint.sh @@ -1,2 +1,3 @@ find src examples -name '*.py' | xargs black find src examples -name '*.py' | xargs isort +find src examples -name '*.py' | xargs pylint diff --git a/setup.cfg b/setup.cfg index 7d1ee6f..6107499 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = edge-tts -version = 4.0.10 +version = 4.0.11 author = rany author_email = ranygh@riseup.net description = Microsoft Edge's TTS diff --git a/src/edge_playback/__init__.py b/src/edge_playback/__init__.py index c88a423..86df7c4 100644 --- a/src/edge_playback/__init__.py +++ b/src/edge_playback/__init__.py @@ -28,7 +28,7 @@ def main(): with subprocess.Popen( [ "edge-tts", - "--boundary-type=2", + "--boundary-type=1", f"--write-media={media.name}", f"--write-subtitles={subtitle.name}", ] diff --git a/src/edge_tts/communicate.py b/src/edge_tts/communicate.py index a383ec8..432ce39 100644 --- a/src/edge_tts/communicate.py +++ b/src/edge_tts/communicate.py @@ -55,8 +55,8 @@ def remove_incompatible_characters(string): string = list(string) - for idx in range(len(string)): # pylint: disable=consider-using-enumerate - code = ord(string[idx]) + for idx, char in enumerate(string): + code = ord(char) if (0 <= code <= 8) or (11 <= code <= 12) or (14 <= code <= 31): string[idx] = " " @@ -193,7 +193,7 @@ def ssml_headers_plus_data(request_id, timestamp, ssml): ) -class Communicate: # pylint: disable=too-few-public-methods +class Communicate: """ Class for communicating with the service. """ @@ -215,7 +215,7 @@ class Communicate: # pylint: disable=too-few-public-methods volume="+0%", customspeak=False, proxy=None, - ): # pylint: disable=too-many-arguments, too-many-locals + ): """ Runs the Communicate class. @@ -234,14 +234,14 @@ class Communicate: # pylint: disable=too-few-public-methods """ word_boundary = False - sentence_boundary = False if boundary_type > 0: word_boundary = True if boundary_type > 1: - sentence_boundary = True + raise ValueError( + "Invalid boundary type. SentenceBoundary is no longer supported." + ) - sentence_boundary = str(sentence_boundary).lower() word_boundary = str(word_boundary).lower() if not customspeak: @@ -262,12 +262,8 @@ class Communicate: # pylint: disable=too-few-public-methods if isinstance(messages, str): messages = [messages] - # Variables for the loop download = False - current_subtitle = "" - first_offset = None - last_offset = None async with aiohttp.ClientSession(trust_env=True) as session: async with session.ws_connect( f"{WSS_URL}&ConnectionId={connect_id()}", @@ -304,7 +300,7 @@ class Communicate: # pylint: disable=too-few-public-methods "Content-Type:application/json; charset=utf-8\r\n" "Path:speech.config\r\n\r\n" '{"context":{"synthesis":{"audio":{"metadataoptions":{' - f'"sentenceBoundaryEnabled":{sentence_boundary},' + f'"sentenceBoundaryEnabled":false,' f'"wordBoundaryEnabled":{word_boundary}}},"outputFormat":"{codec}"' "}}}}\r\n" ) @@ -326,12 +322,6 @@ class Communicate: # pylint: disable=too-few-public-methods # Begin listening for the response. async for received in websocket: - if received.type in ( - aiohttp.WSMsgType.CLOSED, - aiohttp.WSMsgType.ERROR, - ): - break - if received.type == aiohttp.WSMsgType.TEXT: parameters, data = get_headers_and_data(received.data) if ( @@ -358,12 +348,15 @@ class Communicate: # pylint: disable=too-few-public-methods metadata_duration = metadata["Metadata"][0]["Data"][ "Duration" ] - except KeyError: - metadata_duration = 0 + except KeyError as exception: + raise ValueError( + "The metadata doesn't contain a Duration field. " + + "This usually happens when SentenceBoundary metadata type is sent." + ) from exception metadata_text = metadata["Metadata"][0]["Data"]["text"][ "Text" ] - if boundary_type == 1: + if metadata_type == "WordBoundary": yield ( [ metadata_offset, @@ -372,31 +365,32 @@ class Communicate: # pylint: disable=too-few-public-methods metadata_text, None, ) + elif metadata_type == "SentenceBoundary": + raise NotImplementedError( + "SentenceBoundary is not supported due to being broken." + ) else: - if metadata_type == "WordBoundary": - if current_subtitle: - current_subtitle += " " - current_subtitle += metadata_text - if first_offset is None: - first_offset = metadata_offset - last_offset = [ - metadata_offset, - metadata_duration, - ] - elif metadata_type == "SentenceBoundary": - if current_subtitle: - yield ( - [ - first_offset, - sum(last_offset) - first_offset, - ], - current_subtitle, - None, - ) - current_subtitle = "" - first_offset = None - last_offset = None + raise NotImplementedError( + f"Unknown metadata type: {metadata_type}" + ) + elif ( + "Path" in parameters + and parameters["Path"] == "response" + ): + # TODO: implement this: + """ + X-RequestId:xxxxxxxxxxxxxxxxxxxxxxxxx + Content-Type:application/json; charset=utf-8 + Path:response + {"context":{"serviceTag":"yyyyyyyyyyyyyyyyyyy"},"audio":{"type":"inline","streamId":"zzzzzzzzzzzzzzzzz"}} + """ + pass + else: + raise ValueError( + "The response from the service is not recognized.\n" + + received.data + ) elif received.type == aiohttp.WSMsgType.BINARY: if download: yield ( @@ -406,10 +400,8 @@ class Communicate: # pylint: disable=too-few-public-methods received.data.split(b"Path:audio\r\n")[1:] ), ) - if current_subtitle: - yield ( - [first_offset, sum(last_offset) - first_offset], - current_subtitle, - None, - ) + else: + raise ValueError( + "The service sent a binary message, but we are not expecting one." + ) await websocket.close() diff --git a/src/edge_tts/list_voices.py b/src/edge_tts/list_voices.py index 8d42370..73bc6dc 100644 --- a/src/edge_tts/list_voices.py +++ b/src/edge_tts/list_voices.py @@ -9,7 +9,7 @@ import aiohttp from .constants import VOICE_LIST -async def list_voices(): +async def list_voices(proxy=None): """ List all available voices and their attributes. @@ -36,6 +36,7 @@ async def list_voices(): "Accept-Encoding": "gzip, deflate, br", "Accept-Language": "en-US,en;q=0.9", }, + proxy=proxy, ) as url: data = json.loads(await url.text()) return data diff --git a/src/edge_tts/submaker.py b/src/edge_tts/submaker.py index fdcf7c0..5a432c3 100644 --- a/src/edge_tts/submaker.py +++ b/src/edge_tts/submaker.py @@ -39,7 +39,7 @@ class SubMaker: SubMaker class """ - def __init__(self, overlapping=5): + def __init__(self, overlapping=1): """ SubMaker constructor. @@ -48,7 +48,6 @@ class SubMaker: subtitles should overlap. """ self.subs_and_offset = [] - self.broken_offset = 0 self.overlapping = overlapping * (10**7) def create_sub(self, timestamp, text): @@ -64,13 +63,6 @@ class SubMaker: None """ timestamp[1] += timestamp[0] - - if len(self.subs_and_offset) >= 2: - if self.subs_and_offset[-2][1] >= timestamp[0] + self.broken_offset: - self.broken_offset = self.subs_and_offset[-2][1] - timestamp[0] += self.broken_offset - timestamp[1] += self.broken_offset - self.subs_and_offset.append(timestamp) self.subs_and_offset.append(text) diff --git a/src/edge_tts/util.py b/src/edge_tts/util.py index d0181bd..491e265 100644 --- a/src/edge_tts/util.py +++ b/src/edge_tts/util.py @@ -10,11 +10,11 @@ import sys from edge_tts import Communicate, SubMaker, list_voices -async def _list_voices(): +async def _list_voices(proxy): """ List available voices. """ - for idx, voice in enumerate(await list_voices()): + for idx, voice in enumerate(await list_voices(proxy=proxy)): if idx != 0: print() @@ -112,13 +112,13 @@ async def _main(): "-O", "--overlapping", help="overlapping subtitles in seconds", - default=5, + default=1, type=float, ) parser.add_argument( "-b", "--boundary-type", - help="set boundary type for subtitles. Default 0 for none. Set 1 for word_boundary, 2 for sentence_boundary", + help="set boundary type for subtitles. Default 0 for none. Set 1 for word_boundary.", default=0, type=int, ) @@ -136,7 +136,7 @@ async def _main(): args = parser.parse_args() if args.list_voices: - await _list_voices() + await _list_voices(args.proxy) sys.exit(0) if args.text is not None or args.file is not None: