diff --git a/examples/sync_audio_streaming_with_predefined_voice_subtitles_print2stdout.py b/examples/sync_audio_streaming_with_predefined_voice_subtitles_print2stdout.py
index 7c7890d..4bb770a 100644
--- a/examples/sync_audio_streaming_with_predefined_voice_subtitles_print2stdout.py
+++ b/examples/sync_audio_streaming_with_predefined_voice_subtitles_print2stdout.py
@@ -24,7 +24,7 @@ VOICE = "zh-CN-YunjianNeural"
 
 def main() -> None:
     """Main function"""
-    communicate = edge_tts.Communicate(TEXT, VOICE, Boundary="SentenceBoundary")
+    communicate = edge_tts.Communicate(TEXT, VOICE, boundary="SentenceBoundary")
     submaker = edge_tts.SubMaker()
     stdout = sys.stdout
     audio_bytes = []
diff --git a/src/edge_tts/communicate.py b/src/edge_tts/communicate.py
index 52ea351..ff9bd63 100644
--- a/src/edge_tts/communicate.py
+++ b/src/edge_tts/communicate.py
@@ -19,14 +19,12 @@ from typing import (
     Literal,
     Optional,
     Tuple,
-    TypedDict,
     Union,
 )
 from xml.sax.saxutils import escape, unescape
 
 import aiohttp
 import certifi
-from typing_extensions import NotRequired, Unpack
 
 from .constants import DEFAULT_VOICE, SEC_MS_GEC_VERSION, WSS_HEADERS, WSS_URL
 from .data_classes import TTSConfig
@@ -311,19 +309,12 @@ def ssml_headers_plus_data(request_id: str, timestamp: str, ssml: str) -> str:
     )
 
 
-class CommunicateRequest(TypedDict):
-    """
-    A class to communicate with the service.
-    """
-
-    Boundary: NotRequired[Literal["WordBoundary", "SentenceBoundary"]]
-
-
 class Communicate:
     """
     Communicate with the service.
     """
 
+    # pylint: disable=too-many-arguments
     def __init__(
         self,
         text: str,
@@ -332,24 +323,13 @@ class Communicate:
         rate: str = "+0%",
         volume: str = "+0%",
         pitch: str = "+0Hz",
+        boundary: Literal["WordBoundary", "SentenceBoundary"] = "SentenceBoundary",
         connector: Optional[aiohttp.BaseConnector] = None,
         proxy: Optional[str] = None,
         connect_timeout: Optional[int] = 10,
         receive_timeout: Optional[int] = 60,
-        **kwargs: Unpack[CommunicateRequest],
     ):
-        """
-        Args:
-            boundary (str): The boundary to use for the TTS.
-                Defaults to "WordBoundary".
-                Valid values are "WordBoundary" and "SentenceBoundary".
-                If "WordBoundary", the TTS will return a word boundary for each word.
-                If "SentenceBoundary", the TTS will return a sentence boundary for each sentence.
-                Which is more friendly to Chinese users.
-        """
 
-        # Validate TTS settings and store the TTSConfig object.
-        boundary = kwargs.get("Boundary", "WordBoundary")
         self.tts_config = TTSConfig(voice, rate, volume, pitch, boundary)
 
         # Validate the text parameter.
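For readers of this patch, a minimal usage sketch of the reworked API: `boundary` is now a plain keyword argument on `Communicate` instead of the old `Boundary=` TypedDict kwarg, and it defaults to "SentenceBoundary". The `TEXT`/`VOICE` values and the streaming loop below are illustrative, modelled on the existing example script; only the explicit `boundary=` keyword reflects what this diff changes.

```python
import asyncio

import edge_tts

TEXT = "你好，世界！这是字幕边界的一个演示。"  # illustrative text
VOICE = "zh-CN-YunjianNeural"


async def demo() -> None:
    # boundary is now an explicit keyword argument; per this diff it defaults
    # to "SentenceBoundary", so passing it here is only for illustration.
    communicate = edge_tts.Communicate(TEXT, VOICE, boundary="SentenceBoundary")
    submaker = edge_tts.SubMaker()

    audio = bytearray()
    async for chunk in communicate.stream():
        if chunk["type"] == "audio":
            audio.extend(chunk["data"])
        elif chunk["type"] in ("WordBoundary", "SentenceBoundary"):
            submaker.feed(chunk)

    # With the default boundary, each cue corresponds to one sentence.
    print(submaker.get_srt())


if __name__ == "__main__":
    asyncio.run(demo())
```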
diff --git a/src/edge_tts/data_classes.py b/src/edge_tts/data_classes.py
index dd5e60a..83da751 100644
--- a/src/edge_tts/data_classes.py
+++ b/src/edge_tts/data_classes.py
@@ -5,6 +5,7 @@
 import argparse
 import re
 from dataclasses import dataclass
+from typing import Literal
 
 
 @dataclass
@@ -17,7 +18,7 @@ class TTSConfig:
     rate: str
     volume: str
     pitch: str
-    boundary: str
+    boundary: Literal["WordBoundary", "SentenceBoundary"]
 
     @staticmethod
     def validate_string_param(param_name: str, param_value: str, pattern: str) -> str:
diff --git a/src/edge_tts/submaker.py b/src/edge_tts/submaker.py
index 71a3f22..490f663 100644
--- a/src/edge_tts/submaker.py
+++ b/src/edge_tts/submaker.py
@@ -1,7 +1,7 @@
 """SubMaker module is used to generate subtitles from WordBoundary and SentenceBoundary events."""
 
 from datetime import timedelta
-from typing import List
+from typing import List, Optional
 
 from .srt_composer import Subtitle, compose
 from .typing import TTSChunk
@@ -14,6 +14,7 @@ class SubMaker:
 
     def __init__(self) -> None:
         self.cues: List[Subtitle] = []
+        self.type: Optional[str] = None
 
     def feed(self, msg: TTSChunk) -> None:
         """
@@ -26,7 +27,16 @@
             None
         """
         if msg["type"] not in ("WordBoundary", "SentenceBoundary"):
-            raise ValueError("Invalid message type, expected 'WordBoundary'")
+            raise ValueError(
+                "Invalid message type, expected 'WordBoundary' or 'SentenceBoundary'."
+            )
+
+        if self.type is None:
+            self.type = msg["type"]
+        elif self.type != msg["type"]:
+            raise ValueError(
+                f"Expected message type '{self.type}', but got '{msg['type']}'."
+            )
 
         self.cues.append(
             Subtitle(
@@ -37,38 +47,6 @@
             )
         )
 
-    def merge_cues(self, words: int) -> None:
-        """
-        Merge cues to reduce the number of cues.
-
-        Args:
-            words (int): The number of words to merge.
-
-        Returns:
-            None
-        """
-        if words <= 0:
-            raise ValueError("Invalid number of words to merge, expected > 0")
-
-        if len(self.cues) == 0:
-            return
-
-        new_cues: List[Subtitle] = []
-        current_cue: Subtitle = self.cues[0]
-        for cue in self.cues[1:]:
-            if len(current_cue.content.split()) < words:
-                current_cue = Subtitle(
-                    index=current_cue.index,
-                    start=current_cue.start,
-                    end=cue.end,
-                    content=f"{current_cue.content} {cue.content}",
-                )
-            else:
-                new_cues.append(current_cue)
-                current_cue = cue
-        new_cues.append(current_cue)
-        self.cues = new_cues
-
     def get_srt(self) -> str:
         """
         Get the SRT formatted subtitles from the SubMaker object.
diff --git a/src/edge_tts/util.py b/src/edge_tts/util.py
index d6fe3eb..0d551ac 100644
--- a/src/edge_tts/util.py
+++ b/src/edge_tts/util.py
@@ -75,9 +75,6 @@ async def _run_tts(args: UtilArgs) -> None:
                 elif chunk["type"] in ("WordBoundary", "SentenceBoundary"):
                     submaker.feed(chunk)
 
-            if args.words_in_cue > 0:
-                submaker.merge_cues(args.words_in_cue)
-
             if sub_file is not None:
                 sub_file.write(submaker.get_srt())
     finally:
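A short sketch of the stricter SubMaker behaviour that replaces merge_cues(): the first boundary event pins the expected type, and mixing WordBoundary with SentenceBoundary events now raises instead of being merged after the fact. The chunk dicts below are hand-built stand-ins for what Communicate.stream() yields; their field values are made up for illustration and use the service's 100-nanosecond tick units.

```python
from edge_tts import SubMaker

# Hand-built boundary chunks standing in for stream output (illustrative values).
sentence = {
    "type": "SentenceBoundary",
    "offset": 0,
    "duration": 15_000_000,
    "text": "你好，世界。",
}
word = {
    "type": "WordBoundary",
    "offset": 15_000_000,
    "duration": 5_000_000,
    "text": "hello",
}

submaker = SubMaker()
submaker.feed(sentence)  # first chunk pins submaker.type to "SentenceBoundary"

try:
    submaker.feed(word)  # mixing boundary types now raises a ValueError
except ValueError as err:
    print(err)  # Expected message type 'SentenceBoundary', but got 'WordBoundary'.

print(submaker.get_srt())  # SRT built directly from the fed cues; merge_cues() is gone
```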