Cleanup SentenceBoundary support (#396)

- Default to SentenceBoundary
- Modify boundary argument to lowercase to match other options.
- Drop merge_cues support as SentenceBoundary renders it obsolete.

Signed-off-by: rany <rany2@riseup.net>
This commit is contained in:
rany2
2025-08-05 14:30:30 +03:00
committed by GitHub
parent 645c207cfd
commit c78e49d28e
5 changed files with 17 additions and 61 deletions

View File

@@ -24,7 +24,7 @@ VOICE = "zh-CN-YunjianNeural"
def main() -> None:
"""Main function"""
communicate = edge_tts.Communicate(TEXT, VOICE, Boundary="SentenceBoundary")
communicate = edge_tts.Communicate(TEXT, VOICE, boundary="SentenceBoundary")
submaker = edge_tts.SubMaker()
stdout = sys.stdout
audio_bytes = []

View File

@@ -19,14 +19,12 @@ from typing import (
Literal,
Optional,
Tuple,
TypedDict,
Union,
)
from xml.sax.saxutils import escape, unescape
import aiohttp
import certifi
from typing_extensions import NotRequired, Unpack
from .constants import DEFAULT_VOICE, SEC_MS_GEC_VERSION, WSS_HEADERS, WSS_URL
from .data_classes import TTSConfig
@@ -311,19 +309,12 @@ def ssml_headers_plus_data(request_id: str, timestamp: str, ssml: str) -> str:
)
class CommunicateRequest(TypedDict):
"""
Optional keyword arguments accepted by Communicate.__init__ through **kwargs.

Keys:
Boundary: Either "WordBoundary" or "SentenceBoundary". The key may be omitted;
Communicate.__init__ reads it with kwargs.get("Boundary", "WordBoundary").
"""
Boundary: NotRequired[Literal["WordBoundary", "SentenceBoundary"]]
class Communicate:
"""
Communicate with the service.
"""
# pylint: disable=too-many-arguments
def __init__(
self,
text: str,
@@ -332,24 +323,13 @@ class Communicate:
rate: str = "+0%",
volume: str = "+0%",
pitch: str = "+0Hz",
boundary: Literal["WordBoundary", "SentenceBoundary"] = "SentenceBoundary",
connector: Optional[aiohttp.BaseConnector] = None,
proxy: Optional[str] = None,
connect_timeout: Optional[int] = 10,
receive_timeout: Optional[int] = 60,
**kwargs: Unpack[CommunicateRequest],
):
"""
Args:
boundary (str): The boundary to use for the TTS.
Defaults to "WordBoundary".
Valid values are "WordBoundary" and "SentenceBoundary".
If "WordBoundary", the TTS will return a word boundary for each word.
If "SentenceBoundary", the TTS will return a sentence boundary for each sentence.
Which is more friendly to Chinese users.
"""
# Validate TTS settings and store the TTSConfig object.
boundary = kwargs.get("Boundary", "WordBoundary")
self.tts_config = TTSConfig(voice, rate, volume, pitch, boundary)
# Validate the text parameter.

View File

@@ -5,6 +5,7 @@
import argparse
import re
from dataclasses import dataclass
from typing import Literal
@dataclass
@@ -17,7 +18,7 @@ class TTSConfig:
rate: str
volume: str
pitch: str
boundary: str
boundary: Literal["WordBoundary", "SentenceBoundary"]
@staticmethod
def validate_string_param(param_name: str, param_value: str, pattern: str) -> str:

View File

@@ -1,7 +1,7 @@
"""SubMaker module is used to generate subtitles from WordBoundary and SentenceBoundary events."""
from datetime import timedelta
from typing import List
from typing import List, Optional
from .srt_composer import Subtitle, compose
from .typing import TTSChunk
@@ -14,6 +14,7 @@ class SubMaker:
def __init__(self) -> None:
# Subtitle cues collected so far; feed() appends one Subtitle per accepted message.
self.cues: List[Subtitle] = []
# Boundary type of the messages fed so far; stays None until feed() records the first one.
self.type: Optional[str] = None
def feed(self, msg: TTSChunk) -> None:
"""
@@ -26,7 +27,16 @@ class SubMaker:
None
"""
if msg["type"] not in ("WordBoundary", "SentenceBoundary"):
raise ValueError("Invalid message type, expected 'WordBoundary'")
raise ValueError(
"Invalid message type, expected 'WordBoundary' or 'SentenceBoundary'."
)
if self.type is None:
self.type = msg["type"]
elif self.type != msg["type"]:
raise ValueError(
f"Expected message type '{self.type}', but got '{msg['type']}'."
)
self.cues.append(
Subtitle(
@@ -37,38 +47,6 @@ class SubMaker:
)
)
def merge_cues(self, words: int) -> None:
"""
Merge consecutive cues in place to reduce the number of cues.

Starting from the first cue, each following cue is folded into the
current one while the current cue holds fewer than `words`
whitespace-separated words: the contents are joined with a single
space and the end time is extended to the folded cue's end. A merged
cue keeps the index and start time of the first cue in its group.

Args:
words (int): The word count a cue must reach before the next cue starts a new group.

Returns:
None

Raises:
ValueError: If `words` is not greater than 0.
"""
if words <= 0:
raise ValueError("Invalid number of words to merge, expected > 0")
# Nothing to merge when no cues have been collected.
if len(self.cues) == 0:
return
new_cues: List[Subtitle] = []
current_cue: Subtitle = self.cues[0]
for cue in self.cues[1:]:
# The word count comes from whitespace-splitting the accumulated content.
if len(current_cue.content.split()) < words:
# Still below the threshold: absorb this cue into the current group.
current_cue = Subtitle(
index=current_cue.index,
start=current_cue.start,
end=cue.end,
content=f"{current_cue.content} {cue.content}",
)
else:
# Threshold reached: keep the finished group and start a new one.
new_cues.append(current_cue)
current_cue = cue
# Keep the cue that was still being accumulated when the input ran out.
new_cues.append(current_cue)
self.cues = new_cues
def get_srt(self) -> str:
"""
Get the SRT formatted subtitles from the SubMaker object.

View File

@@ -75,9 +75,6 @@ async def _run_tts(args: UtilArgs) -> None:
elif chunk["type"] in ("WordBoundary", "SentenceBoundary"):
submaker.feed(chunk)
if args.words_in_cue > 0:
submaker.merge_cues(args.words_in_cue)
if sub_file is not None:
sub_file.write(submaker.get_srt())
finally: