Cleanup SentenceBoundary support (#396)
- Default to SentenceBoundary.
- Modify boundary argument to lowercase to match other options.
- Drop merge_cues support as SentenceBoundary renders it obsolete.

Signed-off-by: rany <rany2@riseup.net>
This commit is contained in:
@@ -24,7 +24,7 @@ VOICE = "zh-CN-YunjianNeural"
|
|||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
"""Main function"""
|
"""Main function"""
|
||||||
communicate = edge_tts.Communicate(TEXT, VOICE, Boundary="SentenceBoundary")
|
communicate = edge_tts.Communicate(TEXT, VOICE, boundary="SentenceBoundary")
|
||||||
submaker = edge_tts.SubMaker()
|
submaker = edge_tts.SubMaker()
|
||||||
stdout = sys.stdout
|
stdout = sys.stdout
|
||||||
audio_bytes = []
|
audio_bytes = []
|
||||||
|
|||||||
@@ -19,14 +19,12 @@ from typing import (
|
|||||||
Literal,
|
Literal,
|
||||||
Optional,
|
Optional,
|
||||||
Tuple,
|
Tuple,
|
||||||
TypedDict,
|
|
||||||
Union,
|
Union,
|
||||||
)
|
)
|
||||||
from xml.sax.saxutils import escape, unescape
|
from xml.sax.saxutils import escape, unescape
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import certifi
|
import certifi
|
||||||
from typing_extensions import NotRequired, Unpack
|
|
||||||
|
|
||||||
from .constants import DEFAULT_VOICE, SEC_MS_GEC_VERSION, WSS_HEADERS, WSS_URL
|
from .constants import DEFAULT_VOICE, SEC_MS_GEC_VERSION, WSS_HEADERS, WSS_URL
|
||||||
from .data_classes import TTSConfig
|
from .data_classes import TTSConfig
|
||||||
@@ -311,19 +309,12 @@ def ssml_headers_plus_data(request_id: str, timestamp: str, ssml: str) -> str:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CommunicateRequest(TypedDict):
|
|
||||||
"""
|
|
||||||
A class to communicate with the service.
|
|
||||||
"""
|
|
||||||
|
|
||||||
Boundary: NotRequired[Literal["WordBoundary", "SentenceBoundary"]]
|
|
||||||
|
|
||||||
|
|
||||||
class Communicate:
|
class Communicate:
|
||||||
"""
|
"""
|
||||||
Communicate with the service.
|
Communicate with the service.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# pylint: disable=too-many-arguments
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
text: str,
|
text: str,
|
||||||
@@ -332,24 +323,13 @@ class Communicate:
|
|||||||
rate: str = "+0%",
|
rate: str = "+0%",
|
||||||
volume: str = "+0%",
|
volume: str = "+0%",
|
||||||
pitch: str = "+0Hz",
|
pitch: str = "+0Hz",
|
||||||
|
boundary: Literal["WordBoundary", "SentenceBoundary"] = "SentenceBoundary",
|
||||||
connector: Optional[aiohttp.BaseConnector] = None,
|
connector: Optional[aiohttp.BaseConnector] = None,
|
||||||
proxy: Optional[str] = None,
|
proxy: Optional[str] = None,
|
||||||
connect_timeout: Optional[int] = 10,
|
connect_timeout: Optional[int] = 10,
|
||||||
receive_timeout: Optional[int] = 60,
|
receive_timeout: Optional[int] = 60,
|
||||||
**kwargs: Unpack[CommunicateRequest],
|
|
||||||
):
|
):
|
||||||
"""
|
|
||||||
Args:
|
|
||||||
boundary (str): The boundary to use for the TTS.
|
|
||||||
Defaults to "WordBoundary".
|
|
||||||
Valid values are "WordBoundary" and "SentenceBoundary".
|
|
||||||
If "WordBoundary", the TTS will return a word boundary for each word.
|
|
||||||
If "SentenceBoundary", the TTS will return a sentence boundary for each sentence.
|
|
||||||
Which is more friendly to Chinese users.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Validate TTS settings and store the TTSConfig object.
|
# Validate TTS settings and store the TTSConfig object.
|
||||||
boundary = kwargs.get("Boundary", "WordBoundary")
|
|
||||||
self.tts_config = TTSConfig(voice, rate, volume, pitch, boundary)
|
self.tts_config = TTSConfig(voice, rate, volume, pitch, boundary)
|
||||||
|
|
||||||
# Validate the text parameter.
|
# Validate the text parameter.
|
||||||
|
|||||||
@@ -5,6 +5,7 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import re
|
import re
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from typing import Literal
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -17,7 +18,7 @@ class TTSConfig:
|
|||||||
rate: str
|
rate: str
|
||||||
volume: str
|
volume: str
|
||||||
pitch: str
|
pitch: str
|
||||||
boundary: str
|
boundary: Literal["WordBoundary", "SentenceBoundary"]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def validate_string_param(param_name: str, param_value: str, pattern: str) -> str:
|
def validate_string_param(param_name: str, param_value: str, pattern: str) -> str:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""SubMaker module is used to generate subtitles from WordBoundary and SentenceBoundary events."""
|
"""SubMaker module is used to generate subtitles from WordBoundary and SentenceBoundary events."""
|
||||||
|
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from typing import List
|
from typing import List, Optional
|
||||||
|
|
||||||
from .srt_composer import Subtitle, compose
|
from .srt_composer import Subtitle, compose
|
||||||
from .typing import TTSChunk
|
from .typing import TTSChunk
|
||||||
@@ -14,6 +14,7 @@ class SubMaker:
|
|||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self.cues: List[Subtitle] = []
|
self.cues: List[Subtitle] = []
|
||||||
|
self.type: Optional[str] = None
|
||||||
|
|
||||||
def feed(self, msg: TTSChunk) -> None:
|
def feed(self, msg: TTSChunk) -> None:
|
||||||
"""
|
"""
|
||||||
@@ -26,7 +27,16 @@ class SubMaker:
|
|||||||
None
|
None
|
||||||
"""
|
"""
|
||||||
if msg["type"] not in ("WordBoundary", "SentenceBoundary"):
|
if msg["type"] not in ("WordBoundary", "SentenceBoundary"):
|
||||||
raise ValueError("Invalid message type, expected 'WordBoundary'")
|
raise ValueError(
|
||||||
|
"Invalid message type, expected 'WordBoundary' or 'SentenceBoundary'."
|
||||||
|
)
|
||||||
|
|
||||||
|
if self.type is None:
|
||||||
|
self.type = msg["type"]
|
||||||
|
elif self.type != msg["type"]:
|
||||||
|
raise ValueError(
|
||||||
|
f"Expected message type '{self.type}', but got '{msg['type']}'."
|
||||||
|
)
|
||||||
|
|
||||||
self.cues.append(
|
self.cues.append(
|
||||||
Subtitle(
|
Subtitle(
|
||||||
@@ -37,38 +47,6 @@ class SubMaker:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
def merge_cues(self, words: int) -> None:
|
|
||||||
"""
|
|
||||||
Merge cues to reduce the number of cues.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
words (int): The number of words to merge.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
None
|
|
||||||
"""
|
|
||||||
if words <= 0:
|
|
||||||
raise ValueError("Invalid number of words to merge, expected > 0")
|
|
||||||
|
|
||||||
if len(self.cues) == 0:
|
|
||||||
return
|
|
||||||
|
|
||||||
new_cues: List[Subtitle] = []
|
|
||||||
current_cue: Subtitle = self.cues[0]
|
|
||||||
for cue in self.cues[1:]:
|
|
||||||
if len(current_cue.content.split()) < words:
|
|
||||||
current_cue = Subtitle(
|
|
||||||
index=current_cue.index,
|
|
||||||
start=current_cue.start,
|
|
||||||
end=cue.end,
|
|
||||||
content=f"{current_cue.content} {cue.content}",
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
new_cues.append(current_cue)
|
|
||||||
current_cue = cue
|
|
||||||
new_cues.append(current_cue)
|
|
||||||
self.cues = new_cues
|
|
||||||
|
|
||||||
def get_srt(self) -> str:
|
def get_srt(self) -> str:
|
||||||
"""
|
"""
|
||||||
Get the SRT formatted subtitles from the SubMaker object.
|
Get the SRT formatted subtitles from the SubMaker object.
|
||||||
|
|||||||
@@ -75,9 +75,6 @@ async def _run_tts(args: UtilArgs) -> None:
|
|||||||
elif chunk["type"] in ("WordBoundary", "SentenceBoundary"):
|
elif chunk["type"] in ("WordBoundary", "SentenceBoundary"):
|
||||||
submaker.feed(chunk)
|
submaker.feed(chunk)
|
||||||
|
|
||||||
if args.words_in_cue > 0:
|
|
||||||
submaker.merge_cues(args.words_in_cue)
|
|
||||||
|
|
||||||
if sub_file is not None:
|
if sub_file is not None:
|
||||||
sub_file.write(submaker.get_srt())
|
sub_file.write(submaker.get_srt())
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
Reference in New Issue
Block a user