Clean up subtitle-related code and make SubMaker easier to use (#329)

Also don't output subtitles to STDERR by default.

Signed-off-by: rany <rany2@riseup.net>
This commit is contained in:
Rany
2024-11-22 22:57:54 +02:00
committed by GitHub
parent 93fb851cd2
commit 144215496a
3 changed files with 43 additions and 26 deletions

View File

@@ -26,7 +26,7 @@ async def amain() -> None:
if chunk["type"] == "audio": if chunk["type"] == "audio":
file.write(chunk["data"]) file.write(chunk["data"])
elif chunk["type"] == "WordBoundary": elif chunk["type"] == "WordBoundary":
submaker.add_cue((chunk["offset"], chunk["duration"]), chunk["text"]) submaker.feed(chunk)
with open(SRT_FILE, "w", encoding="utf-8") as file: with open(SRT_FILE, "w", encoding="utf-8") as file:
file.write(submaker.get_srt()) file.write(submaker.get_srt())

View File

@@ -1,9 +1,11 @@
"""SubMaker module is used to generate subtitles from WordBoundary events.""" """SubMaker module is used to generate subtitles from WordBoundary events."""
from typing import List, Tuple from typing import List
import srt # type: ignore import srt # type: ignore
from .typing import TTSChunk
class SubMaker: class SubMaker:
""" """
@@ -13,23 +15,25 @@ class SubMaker:
def __init__(self) -> None: def __init__(self) -> None:
self.cues: List[srt.Subtitle] = [] # type: ignore self.cues: List[srt.Subtitle] = [] # type: ignore
def add_cue(self, timestamp: Tuple[float, float], text: str) -> None: def feed(self, msg: TTSChunk) -> None:
""" """
Add a cue to the SubMaker object. Feed a WordBoundary message to the SubMaker object.
Args: Args:
timestamp (tuple): The offset and duration of the subtitle. msg (dict): The WordBoundary message.
text (str): The text of the subtitle.
Returns: Returns:
None None
""" """
if msg["type"] != "WordBoundary":
raise ValueError("Invalid message type, expected 'WordBoundary'")
self.cues.append( self.cues.append(
srt.Subtitle( srt.Subtitle(
index=len(self.cues) + 1, index=len(self.cues) + 1,
start=srt.timedelta(microseconds=timestamp[0] / 10), start=srt.timedelta(microseconds=msg["duration"] / 10),
end=srt.timedelta(microseconds=sum(timestamp) / 10), end=srt.timedelta(microseconds=(msg["duration"] + msg["offset"]) / 10),
content=text, content=msg["text"],
) )
) )
@@ -41,3 +45,6 @@ class SubMaker:
str: The SRT formatted subtitles. str: The SRT formatted subtitles.
""" """
return srt.compose(self.cues) # type: ignore return srt.compose(self.cues) # type: ignore
def __str__(self) -> str:
return self.get_srt()

View File

@@ -3,8 +3,7 @@
import argparse import argparse
import asyncio import asyncio
import sys import sys
from io import TextIOWrapper from typing import Any, Optional, TextIO
from typing import Any, TextIO, Union
from tabulate import tabulate from tabulate import tabulate
@@ -45,31 +44,42 @@ async def _run_tts(args: Any) -> None:
print("\nOperation canceled.", file=sys.stderr) print("\nOperation canceled.", file=sys.stderr)
return return
tts: Communicate = Communicate( communicate = Communicate(
args.text, args.text,
args.voice, args.voice,
proxy=args.proxy,
rate=args.rate, rate=args.rate,
volume=args.volume, volume=args.volume,
pitch=args.pitch, pitch=args.pitch,
proxy=args.proxy,
) )
subs: SubMaker = SubMaker() submaker = SubMaker()
with ( try:
open(args.write_media, "wb") if args.write_media else sys.stdout.buffer audio_file = (
) as audio_file: open(args.write_media, "wb")
async for chunk in tts.stream(): if args.write_media is not None and args.write_media != "-"
else sys.stdout.buffer
)
sub_file: Optional[TextIO] = (
open(args.write_subtitles, "w", encoding="utf-8")
if args.write_subtitles is not None and args.write_subtitles != "-"
else None
)
if sub_file is None and args.write_subtitles == "-":
sub_file = sys.stderr
async for chunk in communicate.stream():
if chunk["type"] == "audio": if chunk["type"] == "audio":
audio_file.write(chunk["data"]) audio_file.write(chunk["data"])
elif chunk["type"] == "WordBoundary": elif chunk["type"] == "WordBoundary":
subs.add_cue((chunk["offset"], chunk["duration"]), chunk["text"]) submaker.feed(chunk)
sub_file: Union[TextIOWrapper, TextIO] = ( if sub_file is not None:
open(args.write_subtitles, "w", encoding="utf-8") sub_file.write(submaker.get_srt())
if args.write_subtitles finally:
else sys.stderr if audio_file is not sys.stdout.buffer:
) audio_file.close()
with sub_file: if sub_file is not None and sub_file is not sys.stderr:
sub_file.write(subs.get_srt()) sub_file.close()
async def amain() -> None: async def amain() -> None: