Clean up subtitle-related code and make it easier to use SubMaker (#329)
Also don't output subtitles to STDERR by default. Signed-off-by: rany <rany2@riseup.net>
This commit is contained in:
@@ -26,7 +26,7 @@ async def amain() -> None:
|
|||||||
if chunk["type"] == "audio":
|
if chunk["type"] == "audio":
|
||||||
file.write(chunk["data"])
|
file.write(chunk["data"])
|
||||||
elif chunk["type"] == "WordBoundary":
|
elif chunk["type"] == "WordBoundary":
|
||||||
submaker.add_cue((chunk["offset"], chunk["duration"]), chunk["text"])
|
submaker.feed(chunk)
|
||||||
|
|
||||||
with open(SRT_FILE, "w", encoding="utf-8") as file:
|
with open(SRT_FILE, "w", encoding="utf-8") as file:
|
||||||
file.write(submaker.get_srt())
|
file.write(submaker.get_srt())
|
||||||
|
|||||||
@@ -1,9 +1,11 @@
|
|||||||
"""SubMaker module is used to generate subtitles from WordBoundary events."""
|
"""SubMaker module is used to generate subtitles from WordBoundary events."""
|
||||||
|
|
||||||
from typing import List, Tuple
|
from typing import List
|
||||||
|
|
||||||
import srt # type: ignore
|
import srt # type: ignore
|
||||||
|
|
||||||
|
from .typing import TTSChunk
|
||||||
|
|
||||||
|
|
||||||
class SubMaker:
|
class SubMaker:
|
||||||
"""
|
"""
|
||||||
@@ -13,23 +15,25 @@ class SubMaker:
|
|||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self.cues: List[srt.Subtitle] = [] # type: ignore
|
self.cues: List[srt.Subtitle] = [] # type: ignore
|
||||||
|
|
||||||
def add_cue(self, timestamp: Tuple[float, float], text: str) -> None:
|
def feed(self, msg: TTSChunk) -> None:
|
||||||
"""
|
"""
|
||||||
Add a cue to the SubMaker object.
|
Feed a WordBoundary message to the SubMaker object.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
timestamp (tuple): The offset and duration of the subtitle.
|
msg (dict): The WordBoundary message.
|
||||||
text (str): The text of the subtitle.
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
None
|
None
|
||||||
"""
|
"""
|
||||||
|
if msg["type"] != "WordBoundary":
|
||||||
|
raise ValueError("Invalid message type, expected 'WordBoundary'")
|
||||||
|
|
||||||
self.cues.append(
|
self.cues.append(
|
||||||
srt.Subtitle(
|
srt.Subtitle(
|
||||||
index=len(self.cues) + 1,
|
index=len(self.cues) + 1,
|
||||||
start=srt.timedelta(microseconds=timestamp[0] / 10),
|
start=srt.timedelta(microseconds=msg["offset"] / 10),  # cue begins at the word's offset (value / 10 -> microseconds)
|
||||||
end=srt.timedelta(microseconds=sum(timestamp) / 10),
|
end=srt.timedelta(microseconds=(msg["duration"] + msg["offset"]) / 10),
|
||||||
content=text,
|
content=msg["text"],
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -41,3 +45,6 @@ class SubMaker:
|
|||||||
str: The SRT formatted subtitles.
|
str: The SRT formatted subtitles.
|
||||||
"""
|
"""
|
||||||
return srt.compose(self.cues) # type: ignore
|
return srt.compose(self.cues) # type: ignore
|
||||||
|
|
||||||
|
def __str__(self) -> str:
|
||||||
|
return self.get_srt()
|
||||||
|
|||||||
@@ -3,8 +3,7 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import asyncio
|
import asyncio
|
||||||
import sys
|
import sys
|
||||||
from io import TextIOWrapper
|
from typing import Any, Optional, TextIO
|
||||||
from typing import Any, TextIO, Union
|
|
||||||
|
|
||||||
from tabulate import tabulate
|
from tabulate import tabulate
|
||||||
|
|
||||||
@@ -45,31 +44,42 @@ async def _run_tts(args: Any) -> None:
|
|||||||
print("\nOperation canceled.", file=sys.stderr)
|
print("\nOperation canceled.", file=sys.stderr)
|
||||||
return
|
return
|
||||||
|
|
||||||
tts: Communicate = Communicate(
|
communicate = Communicate(
|
||||||
args.text,
|
args.text,
|
||||||
args.voice,
|
args.voice,
|
||||||
proxy=args.proxy,
|
|
||||||
rate=args.rate,
|
rate=args.rate,
|
||||||
volume=args.volume,
|
volume=args.volume,
|
||||||
pitch=args.pitch,
|
pitch=args.pitch,
|
||||||
|
proxy=args.proxy,
|
||||||
)
|
)
|
||||||
subs: SubMaker = SubMaker()
|
submaker = SubMaker()
|
||||||
with (
|
try:
|
||||||
open(args.write_media, "wb") if args.write_media else sys.stdout.buffer
|
audio_file = (
|
||||||
) as audio_file:
|
open(args.write_media, "wb")
|
||||||
async for chunk in tts.stream():
|
if args.write_media is not None and args.write_media != "-"
|
||||||
|
else sys.stdout.buffer
|
||||||
|
)
|
||||||
|
sub_file: Optional[TextIO] = (
|
||||||
|
open(args.write_subtitles, "w", encoding="utf-8")
|
||||||
|
if args.write_subtitles is not None and args.write_subtitles != "-"
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
if sub_file is None and args.write_subtitles == "-":
|
||||||
|
sub_file = sys.stderr
|
||||||
|
|
||||||
|
async for chunk in communicate.stream():
|
||||||
if chunk["type"] == "audio":
|
if chunk["type"] == "audio":
|
||||||
audio_file.write(chunk["data"])
|
audio_file.write(chunk["data"])
|
||||||
elif chunk["type"] == "WordBoundary":
|
elif chunk["type"] == "WordBoundary":
|
||||||
subs.add_cue((chunk["offset"], chunk["duration"]), chunk["text"])
|
submaker.feed(chunk)
|
||||||
|
|
||||||
sub_file: Union[TextIOWrapper, TextIO] = (
|
if sub_file is not None:
|
||||||
open(args.write_subtitles, "w", encoding="utf-8")
|
sub_file.write(submaker.get_srt())
|
||||||
if args.write_subtitles
|
finally:
|
||||||
else sys.stderr
|
if audio_file is not sys.stdout.buffer:
|
||||||
)
|
audio_file.close()
|
||||||
with sub_file:
|
if sub_file is not None and sub_file is not sys.stderr:
|
||||||
sub_file.write(subs.get_srt())
|
sub_file.close()
|
||||||
|
|
||||||
|
|
||||||
async def amain() -> None:
|
async def amain() -> None:
|
||||||
|
|||||||
Reference in New Issue
Block a user