Files
edge-tts/src/edge_tts/submaker.py
2023-01-05 00:56:15 +02:00

102 lines
2.9 KiB
Python

"""
SubMaker module for the Edge TTS project.
SubMaker simplifies the creation of subtitles from the timing
information provided by the service.
"""
import math
from typing import List, Tuple
from xml.sax.saxutils import escape, unescape
def formatter(offset1: float, offset2: float, subdata: str) -> str:
    """
    Build one subtitle cue: a timing line followed by the escaped text.

    Args:
        offset1 (float): Start offset of the cue, passed to mktimestamp.
        offset2 (float): End offset of the cue, passed to mktimestamp.
        subdata (str): The cue text; it is XML-escaped before being emitted.

    Returns:
        str: "<start> --> <end>", a CRLF, the escaped text, and a blank
        CRLF-terminated line that closes the cue.
    """
    start_code = mktimestamp(offset1)
    end_code = mktimestamp(offset2)
    timing_line = f"{start_code} --> {end_code}\r\n"
    payload = f"{escape(subdata)}\r\n\r\n"
    return timing_line + payload
def mktimestamp(time_unit: float) -> str:
    """
    mktimestamp returns the timecode of the subtitle.

    The timecode is in the format of 00:00:00.000.

    The value is rounded to whole milliseconds *before* it is split into
    hours, minutes and seconds. Rounding only while formatting the seconds
    field would let a value just below a minute boundary be printed as
    "60.000" seconds instead of carrying into the minute.

    Args:
        time_unit (float): The time in units of 100 nanoseconds
            (10**7 units per second).

    Returns:
        str: The timecode of the subtitle.
    """
    # 10**4 units of 100 ns make up one millisecond.
    total_millis = round(time_unit / 10**4)
    total_seconds, millis = divmod(total_millis, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}"
class SubMaker:
    """
    SubMaker collects timed pieces of text and renders them as a WEBVTT
    subtitle document.
    """

    def __init__(self, overlapping: int = 1) -> None:
        """
        SubMaker constructor.

        Args:
            overlapping (int): The amount of time in seconds that the
                               subtitles should overlap.
        """
        # (start, end) pairs, in the same units as the timestamps given
        # to create_sub.
        self.offset: List[Tuple[float, float]] = []
        # Cue texts, index-aligned with self.offset.
        self.subs: List[str] = []
        # Stored in units of 100 ns (10**7 per second) so it can be added
        # directly to the cue end offsets.
        self.overlapping: int = overlapping * (10**7)

    def create_sub(self, timestamp: Tuple[float, float], text: str) -> None:
        """
        create_sub creates a subtitle with the given timestamp and text
        and adds it to the list of subtitles

        Args:
            timestamp (tuple): The offset and duration of the subtitle.
            text (str): The text of the subtitle.

        Returns:
            None
        """
        start, duration = timestamp[0], timestamp[1]
        self.offset.append((start, start + duration))
        self.subs.append(text)

    @staticmethod
    def _wrap_text(text: str, width: int = 79) -> str:
        """
        Split text into CRLF-joined lines of at most ``width`` characters.

        A line gets a trailing "-" only when the cut falls inside a word,
        i.e. when the characters on both sides of the cut are not spaces.
        Spaces adjacent to a cut are dropped.

        Args:
            text (str): The text to wrap.
            width (int): Maximum number of text characters per line
                (the optional hyphen is not counted).

        Returns:
            str: The wrapped text.
        """
        total = len(text)
        lines: List[str] = []
        for start in range(0, total, width):
            end = start + width
            line = text[start:end]
            # Only trim at actual cut points; the outer edges of the text
            # are left untouched.
            if start > 0:
                line = line.lstrip(" ")
            if end < total:
                line = line.rstrip(" ")
            if not line:
                # An empty line inside a cue would end the cue early.
                continue
            # Look at both sides of the cut in the original text to decide
            # whether a word was broken in two.
            if end < total and text[end - 1] != " " and text[end] != " ":
                line += "-"
            lines.append(line)
        return "\r\n".join(lines)

    def generate_subs(self) -> str:
        """
        generate_subs generates the complete subtitle file.

        Each cue's end offset is extended by the configured overlap.

        Returns:
            str: The complete subtitle file, or an empty string if the
            stored texts and offsets are not the same length.
        """
        if len(self.subs) != len(self.offset):
            return ""

        cues = [
            formatter(
                start,
                end + self.overlapping,
                self._wrap_text(unescape(text)),
            )
            for (start, end), text in zip(self.offset, self.subs)
        ]
        return "WEBVTT\r\n\r\n" + "".join(cues)