""" SubMaker package for the Edge TTS project. SubMaker is a package that makes the process of creating subtitles with information provided by the service easier. """ import math from typing import List, Tuple from xml.sax.saxutils import escape, unescape def formatter(offset1: float, offset2: float, subdata: str) -> str: """ formatter returns the timecode and the text of the subtitle. """ return ( f"{mktimestamp(offset1)} --> {mktimestamp(offset2)}\r\n" f"{escape(subdata)}\r\n\r\n" ) def mktimestamp(time_unit: float) -> str: """ mktimestamp returns the timecode of the subtitle. The timecode is in the format of 00:00:00.000. Returns: str: The timecode of the subtitle. """ hour = math.floor(time_unit / 10**7 / 3600) minute = math.floor((time_unit / 10**7 / 60) % 60) seconds = (time_unit / 10**7) % 60 return f"{hour:02d}:{minute:02d}:{seconds:06.3f}" class SubMaker: """ SubMaker class """ def __init__(self, overlapping: int = 1) -> None: """ SubMaker constructor. Args: overlapping (int): The amount of time in seconds that the subtitles should overlap. """ self.offset: List[Tuple[float, float]] = [] self.subs: List[str] = [] self.overlapping: int = overlapping * (10**7) def create_sub(self, timestamp: Tuple[float, float], text: str) -> None: """ create_sub creates a subtitle with the given timestamp and text and adds it to the list of subtitles Args: timestamp (tuple): The offset and duration of the subtitle. text (str): The text of the subtitle. Returns: None """ self.offset.append((timestamp[0], timestamp[0] + timestamp[1])) self.subs.append(text) def generate_subs(self) -> str: """ generate_subs generates the complete subtitle file. Returns: str: The complete subtitle file. """ if len(self.subs) == len(self.offset): data = "WEBVTT\r\n\r\n" for offset, subs in zip(self.offset, self.subs): subs = unescape(subs) split_subs: List[str] = [subs[i : i + 79] for i in range(0, len(subs), 79)] for i in range(len(split_subs) - 1): sub = split_subs[i] split_at_word = True if sub[-1] == " ": split_subs[i] = sub[:-1] split_at_word = False if sub[0] == " ": split_subs[i] = sub[1:] split_at_word = False if split_at_word: split_subs[i] += "-" subs = "\r\n".join(split_subs) data += formatter(offset[0], offset[1] + self.overlapping, subs) return data return ""