From 36d3006df5453d5e646f7e244bc03c67e3458aa0 Mon Sep 17 00:00:00 2001 From: rany2 Date: Tue, 5 Aug 2025 13:43:57 +0300 Subject: [PATCH] Bundle srt module with edge-tts (#393) Fixes https://github.com/rany2/edge-tts/issues/383 Signed-off-by: rany --- LICENSE | 30 ++++ setup.cfg | 2 +- setup.py | 1 - src/edge_tts/data_classes.py | 2 +- src/edge_tts/srt_composer.py | 294 +++++++++++++++++++++++++++++++++++ src/edge_tts/submaker.py | 22 +-- src/edge_tts/util.py | 2 +- 7 files changed, 338 insertions(+), 15 deletions(-) create mode 100644 src/edge_tts/srt_composer.py diff --git a/LICENSE b/LICENSE index 0a04128..5d9bc34 100644 --- a/LICENSE +++ b/LICENSE @@ -1,3 +1,33 @@ +The MIT license is used for 'src/edge_tts/srt_composer.py' only. All +remaining files are licensed under the LGPLv3. + +----------------------------------------------------------------------- + +The MIT License + +Copyright (c) 2014-2023 Christopher Down +Copyright (c) 2025- rany + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
"""A tiny library for composing SRT files.

Based on https://github.com/cdown/srt with parsing, subtitle modifying
functionality and Python 2 support removed. This is because of
https://github.com/rany2/edge-tts/issues/383.

Typing support was added, and more Python 3 features were used.

Copyright (c) 2014-2023 Christopher Down
Copyright (c) 2025- rany

This file is licensed under the MIT License (MIT).
See the LICENSE file for details.
"""

import functools
import logging
import re
from datetime import timedelta
from typing import Generator, List, Union

LOG = logging.getLogger(__name__)

# Two or more consecutive newlines, i.e. at least one blank line.
MULTI_WS_REGEX = re.compile(r"\n\n+")

ZERO_TIMEDELTA = timedelta(0)

# (reason, predicate) pairs. A subtitle is skipped as soon as one predicate
# returns a truthy value; the reason is used as the log message.
SUBTITLE_SKIP_CONDITIONS = (
    ("No content", lambda sub: not sub.content.strip()),
    ("Start time < 0 seconds", lambda sub: sub.start < ZERO_TIMEDELTA),
    ("Subtitle start time >= end time", lambda sub: sub.start >= sub.end),
)

SECONDS_IN_HOUR = 3600
SECONDS_IN_MINUTE = 60
HOURS_IN_DAY = 24
MICROSECONDS_IN_MILLISECOND = 1000


class _ShouldSkipException(Exception):
    """
    Raised when a subtitle should be skipped.
    """


@functools.total_ordering
class Subtitle:
    r"""
    The metadata relating to a single subtitle. Subtitles are sorted by start
    time by default. If no index was provided, index 0 will be used on writing
    an SRT block (and when ordering subtitles).

    :param index: The SRT index for this subtitle
    :type index: int or None
    :param start: The time that the subtitle should start being shown
    :type start: :py:class:`datetime.timedelta`
    :param end: The time that the subtitle should stop being shown
    :type end: :py:class:`datetime.timedelta`
    :param str content: The subtitle content. Should not contain OS-specific
                        line separators, only \\n.
    """

    # pylint: disable=R0913
    def __init__(
        self, index: Union[int, None], start: timedelta, end: timedelta, content: str
    ) -> None:
        self.index = index
        self.start = start
        self.end = end
        self.content = content

    def __hash__(self) -> int:
        # Hash over every attribute so that it stays consistent with __eq__.
        return hash(frozenset(vars(self).items()))

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, Subtitle):
            return NotImplemented

        return vars(self) == vars(other)

    def __lt__(self, other: object) -> bool:
        if not isinstance(other, Subtitle):
            return NotImplemented

        # A missing index is treated as 0 (the value to_srt() writes for it).
        # Comparing a raw None against an int or another None would raise
        # TypeError whenever two subtitles share the same start and end.
        return (self.start, self.end, self.index or 0) < (
            other.start,
            other.end,
            other.index or 0,
        )

    def __repr__(self) -> str:
        item_list = ", ".join(f"{k}={v!r}" for k, v in vars(self).items())
        return f"{type(self).__name__}({item_list})"

    def to_srt(self, eol: Union[str, None] = None) -> str:
        r"""
        Convert the current :py:class:`Subtitle` to an SRT block.

        :param str eol: The end of line string to use (default "\\n")
        :returns: The metadata of the current :py:class:`Subtitle` object as an
                  SRT formatted subtitle block
        :rtype: str
        """
        output_content = make_legal_content(self.content)

        if eol is None:
            eol = "\n"
        elif eol != "\n":
            # Content is stored with "\n" separators; convert them on output.
            output_content = output_content.replace("\n", eol)

        template = "{idx}{eol}{start} --> {end}{eol}{content}{eol}{eol}"
        return template.format(
            idx=self.index or 0,
            start=timedelta_to_srt_timestamp(self.start),
            end=timedelta_to_srt_timestamp(self.end),
            content=output_content,
            eol=eol,
        )


def make_legal_content(content: str) -> str:
    r"""
    Remove illegal content from a content block. Illegal content includes:

    * Blank lines
    * Starting or ending with a blank line

    .. doctest::

        >>> make_legal_content('\nfoo\n\nbar\n')
        'foo\nbar'
        >>> make_legal_content('foo\n')
        'foo'

    :param str content: The content to make legal
    :returns: The legalised content
    :rtype: str
    """
    # Optimisation: Usually the content we get is legally valid. Do a quick
    # check to see if we really need to do anything here. The trailing newline
    # is checked too, so that the fast path and the slow path always agree.
    if (
        content
        and content[0] != "\n"
        and content[-1] != "\n"
        and "\n\n" not in content
    ):
        return content

    legal_content = MULTI_WS_REGEX.sub("\n", content.strip("\n"))
    LOG.info("Legalised content %r to %r", content, legal_content)
    return legal_content


def timedelta_to_srt_timestamp(timedelta_timestamp: timedelta) -> str:
    r"""
    Convert a :py:class:`~datetime.timedelta` to an SRT timestamp.

    Whole days are folded into the hours field, and microseconds are
    truncated (not rounded) to milliseconds.

    .. doctest::

        >>> import datetime
        >>> delta = datetime.timedelta(hours=1, minutes=23, seconds=4)
        >>> timedelta_to_srt_timestamp(delta)
        '01:23:04,000'

    :param datetime.timedelta timedelta_timestamp: A timedelta to convert to an
                                                   SRT timestamp
    :returns: The timestamp in SRT format
    :rtype: str
    """
    hrs, secs_remainder = divmod(timedelta_timestamp.seconds, SECONDS_IN_HOUR)
    hrs += timedelta_timestamp.days * HOURS_IN_DAY
    mins, secs = divmod(secs_remainder, SECONDS_IN_MINUTE)
    msecs = timedelta_timestamp.microseconds // MICROSECONDS_IN_MILLISECOND
    return f"{hrs:02}:{mins:02}:{secs:02},{msecs:03}"


def _should_skip_sub(subtitle: Subtitle) -> None:
    """
    Check if a subtitle should be skipped based on the rules in
    SUBTITLE_SKIP_CONDITIONS.

    :param subtitle: A :py:class:`Subtitle` to check whether to skip
    :raises _ShouldSkipException: If the subtitle should be skipped
    """
    for info_msg, sub_skipper in SUBTITLE_SKIP_CONDITIONS:
        if sub_skipper(subtitle):
            raise _ShouldSkipException(info_msg)


def sort_and_reindex(
    subtitles: Union[Generator[Subtitle, None, None], List[Subtitle]],
    start_index: int = 1,
    in_place: bool = False,
    skip: bool = True,
) -> Generator[Subtitle, None, None]:
    """
    Reorder subtitles to be sorted by start time order, and rewrite the indexes
    to be in that same order. This ensures that the SRT file will play in an
    expected fashion after, for example, times were changed in some subtitles
    and they may need to be resorted.

    If skip=True, subtitles will also be skipped if they are considered not to
    be useful. Currently, the conditions to be considered "not useful" are as
    follows:

    - Content is empty, or only whitespace
    - The start time is negative
    - The start time is equal to or later than the end time

    .. doctest::

        >>> from datetime import timedelta
        >>> one = timedelta(seconds=1)
        >>> two = timedelta(seconds=2)
        >>> three = timedelta(seconds=3)
        >>> subs = [
        ...     Subtitle(index=999, start=one, end=two, content='1'),
        ...     Subtitle(index=0, start=two, end=three, content='2'),
        ... ]
        >>> list(sort_and_reindex(subs))  # doctest: +ELLIPSIS
        [Subtitle(...index=1...), Subtitle(...index=2...)]

    :param subtitles: :py:class:`Subtitle` objects in any order
    :param int start_index: The index to start from
    :param bool in_place: Whether to modify subs in-place for performance
                          instead of yielding reindexed copies
    :param bool skip: Whether to skip subtitles considered not useful (see
                      above for rules)
    :returns: The sorted subtitles
    :rtype: :term:`generator` of :py:class:`Subtitle` objects
    """
    skipped_subs = 0
    for sub_num, subtitle in enumerate(sorted(subtitles), start=start_index):
        if not in_place:
            # Work on a copy so the caller's objects keep their old index.
            subtitle = Subtitle(**vars(subtitle))

        if skip:
            try:
                _should_skip_sub(subtitle)
            except _ShouldSkipException as thrown_exc:
                if subtitle.index is None:
                    LOG.info("Skipped subtitle with no index: %s", thrown_exc)
                else:
                    LOG.info(
                        "Skipped subtitle at index %d: %s", subtitle.index, thrown_exc
                    )
                skipped_subs += 1
                continue

        # Subtract the skipped count so the emitted indexes stay contiguous.
        subtitle.index = sub_num - skipped_subs

        yield subtitle


def compose(
    subtitles: Union[Generator[Subtitle, None, None], List[Subtitle]],
    reindex: bool = True,
    start_index: int = 1,
    eol: Union[str, None] = None,
    in_place: bool = False,
) -> str:
    r"""
    Convert an iterator of :py:class:`Subtitle` objects to a string of joined
    SRT blocks.

    .. doctest::

        >>> from datetime import timedelta
        >>> start = timedelta(seconds=1)
        >>> end = timedelta(seconds=2)
        >>> subs = [
        ...     Subtitle(index=1, start=start, end=end, content='x'),
        ...     Subtitle(index=2, start=start, end=end, content='y'),
        ... ]
        >>> compose(subs)  # doctest: +ELLIPSIS
        '1\n00:00:01,000 --> 00:00:02,000\nx\n\n2\n00:00:01,000 --> ...'

    :param subtitles: The subtitles to convert to SRT blocks
    :type subtitles: :term:`iterator` of :py:class:`Subtitle` objects
    :param bool reindex: Whether to sort and reindex subtitles based on start
                         time (subtitles considered not useful are skipped,
                         see :py:func:`sort_and_reindex`)
    :param int start_index: If reindexing, the index to start reindexing from
    :param str eol: The end of line string to use (default "\\n")
    :param bool in_place: Whether to reindex subs in-place for performance
                          instead of reindexing copies
    :returns: A single SRT formatted string, with each input
              :py:class:`Subtitle` represented as an SRT block
    :rtype: str
    """
    if reindex:
        subtitles = sort_and_reindex(
            subtitles, start_index=start_index, in_place=in_place
        )

    return "".join(subtitle.to_srt(eol=eol) for subtitle in subtitles)
""" - return srt.compose(self.cues) # type: ignore + return compose(self.cues) def __str__(self) -> str: return self.get_srt() diff --git a/src/edge_tts/util.py b/src/edge_tts/util.py index 180e262..d6fe3eb 100644 --- a/src/edge_tts/util.py +++ b/src/edge_tts/util.py @@ -134,7 +134,7 @@ async def amain() -> None: if args.file in ("-", "/dev/stdin"): args.text = sys.stdin.read() else: - with open(args.file, "r", encoding="utf-8") as file: + with open(args.file, encoding="utf-8") as file: args.text = file.read() if args.text is not None: