Bundle srt module with edge-tts (#393)

Fixes https://github.com/rany2/edge-tts/issues/383 Signed-off-by: rany <rany2@riseup.net>
2025-08-05 13:43:57 +03:00
parent f78c8ece0a
commit 36d3006df5
7 changed files with 338 additions and 15 deletions
--- a/30
+++ b/30
@@ -1,3 +1,33 @@
+The MIT license is used for 'src/edge_tts/srt_composer.py' only. All
+remaining files are licensed under the LGPLv3.
+
+-----------------------------------------------------------------------
+
+The MIT License
+
+Copyright (c) 2014-2023 Christopher Down
+Copyright (c) 2025- rany <rany@riseup.net>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+-----------------------------------------------------------------------
+
                   GNU LESSER GENERAL PUBLIC LICENSE
                       Version 3, 29 June 2007

--- a/setup.cfg
+++ b/setup.cfg
@@ -35,4 +35,4 @@ dev =
    isort
    mypy
    pylint
-    types-tabulate
+    types-tabulate
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,6 @@ setup(
    install_requires=[
        "aiohttp>=3.8.0,<4.0.0",
        "certifi>=2023.11.17",
-        "srt>=3.4.1,<4.0.0",
        "tabulate>=0.4.4,<1.0.0",
        "typing-extensions>=4.1.0,<5.0.0",
    ],
--- a/src/edge_tts/data_classes.py
+++ b/src/edge_tts/data_classes.py
@@ -56,7 +56,7 @@ class TTSConfig:
            region = match.group(2)
            name = match.group(3)
            if name.find("-") != -1:
-                region = region + "-" + name[: name.find("-")]
+                region = f"{region}-{name[:name.find('-')]}"
                name = name[name.find("-") + 1 :]
            self.voice = (
                "Microsoft Server Speech Text to Speech Voice"
--- a/src/edge_tts/srt_composer.py
+++ b/src/edge_tts/srt_composer.py
@@ -0,0 +1,294 @@
+"""A tiny library for composing SRT files.
+
+Based on https://github.com/cdown/srt with the parsing and subtitle-modifying
+functionality, as well as Python 2 support, removed. This is because of
+https://github.com/rany2/edge-tts/issues/383.
+
+Typing support was added, and more Python 3 features were used.
+
+Copyright (c) 2014-2023 Christopher Down
+Copyright (c) 2025- rany <rany@riseup.net>
+
+This file is licensed under the MIT License (MIT).
+See the LICENSE-MIT file for details.
+"""
+
+import functools
+import logging
+import re
+from datetime import timedelta
+from typing import Generator, List, Union
+
+LOG = logging.getLogger(__name__)
+
+MULTI_WS_REGEX = re.compile(r"\n\n+")
+
+ZERO_TIMEDELTA = timedelta(0)
+
+# (info message, predicate) pairs: a Subtitle is skipped if its predicate is truthy
+SUBTITLE_SKIP_CONDITIONS = (
+    ("No content", lambda sub: not sub.content.strip()),
+    ("Start time < 0 seconds", lambda sub: sub.start < ZERO_TIMEDELTA),
+    ("Subtitle start time >= end time", lambda sub: sub.start >= sub.end),
+)
+
+SECONDS_IN_HOUR = 3600
+SECONDS_IN_MINUTE = 60
+HOURS_IN_DAY = 24
+MICROSECONDS_IN_MILLISECOND = 1000
+
+
+@functools.total_ordering
+class Subtitle:
+    r"""
+    The metadata relating to a single subtitle. Subtitles are sorted by start
+    time by default. If no index was provided, index 0 will be used on writing
+    an SRT block.
+
+    :param index: The SRT index for this subtitle
+    :type index: int or None
+    :param start: The time that the subtitle should start being shown
+    :type start: :py:class:`datetime.timedelta`
+    :param end: The time that the subtitle should stop being shown
+    :type end: :py:class:`datetime.timedelta`
+    :param str content: The subtitle content. Should not contain OS-specific
+                        line separators, only \\n. This module has no parser
+                        (unlike the upstream srt library), so callers must
+                        normalise line endings themselves.
+    """
+
+    # pylint: disable=R0913
+    def __init__(
+        self, index: Union[int, None], start: timedelta, end: timedelta, content: str
+    ) -> None:
+        self.index = index
+        self.start = start
+        self.end = end
+        self.content = content
+
+    def __hash__(self) -> int:
+        return hash(frozenset(vars(self).items()))
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, Subtitle):
+            return NotImplemented
+
+        return vars(self) == vars(other)
+
+    def __lt__(self, other: object) -> bool:
+        if not isinstance(other, Subtitle):
+            return NotImplemented
+
+        return (self.start, self.end, self.index) < (
+            other.start,
+            other.end,
+            other.index,
+        )
+
+    def __repr__(self) -> str:
+        # Python 2/3 cross compatibility
+        var_items = getattr(vars(self), "iteritems", getattr(vars(self), "items"))
+        item_list = ", ".join(f"{k}={v!r}" for k, v in var_items())
+        return f"{type(self).__name__}({item_list})"
+
+    def to_srt(self, eol: Union[str, None] = None) -> str:
+        r"""
+        Convert the current :py:class:`Subtitle` to an SRT block.
+
+        :param str eol: The end of line string to use (default "\\n")
+        :returns: The metadata of the current :py:class:`Subtitle` object as an
+                  SRT formatted subtitle block
+        :rtype: str
+        """
+        output_content = make_legal_content(self.content)
+
+        if eol is None:
+            eol = "\n"
+        elif eol != "\n":
+            output_content = output_content.replace("\n", eol)
+
+        template = "{idx}{eol}{start} --> {end}{eol}{content}{eol}{eol}"
+        return template.format(
+            idx=self.index or 0,
+            start=timedelta_to_srt_timestamp(self.start),
+            end=timedelta_to_srt_timestamp(self.end),
+            content=output_content,
+            eol=eol,
+        )
+
+
+def make_legal_content(content: str) -> str:
+    r"""
+    Remove illegal content from a content block. Illegal content includes:
+
+    * Blank lines
+    * Starting or ending with a blank line
+
+    .. doctest::
+
+        >>> make_legal_content('\nfoo\n\nbar\n')
+        'foo\nbar'
+
+    :param str content: The content to make legal
+    :returns: The legalised content
+    :rtype: str
+    """
+    # Optimisation: Usually the content we get is legally valid. Do a quick
+    # check to see if we really need to do anything here. This saves time from
+    # generating legal_content by about 50%.
+    if content and content[0] != "\n" and "\n\n" not in content:
+        return content
+
+    legal_content = MULTI_WS_REGEX.sub("\n", content.strip("\n"))
+    LOG.info("Legalised content %r to %r", content, legal_content)
+    return legal_content
+
+
+def timedelta_to_srt_timestamp(timedelta_timestamp: timedelta) -> str:
+    r"""
+    Convert a :py:class:`~datetime.timedelta` to an SRT timestamp.
+
+    .. doctest::
+
+        >>> import datetime
+        >>> delta = datetime.timedelta(hours=1, minutes=23, seconds=4)
+        >>> timedelta_to_srt_timestamp(delta)
+        '01:23:04,000'
+
+    :param datetime.timedelta timedelta_timestamp: A timedelta to convert to an
+                                                   SRT timestamp
+    :returns: The timestamp in SRT format
+    :rtype: str
+    """
+
+    hrs, secs_remainder = divmod(timedelta_timestamp.seconds, SECONDS_IN_HOUR)
+    hrs += timedelta_timestamp.days * HOURS_IN_DAY
+    mins, secs = divmod(secs_remainder, SECONDS_IN_MINUTE)
+    msecs = timedelta_timestamp.microseconds // MICROSECONDS_IN_MILLISECOND
+    return f"{int(hrs):02}:{int(mins):02}:{int(secs):02},{int(msecs):03}"
+
+
+def sort_and_reindex(
+    subtitles: Union[Generator[Subtitle, None, None], List[Subtitle]],
+    start_index: int = 1,
+    in_place: bool = False,
+    skip: bool = True,
+) -> Generator[Subtitle, None, None]:
+    """
+    Reorder subtitles to be sorted by start time order, and rewrite the indexes
+    to be in that same order. This ensures that the SRT file will play in an
+    expected fashion after, for example, times were changed in some subtitles
+    and they may need to be resorted.
+
+    If skip=True, subtitles will also be skipped if they are considered not to
+    be useful. Currently, the conditions to be considered "not useful" are as
+    follows:
+
+    - Content is empty, or only whitespace
+    - The start time is negative
+    - The start time is equal to or later than the end time
+
+    .. doctest::
+
+        >>> from datetime import timedelta
+        >>> one = timedelta(seconds=1)
+        >>> two = timedelta(seconds=2)
+        >>> three = timedelta(seconds=3)
+        >>> subs = [
+        ...     Subtitle(index=999, start=one, end=two, content='1'),
+        ...     Subtitle(index=0, start=two, end=three, content='2'),
+        ... ]
+        >>> list(sort_and_reindex(subs))  # doctest: +ELLIPSIS
+        [Subtitle(...index=1...), Subtitle(...index=2...)]
+
+    :param subtitles: :py:class:`Subtitle` objects in any order
+    :param int start_index: The index to start from
+    :param bool in_place: Whether to modify subs in-place for performance
+                          (version <=1.0.0 behaviour)
+    :param bool skip: Whether to skip subtitles considered not useful (see
+                      above for rules)
+    :returns: The sorted subtitles
+    :rtype: :term:`generator` of :py:class:`Subtitle` objects
+    """
+    skipped_subs = 0
+    for sub_num, subtitle in enumerate(sorted(subtitles), start=start_index):
+        if not in_place:
+            subtitle = Subtitle(**vars(subtitle))
+
+        if skip:
+            try:
+                _should_skip_sub(subtitle)
+            except _ShouldSkipException as thrown_exc:
+                if subtitle.index is None:
+                    LOG.info("Skipped subtitle with no index: %s", thrown_exc)
+                else:
+                    LOG.info(
+                        "Skipped subtitle at index %d: %s", subtitle.index, thrown_exc
+                    )
+                skipped_subs += 1
+                continue
+
+        subtitle.index = sub_num - skipped_subs
+
+        yield subtitle
+
+
+def _should_skip_sub(subtitle: Subtitle) -> None:
+    """
+    Check if a subtitle should be skipped based on the rules in
+    SUBTITLE_SKIP_CONDITIONS.
+
+    :param subtitle: A :py:class:`Subtitle` to check whether to skip
+    :raises _ShouldSkipException: If the subtitle should be skipped
+    """
+    for info_msg, sub_skipper in SUBTITLE_SKIP_CONDITIONS:
+        if sub_skipper(subtitle):
+            raise _ShouldSkipException(info_msg)
+
+
+def compose(
+    subtitles: Union[Generator[Subtitle, None, None], List[Subtitle]],
+    reindex: bool = True,
+    start_index: int = 1,
+    eol: Union[str, None] = None,
+    in_place: bool = False,
+) -> str:
+    r"""
+    Convert an iterator of :py:class:`Subtitle` objects to a string of joined
+    SRT blocks.
+
+    .. doctest::
+
+        >>> from datetime import timedelta
+        >>> start = timedelta(seconds=1)
+        >>> end = timedelta(seconds=2)
+        >>> subs = [
+        ...     Subtitle(index=1, start=start, end=end, content='x'),
+        ...     Subtitle(index=2, start=start, end=end, content='y'),
+        ... ]
+        >>> compose(subs)  # doctest: +ELLIPSIS
+        '1\n00:00:01,000 --> 00:00:02,000\nx\n\n2\n00:00:01,000 --> ...'
+
+    :param subtitles: The subtitles to convert to SRT blocks
+    :type subtitles: :term:`iterator` of :py:class:`Subtitle` objects
+    :param bool reindex: Whether to reindex subtitles based on start time
+    :param int start_index: If reindexing, the index to start reindexing from
+    :param str eol: The end of line string to use (default "\\n")
+    :returns: A single SRT formatted string, with each input
+              :py:class:`Subtitle` represented as an SRT block
+    :param bool in_place: Whether to reindex subs in-place for performance
+                          (version <=1.0.0 behaviour)
+    :rtype: str
+    """
+    if reindex:
+        subtitles = sort_and_reindex(
+            subtitles, start_index=start_index, in_place=in_place
+        )
+
+    return "".join(subtitle.to_srt(eol=eol) for subtitle in subtitles)
+
+
+class _ShouldSkipException(Exception):
+    """
+    Raised when a subtitle should be skipped.
+    """
--- a/src/edge_tts/submaker.py
+++ b/src/edge_tts/submaker.py
@@ -1,9 +1,9 @@
 """SubMaker module is used to generate subtitles from WordBoundary and SentenceBoundary events."""

+from datetime import timedelta
 from typing import List

-import srt  # type: ignore
-
+from .srt_composer import Subtitle, compose
 from .typing import TTSChunk


@@ -13,7 +13,7 @@ class SubMaker:
    """

    def __init__(self) -> None:
-        self.cues: List[srt.Subtitle] = []  # type: ignore
+        self.cues: List[Subtitle] = []

    def feed(self, msg: TTSChunk) -> None:
        """
@@ -29,10 +29,10 @@ class SubMaker:
            raise ValueError("Invalid message type, expected 'WordBoundary'")

        self.cues.append(
-            srt.Subtitle(
+            Subtitle(
                index=len(self.cues) + 1,
-                start=srt.timedelta(microseconds=msg["offset"] / 10),
-                end=srt.timedelta(microseconds=(msg["offset"] + msg["duration"]) / 10),
+                start=timedelta(microseconds=msg["offset"] / 10),
+                end=timedelta(microseconds=(msg["offset"] + msg["duration"]) / 10),
                content=msg["text"],
            )
        )
@@ -53,15 +53,15 @@ class SubMaker:
        if len(self.cues) == 0:
            return

-        new_cues: List[srt.Subtitle] = []  # type: ignore
-        current_cue: srt.Subtitle = self.cues[0]  # type: ignore
+        new_cues: List[Subtitle] = []
+        current_cue: Subtitle = self.cues[0]
        for cue in self.cues[1:]:
            if len(current_cue.content.split()) < words:
-                current_cue = srt.Subtitle(
+                current_cue = Subtitle(
                    index=current_cue.index,
                    start=current_cue.start,
                    end=cue.end,
-                    content=current_cue.content + " " + cue.content,
+                    content=f"{current_cue.content} {cue.content}",
                )
            else:
                new_cues.append(current_cue)
@@ -76,7 +76,7 @@ class SubMaker:
        Returns:
            str: The SRT formatted subtitles.
        """
-        return srt.compose(self.cues)  # type: ignore
+        return compose(self.cues)

    def __str__(self) -> str:
        return self.get_srt()
--- a/src/edge_tts/util.py
+++ b/src/edge_tts/util.py
@@ -134,7 +134,7 @@ async def amain() -> None:
        if args.file in ("-", "/dev/stdin"):
            args.text = sys.stdin.read()
        else:
-            with open(args.file, "r", encoding="utf-8") as file:
+            with open(args.file, encoding="utf-8") as file:
                args.text = file.read()

    if args.text is not None: