Bundle srt module with edge-tts (#393)
Fixes https://github.com/rany2/edge-tts/issues/383 Signed-off-by: rany <rany2@riseup.net>
This commit is contained in:
30
LICENSE
30
LICENSE
@@ -1,3 +1,33 @@
|
||||
The MIT license is used for 'src/edge_tts/srt_composer.py' only. All
|
||||
remaining files are licensed under the LGPLv3.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
The MIT License
|
||||
|
||||
Copyright (c) 2014-2023 Christopher Down
|
||||
Copyright (c) 2025- rany <rany@riseup.net>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
|
||||
GNU LESSER GENERAL PUBLIC LICENSE
|
||||
Version 3, 29 June 2007
|
||||
|
||||
|
||||
1
setup.py
1
setup.py
@@ -6,7 +6,6 @@ setup(
|
||||
install_requires=[
|
||||
"aiohttp>=3.8.0,<4.0.0",
|
||||
"certifi>=2023.11.17",
|
||||
"srt>=3.4.1,<4.0.0",
|
||||
"tabulate>=0.4.4,<1.0.0",
|
||||
"typing-extensions>=4.1.0,<5.0.0",
|
||||
],
|
||||
|
||||
@@ -56,7 +56,7 @@ class TTSConfig:
|
||||
region = match.group(2)
|
||||
name = match.group(3)
|
||||
if name.find("-") != -1:
|
||||
region = region + "-" + name[: name.find("-")]
|
||||
region = f"{region}-{name[:name.find('-')]}"
|
||||
name = name[name.find("-") + 1 :]
|
||||
self.voice = (
|
||||
"Microsoft Server Speech Text to Speech Voice"
|
||||
|
||||
294
src/edge_tts/srt_composer.py
Normal file
294
src/edge_tts/srt_composer.py
Normal file
@@ -0,0 +1,294 @@
|
||||
"""A tiny library for composing SRT files.
|
||||
|
||||
Based on https://github.com/cdown/srt with parsing, subtitle modifying
|
||||
functionality and Python 2 support removed. This is because of
|
||||
https://github.com/rany2/edge-tts/issues/383.
|
||||
|
||||
Typing support was added, and more Python 3 features were used.
|
||||
|
||||
Copyright (c) 2014-2023 Christopher Down
|
||||
Copyright (c) 2025- rany <rany@riseup.net>
|
||||
|
||||
This file is licensed under the MIT License (MIT).
|
||||
See the LICENSE file for details.
|
||||
"""
|
||||
|
||||
import functools
|
||||
import logging
|
||||
import re
|
||||
from datetime import timedelta
|
||||
from typing import Generator, List, Union
|
||||
|
||||
# Module-level logger for informational messages emitted by this module.
LOG = logging.getLogger(__name__)

# Matches a run of two or more consecutive newlines (i.e. blank lines).
MULTI_WS_REGEX = re.compile(r"\n\n+")

ZERO_TIMEDELTA = timedelta(0)


def _has_no_content(sub: "Subtitle") -> bool:
    """Return True when the subtitle content is empty or only whitespace."""
    return not sub.content.strip()


def _starts_before_zero(sub: "Subtitle") -> bool:
    """Return True when the subtitle start time is negative."""
    return sub.start < ZERO_TIMEDELTA


def _starts_at_or_after_end(sub: "Subtitle") -> bool:
    """Return True when the subtitle does not start before it ends."""
    return sub.start >= sub.end


# Pairs of (info message, predicate). A subtitle is skipped as soon as one
# predicate returns a truthy value; the paired message explains why.
SUBTITLE_SKIP_CONDITIONS = (
    ("No content", _has_no_content),
    ("Start time < 0 seconds", _starts_before_zero),
    ("Subtitle start time >= end time", _starts_at_or_after_end),
)

# Unit conversion factors used when formatting SRT timestamps.
SECONDS_IN_HOUR = 3600
SECONDS_IN_MINUTE = 60
HOURS_IN_DAY = 24
MICROSECONDS_IN_MILLISECOND = 1000
|
||||
|
||||
|
||||
@functools.total_ordering
class Subtitle:
    r"""
    The metadata relating to a single subtitle.

    Subtitles order by start time, then end time, then index. A subtitle
    without an index sorts before an otherwise identical subtitle that has
    one. If no index was provided, index 0 will be used on writing an SRT
    block.

    :param index: The SRT index for this subtitle
    :type index: int or None
    :param start: The time that the subtitle should start being shown
    :type start: :py:class:`datetime.timedelta`
    :param end: The time that the subtitle should stop being shown
    :type end: :py:class:`datetime.timedelta`
    :param str content: The subtitle content. Should not contain OS-specific
                        line separators, only \\n.
    """

    # pylint: disable=R0913
    def __init__(
        self, index: Union[int, None], start: timedelta, end: timedelta, content: str
    ) -> None:
        self.index = index
        self.start = start
        self.end = end
        self.content = content

    def __hash__(self) -> int:
        # Hash over the same attribute set that __eq__ compares.
        return hash(frozenset(vars(self).items()))

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, Subtitle):
            return NotImplemented

        return vars(self) == vars(other)

    def __lt__(self, other: object) -> bool:
        if not isinstance(other, Subtitle):
            return NotImplemented

        # pylint: disable=W0212
        return self._sort_key() < other._sort_key()

    def _sort_key(self) -> tuple:
        """Return the ordering key: start, end, then index (None first)."""
        # ``index`` may be None, and None cannot be compared with an int.
        # Splitting it into (has_index, value) keeps integer ordering intact
        # while letting index-less subtitles sort ahead of indexed ones
        # instead of raising TypeError on a start/end tie.
        return (self.start, self.end, self.index is not None, self.index or 0)

    def __repr__(self) -> str:
        item_list = ", ".join(f"{k}={v!r}" for k, v in vars(self).items())
        return f"{type(self).__name__}({item_list})"

    def to_srt(self, eol: Union[str, None] = None) -> str:
        r"""
        Convert the current :py:class:`Subtitle` to an SRT block.

        :param str eol: The end of line string to use (default "\\n")
        :returns: The metadata of the current :py:class:`Subtitle` object as an
                  SRT formatted subtitle block
        :rtype: str
        """
        output_content = make_legal_content(self.content)

        if eol is None:
            eol = "\n"
        elif eol != "\n":
            # Content is stored with "\n" separators; convert them so the
            # whole block uses the requested line ending.
            output_content = output_content.replace("\n", eol)

        template = "{idx}{eol}{start} --> {end}{eol}{content}{eol}{eol}"
        return template.format(
            idx=self.index or 0,
            start=timedelta_to_srt_timestamp(self.start),
            end=timedelta_to_srt_timestamp(self.end),
            content=output_content,
            eol=eol,
        )
|
||||
|
||||
|
||||
def make_legal_content(content: str) -> str:
    r"""
    Remove illegal content from a content block. Illegal content includes:

    * Blank lines
    * Starting or ending with a blank line

    .. doctest::

        >>> make_legal_content('\nfoo\n\nbar\n')
        'foo\nbar'
        >>> make_legal_content('foo\n')
        'foo'

    :param str content: The content to make legal
    :returns: The legalised content
    :rtype: str
    """
    # Optimisation: Usually the content we get is legally valid. Do a quick
    # check to see if we really need to do anything here. The check must
    # also reject a trailing newline, otherwise this shortcut would return
    # content that the slow path below would have trimmed.
    if (
        content
        and content[0] != "\n"
        and content[-1] != "\n"
        and "\n\n" not in content
    ):
        return content

    legal_content = MULTI_WS_REGEX.sub("\n", content.strip("\n"))
    LOG.info("Legalised content %r to %r", content, legal_content)
    return legal_content
|
||||
|
||||
|
||||
def timedelta_to_srt_timestamp(timedelta_timestamp: timedelta) -> str:
    r"""
    Format a :py:class:`~datetime.timedelta` as an SRT timestamp
    (``HH:MM:SS,mmm``).

    .. doctest::

        >>> import datetime
        >>> delta = datetime.timedelta(hours=1, minutes=23, seconds=4)
        >>> timedelta_to_srt_timestamp(delta)
        '01:23:04,000'

    :param datetime.timedelta timedelta_timestamp: The duration to format
    :returns: The timestamp in SRT format
    :rtype: str
    """
    whole_hours, leftover_secs = divmod(timedelta_timestamp.seconds, SECONDS_IN_HOUR)
    minutes, seconds = divmod(leftover_secs, SECONDS_IN_MINUTE)
    # Whole days are folded into the hour field; SRT has no day component.
    total_hours = whole_hours + timedelta_timestamp.days * HOURS_IN_DAY
    # Sub-millisecond precision is truncated, not rounded.
    millis = timedelta_timestamp.microseconds // MICROSECONDS_IN_MILLISECOND
    return f"{total_hours:02}:{minutes:02}:{seconds:02},{millis:03}"
|
||||
|
||||
|
||||
def sort_and_reindex(
    subtitles: Union[Generator[Subtitle, None, None], List[Subtitle]],
    start_index: int = 1,
    in_place: bool = False,
    skip: bool = True,
) -> Generator[Subtitle, None, None]:
    """
    Yield the given subtitles in sorted order with consecutive indexes.

    The subtitles are sorted using their own ordering and then numbered
    from ``start_index`` upwards, so the resulting SRT plays back in the
    expected order even if timings were changed after creation.

    With skip=True, subtitles that are considered not useful are dropped
    and do not consume an index. A subtitle is not useful when:

    - Content is empty, or only whitespace
    - The start time is negative
    - The start time is equal to or later than the end time

    .. doctest::

        >>> from datetime import timedelta
        >>> one = timedelta(seconds=1)
        >>> two = timedelta(seconds=2)
        >>> three = timedelta(seconds=3)
        >>> subs = [
        ...     Subtitle(index=999, start=one, end=two, content='1'),
        ...     Subtitle(index=0, start=two, end=three, content='2'),
        ... ]
        >>> list(sort_and_reindex(subs))  # doctest: +ELLIPSIS
        [Subtitle(...index=1...), Subtitle(...index=2...)]

    :param subtitles: :py:class:`Subtitle` objects in any order
    :param int start_index: The index to start from
    :param bool in_place: Whether to rewrite the index on the passed-in
                          objects instead of on copies
    :param bool skip: Whether to skip subtitles considered not useful (see
                      above for rules)
    :returns: The sorted subtitles
    :rtype: :term:`generator` of :py:class:`Subtitle` objects
    """
    next_index = start_index

    for original in sorted(subtitles):
        # Work on a copy unless the caller opted into mutating its objects.
        current = original if in_place else Subtitle(**vars(original))

        if skip:
            try:
                _should_skip_sub(current)
            except _ShouldSkipException as reason:
                if current.index is not None:
                    LOG.info(
                        "Skipped subtitle at index %d: %s", current.index, reason
                    )
                else:
                    LOG.info("Skipped subtitle with no index: %s", reason)
                # Skipped subtitles do not advance the index counter.
                continue

        current.index = next_index
        next_index += 1
        yield current
|
||||
|
||||
|
||||
def _should_skip_sub(subtitle: Subtitle) -> None:
    """
    Raise if any rule in SUBTITLE_SKIP_CONDITIONS matches the subtitle.

    Rules are evaluated in order and only the first match is reported.

    :param subtitle: A :py:class:`Subtitle` to check whether to skip
    :raises _ShouldSkipException: If the subtitle should be skipped; the
                                  exception message is the matching rule's
                                  info message
    """
    reason = next(
        (message for message, matches in SUBTITLE_SKIP_CONDITIONS if matches(subtitle)),
        None,
    )
    if reason is not None:
        raise _ShouldSkipException(reason)
|
||||
|
||||
|
||||
def compose(
    subtitles: Union[Generator[Subtitle, None, None], List[Subtitle]],
    reindex: bool = True,
    start_index: int = 1,
    eol: Union[str, None] = None,
    in_place: bool = False,
) -> str:
    r"""
    Render :py:class:`Subtitle` objects as one string of concatenated SRT
    blocks.

    .. doctest::

        >>> from datetime import timedelta
        >>> start = timedelta(seconds=1)
        >>> end = timedelta(seconds=2)
        >>> subs = [
        ...     Subtitle(index=1, start=start, end=end, content='x'),
        ...     Subtitle(index=2, start=start, end=end, content='y'),
        ... ]
        >>> compose(subs)  # doctest: +ELLIPSIS
        '1\n00:00:01,000 --> 00:00:02,000\nx\n\n2\n00:00:01,000 --> ...'

    :param subtitles: The subtitles to convert to SRT blocks
    :type subtitles: :term:`iterator` of :py:class:`Subtitle` objects
    :param bool reindex: Whether to sort and reindex the subtitles first
    :param int start_index: If reindexing, the index to start reindexing from
    :param str eol: The end of line string to use (default "\\n")
    :param bool in_place: If reindexing, whether to rewrite the index on the
                          passed-in objects instead of on copies
    :returns: A single SRT formatted string, with each emitted
              :py:class:`Subtitle` represented as an SRT block
    :rtype: str
    """
    if reindex:
        ordered = sort_and_reindex(
            subtitles, start_index=start_index, in_place=in_place
        )
    else:
        ordered = subtitles

    blocks = [entry.to_srt(eol=eol) for entry in ordered]
    return "".join(blocks)
|
||||
|
||||
|
||||
class _ShouldSkipException(Exception):
    """Signal that a subtitle matched a skip rule and should be dropped."""
|
||||
@@ -1,9 +1,9 @@
|
||||
"""SubMaker module is used to generate subtitles from WordBoundary and SentenceBoundary events."""
|
||||
|
||||
from datetime import timedelta
|
||||
from typing import List
|
||||
|
||||
import srt # type: ignore
|
||||
|
||||
from .srt_composer import Subtitle, compose
|
||||
from .typing import TTSChunk
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ class SubMaker:
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.cues: List[srt.Subtitle] = [] # type: ignore
|
||||
self.cues: List[Subtitle] = []
|
||||
|
||||
def feed(self, msg: TTSChunk) -> None:
|
||||
"""
|
||||
@@ -29,10 +29,10 @@ class SubMaker:
|
||||
raise ValueError("Invalid message type, expected 'WordBoundary'")
|
||||
|
||||
self.cues.append(
|
||||
srt.Subtitle(
|
||||
Subtitle(
|
||||
index=len(self.cues) + 1,
|
||||
start=srt.timedelta(microseconds=msg["offset"] / 10),
|
||||
end=srt.timedelta(microseconds=(msg["offset"] + msg["duration"]) / 10),
|
||||
start=timedelta(microseconds=msg["offset"] / 10),
|
||||
end=timedelta(microseconds=(msg["offset"] + msg["duration"]) / 10),
|
||||
content=msg["text"],
|
||||
)
|
||||
)
|
||||
@@ -53,15 +53,15 @@ class SubMaker:
|
||||
if len(self.cues) == 0:
|
||||
return
|
||||
|
||||
new_cues: List[srt.Subtitle] = [] # type: ignore
|
||||
current_cue: srt.Subtitle = self.cues[0] # type: ignore
|
||||
new_cues: List[Subtitle] = []
|
||||
current_cue: Subtitle = self.cues[0]
|
||||
for cue in self.cues[1:]:
|
||||
if len(current_cue.content.split()) < words:
|
||||
current_cue = srt.Subtitle(
|
||||
current_cue = Subtitle(
|
||||
index=current_cue.index,
|
||||
start=current_cue.start,
|
||||
end=cue.end,
|
||||
content=current_cue.content + " " + cue.content,
|
||||
content=f"{current_cue.content} {cue.content}",
|
||||
)
|
||||
else:
|
||||
new_cues.append(current_cue)
|
||||
@@ -76,7 +76,7 @@ class SubMaker:
|
||||
Returns:
|
||||
str: The SRT formatted subtitles.
|
||||
"""
|
||||
return srt.compose(self.cues) # type: ignore
|
||||
return compose(self.cues)
|
||||
|
||||
def __str__(self) -> str:
|
||||
return self.get_srt()
|
||||
|
||||
@@ -134,7 +134,7 @@ async def amain() -> None:
|
||||
if args.file in ("-", "/dev/stdin"):
|
||||
args.text = sys.stdin.read()
|
||||
else:
|
||||
with open(args.file, "r", encoding="utf-8") as file:
|
||||
with open(args.file, encoding="utf-8") as file:
|
||||
args.text = file.read()
|
||||
|
||||
if args.text is not None:
|
||||
|
||||
Reference in New Issue
Block a user