Bundle srt module with edge-tts (#393)
Fixes https://github.com/rany2/edge-tts/issues/383 Signed-off-by: rany <rany2@riseup.net>
This commit is contained in:
30
LICENSE
30
LICENSE
@@ -1,3 +1,33 @@
|
|||||||
|
The MIT license is used for 'src/edge_tts/srt_composer.py' only. All
|
||||||
|
remaining files are licensed under the LGPLv3.
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------
|
||||||
|
|
||||||
|
The MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2014-2023 Christopher Down
|
||||||
|
Copyright (c) 2025- rany <rany@riseup.net>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
THE SOFTWARE.
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------
|
||||||
|
|
||||||
GNU LESSER GENERAL PUBLIC LICENSE
|
GNU LESSER GENERAL PUBLIC LICENSE
|
||||||
Version 3, 29 June 2007
|
Version 3, 29 June 2007
|
||||||
|
|
||||||
|
|||||||
@@ -35,4 +35,4 @@ dev =
|
|||||||
isort
|
isort
|
||||||
mypy
|
mypy
|
||||||
pylint
|
pylint
|
||||||
types-tabulate
|
types-tabulate
|
||||||
|
|||||||
1
setup.py
1
setup.py
@@ -6,7 +6,6 @@ setup(
|
|||||||
install_requires=[
|
install_requires=[
|
||||||
"aiohttp>=3.8.0,<4.0.0",
|
"aiohttp>=3.8.0,<4.0.0",
|
||||||
"certifi>=2023.11.17",
|
"certifi>=2023.11.17",
|
||||||
"srt>=3.4.1,<4.0.0",
|
|
||||||
"tabulate>=0.4.4,<1.0.0",
|
"tabulate>=0.4.4,<1.0.0",
|
||||||
"typing-extensions>=4.1.0,<5.0.0",
|
"typing-extensions>=4.1.0,<5.0.0",
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -56,7 +56,7 @@ class TTSConfig:
|
|||||||
region = match.group(2)
|
region = match.group(2)
|
||||||
name = match.group(3)
|
name = match.group(3)
|
||||||
if name.find("-") != -1:
|
if name.find("-") != -1:
|
||||||
region = region + "-" + name[: name.find("-")]
|
region = f"{region}-{name[:name.find('-')]}"
|
||||||
name = name[name.find("-") + 1 :]
|
name = name[name.find("-") + 1 :]
|
||||||
self.voice = (
|
self.voice = (
|
||||||
"Microsoft Server Speech Text to Speech Voice"
|
"Microsoft Server Speech Text to Speech Voice"
|
||||||
|
|||||||
294
src/edge_tts/srt_composer.py
Normal file
294
src/edge_tts/srt_composer.py
Normal file
@@ -0,0 +1,294 @@
|
|||||||
|
"""A tiny library for composing SRT files.
|
||||||
|
|
||||||
|
Based on https://github.com/cdown/srt with the parsing and subtitle-modifying
|
||||||
|
functionality and Python 2 support removed. This is because of
|
||||||
|
https://github.com/rany2/edge-tts/issues/383.
|
||||||
|
|
||||||
|
Typing support was added, and more Python 3 features were used.
|
||||||
|
|
||||||
|
Copyright (c) 2014-2023 Christopher Down
|
||||||
|
Copyright (c) 2025- rany <rany@riseup.net>
|
||||||
|
|
||||||
|
This file is licensed under the MIT License (MIT).
|
||||||
|
See the LICENSE file for details.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import functools
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from datetime import timedelta
|
||||||
|
from typing import Generator, List, Union
|
||||||
|
|
||||||
|
# Module-wide logger, named after this module.
LOG = logging.getLogger(__name__)

# Matches a run of two or more consecutive newlines, i.e. one or more blank
# lines inside a content block.
MULTI_WS_REGEX = re.compile(r"\n\n+")

# Zero-length duration; start times below this are considered invalid.
ZERO_TIMEDELTA = timedelta(0)


def _has_no_content(sub: "Subtitle") -> bool:
    """Return True when the content is empty or consists of whitespace only."""
    return not sub.content.strip()


def _starts_before_zero(sub: "Subtitle") -> bool:
    """Return True when the subtitle starts at a negative time."""
    return sub.start < ZERO_TIMEDELTA


def _starts_at_or_after_end(sub: "Subtitle") -> bool:
    """Return True when the subtitle does not start strictly before its end."""
    return sub.start >= sub.end


# Pairs of (info message, predicate). Each predicate takes a Subtitle and
# returns a truthy value when that subtitle should be skipped; the message
# explains why.
SUBTITLE_SKIP_CONDITIONS = (
    ("No content", _has_no_content),
    ("Start time < 0 seconds", _starts_before_zero),
    ("Subtitle start time >= end time", _starts_at_or_after_end),
)

# Unit conversion factors used when formatting SRT timestamps.
SECONDS_IN_HOUR = 3600
SECONDS_IN_MINUTE = 60
HOURS_IN_DAY = 24
MICROSECONDS_IN_MILLISECOND = 1000
|
||||||
|
|
||||||
|
|
||||||
|
@functools.total_ordering
class Subtitle:
    r"""
    The metadata relating to a single subtitle. Subtitles are sorted by start
    time, then end time, then index. If no index was provided, index 0 is used
    both when ordering subtitles and when writing an SRT block.

    :param index: The SRT index for this subtitle
    :type index: int or None
    :param start: The time that the subtitle should start being shown
    :type start: :py:class:`datetime.timedelta`
    :param end: The time that the subtitle should stop being shown
    :type end: :py:class:`datetime.timedelta`
    :param str content: The subtitle content. Should not contain OS-specific
                        line separators, only \\n.
    """

    # pylint: disable=R0913
    def __init__(
        self, index: Union[int, None], start: timedelta, end: timedelta, content: str
    ) -> None:
        self.index = index
        self.start = start
        self.end = end
        self.content = content

    def __hash__(self) -> int:
        # Hash over every attribute so that it agrees with __eq__.
        return hash(frozenset(vars(self).items()))

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, Subtitle):
            return NotImplemented

        return vars(self) == vars(other)

    def __lt__(self, other: object) -> bool:
        if not isinstance(other, Subtitle):
            return NotImplemented

        # A missing index is ordered as 0 (the value to_srt writes) so that a
        # None index is never compared against an int, which would raise
        # TypeError when start and end times are identical.
        return (self.start, self.end, self.index or 0) < (
            other.start,
            other.end,
            other.index or 0,
        )

    def __repr__(self) -> str:
        item_list = ", ".join(f"{k}={v!r}" for k, v in vars(self).items())
        return f"{type(self).__name__}({item_list})"

    def to_srt(self, eol: Union[str, None] = None) -> str:
        r"""
        Convert the current :py:class:`Subtitle` to an SRT block.

        :param str eol: The end of line string to use (default "\\n")
        :returns: The metadata of the current :py:class:`Subtitle` object as an
                  SRT formatted subtitle block
        :rtype: str
        """
        output_content = make_legal_content(self.content)

        if eol is None:
            eol = "\n"
        elif eol != "\n":
            # Content is stored with "\n" separators; convert them to the
            # requested line ending.
            output_content = output_content.replace("\n", eol)

        template = "{idx}{eol}{start} --> {end}{eol}{content}{eol}{eol}"
        return template.format(
            idx=self.index or 0,
            start=timedelta_to_srt_timestamp(self.start),
            end=timedelta_to_srt_timestamp(self.end),
            content=output_content,
            eol=eol,
        )
|
||||||
|
|
||||||
|
|
||||||
|
def make_legal_content(content: str) -> str:
    r"""
    Remove illegal content from a content block. Illegal content includes:

    * Blank lines
    * Starting or ending with a blank line

    .. doctest::

        >>> make_legal_content('\nfoo\n\nbar\n')
        'foo\nbar'
        >>> make_legal_content('foo\n')
        'foo'

    :param str content: The content to make legal
    :returns: The legalised content
    :rtype: str
    """
    # Optimisation: Usually the content we get is legally valid. Do a quick
    # check to see if we really need to do anything here. The check must also
    # cover a trailing newline, otherwise content such as "foo\n" would be
    # returned untouched and leave an extra blank line in the SRT block.
    if (
        content
        and not content.startswith("\n")
        and not content.endswith("\n")
        and "\n\n" not in content
    ):
        return content

    legal_content = MULTI_WS_REGEX.sub("\n", content.strip("\n"))
    LOG.info("Legalised content %r to %r", content, legal_content)
    return legal_content
|
||||||
|
|
||||||
|
|
||||||
|
def timedelta_to_srt_timestamp(timedelta_timestamp: timedelta) -> str:
    r"""
    Format a :py:class:`~datetime.timedelta` as an SRT timestamp
    (``HH:MM:SS,mmm``).

    .. doctest::

        >>> import datetime
        >>> delta = datetime.timedelta(hours=1, minutes=23, seconds=4)
        >>> timedelta_to_srt_timestamp(delta)
        '01:23:04,000'

    :param datetime.timedelta timedelta_timestamp: The duration to convert to
                                                   an SRT timestamp
    :returns: The timestamp in SRT format
    :rtype: str
    """
    # Peel seconds off first, then split the remaining whole minutes into
    # hours and minutes.
    whole_minutes, secs = divmod(timedelta_timestamp.seconds, SECONDS_IN_MINUTE)
    day_hours, mins = divmod(whole_minutes, SECONDS_IN_HOUR // SECONDS_IN_MINUTE)

    # SRT has no day field, so whole days are folded into the hour count.
    hrs = timedelta_timestamp.days * HOURS_IN_DAY + day_hours

    # Sub-millisecond precision is truncated, not rounded.
    msecs = timedelta_timestamp.microseconds // MICROSECONDS_IN_MILLISECOND

    return f"{hrs:02}:{mins:02}:{secs:02},{msecs:03}"
|
||||||
|
|
||||||
|
|
||||||
|
def sort_and_reindex(
    subtitles: Union[Generator[Subtitle, None, None], List[Subtitle]],
    start_index: int = 1,
    in_place: bool = False,
    skip: bool = True,
) -> Generator[Subtitle, None, None]:
    """
    Yield the given subtitles in sorted order with consecutive indexes,
    beginning at ``start_index``. This keeps an SRT file playing as expected
    after, for example, subtitle times were changed and the subtitles need to
    be resorted.

    With skip=True, subtitles that are considered not useful are dropped and
    do not consume an index. A subtitle is "not useful" when:

    - Content is empty, or only whitespace
    - The start time is negative
    - The start time is equal to or later than the end time

    .. doctest::

        >>> from datetime import timedelta
        >>> one = timedelta(seconds=1)
        >>> two = timedelta(seconds=2)
        >>> three = timedelta(seconds=3)
        >>> subs = [
        ...     Subtitle(index=999, start=one, end=two, content='1'),
        ...     Subtitle(index=0, start=two, end=three, content='2'),
        ... ]
        >>> list(sort_and_reindex(subs))  # doctest: +ELLIPSIS
        [Subtitle(...index=1...), Subtitle(...index=2...)]

    :param subtitles: :py:class:`Subtitle` objects in any order
    :param int start_index: The index to start from
    :param bool in_place: Whether to modify subs in-place for performance
                          (version <=1.0.0 behaviour)
    :param bool skip: Whether to skip subtitles considered not useful (see
                      above for rules)
    :returns: The sorted subtitles
    :rtype: :term:`generator` of :py:class:`Subtitle` objects
    """
    next_index = start_index

    for candidate in sorted(subtitles):
        # Unless asked to work in place, reindex a copy so that the caller's
        # objects are left untouched.
        current = candidate if in_place else Subtitle(**vars(candidate))

        if skip:
            try:
                _should_skip_sub(current)
            except _ShouldSkipException as thrown_exc:
                if current.index is not None:
                    LOG.info(
                        "Skipped subtitle at index %d: %s", current.index, thrown_exc
                    )
                else:
                    LOG.info("Skipped subtitle with no index: %s", thrown_exc)
                continue

        # Only subtitles that are actually emitted advance the index.
        current.index = next_index
        next_index += 1

        yield current
|
||||||
|
|
||||||
|
|
||||||
|
def _should_skip_sub(subtitle: Subtitle) -> None:
    """
    Raise if any rule in SUBTITLE_SKIP_CONDITIONS matches the subtitle.

    The rules are evaluated in order and evaluation stops at the first match;
    its info message becomes the exception message.

    :param subtitle: A :py:class:`Subtitle` to check whether to skip
    :raises _ShouldSkipException: If the subtitle should be skipped
    """
    matched_reason = next(
        (
            info_msg
            for info_msg, sub_skipper in SUBTITLE_SKIP_CONDITIONS
            if sub_skipper(subtitle)
        ),
        None,
    )
    if matched_reason is not None:
        raise _ShouldSkipException(matched_reason)
|
||||||
|
|
||||||
|
|
||||||
|
def compose(
    subtitles: Union[Generator[Subtitle, None, None], List[Subtitle]],
    reindex: bool = True,
    start_index: int = 1,
    eol: Union[str, None] = None,
    in_place: bool = False,
) -> str:
    r"""
    Join the SRT blocks of an iterator of :py:class:`Subtitle` objects into a
    single string.

    .. doctest::

        >>> from datetime import timedelta
        >>> start = timedelta(seconds=1)
        >>> end = timedelta(seconds=2)
        >>> subs = [
        ...     Subtitle(index=1, start=start, end=end, content='x'),
        ...     Subtitle(index=2, start=start, end=end, content='y'),
        ... ]
        >>> compose(subs)  # doctest: +ELLIPSIS
        '1\n00:00:01,000 --> 00:00:02,000\nx\n\n2\n00:00:01,000 --> ...'

    :param subtitles: The subtitles to convert to SRT blocks
    :type subtitles: :term:`iterator` of :py:class:`Subtitle` objects
    :param bool reindex: Whether to reindex subtitles based on start time
    :param int start_index: If reindexing, the index to start reindexing from
    :param str eol: The end of line string to use (default "\\n")
    :param bool in_place: Whether to reindex subs in-place for performance
                          (version <=1.0.0 behaviour)
    :returns: A single SRT formatted string, with each input
              :py:class:`Subtitle` represented as an SRT block
    :rtype: str
    """
    ordered = (
        sort_and_reindex(subtitles, start_index=start_index, in_place=in_place)
        if reindex
        else subtitles
    )

    srt_blocks = [subtitle.to_srt(eol=eol) for subtitle in ordered]
    return "".join(srt_blocks)
|
||||||
|
|
||||||
|
|
||||||
|
class _ShouldSkipException(Exception):
    """
    Signals that a subtitle matched a skip rule and should be skipped; the
    exception message carries the matching rule's info message.
    """
|
||||||
@@ -1,9 +1,9 @@
|
|||||||
"""SubMaker module is used to generate subtitles from WordBoundary and SentenceBoundary events."""
|
"""SubMaker module is used to generate subtitles from WordBoundary and SentenceBoundary events."""
|
||||||
|
|
||||||
|
from datetime import timedelta
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
import srt # type: ignore
|
from .srt_composer import Subtitle, compose
|
||||||
|
|
||||||
from .typing import TTSChunk
|
from .typing import TTSChunk
|
||||||
|
|
||||||
|
|
||||||
@@ -13,7 +13,7 @@ class SubMaker:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self.cues: List[srt.Subtitle] = [] # type: ignore
|
self.cues: List[Subtitle] = []
|
||||||
|
|
||||||
def feed(self, msg: TTSChunk) -> None:
|
def feed(self, msg: TTSChunk) -> None:
|
||||||
"""
|
"""
|
||||||
@@ -29,10 +29,10 @@ class SubMaker:
|
|||||||
raise ValueError("Invalid message type, expected 'WordBoundary'")
|
raise ValueError("Invalid message type, expected 'WordBoundary'")
|
||||||
|
|
||||||
self.cues.append(
|
self.cues.append(
|
||||||
srt.Subtitle(
|
Subtitle(
|
||||||
index=len(self.cues) + 1,
|
index=len(self.cues) + 1,
|
||||||
start=srt.timedelta(microseconds=msg["offset"] / 10),
|
start=timedelta(microseconds=msg["offset"] / 10),
|
||||||
end=srt.timedelta(microseconds=(msg["offset"] + msg["duration"]) / 10),
|
end=timedelta(microseconds=(msg["offset"] + msg["duration"]) / 10),
|
||||||
content=msg["text"],
|
content=msg["text"],
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -53,15 +53,15 @@ class SubMaker:
|
|||||||
if len(self.cues) == 0:
|
if len(self.cues) == 0:
|
||||||
return
|
return
|
||||||
|
|
||||||
new_cues: List[srt.Subtitle] = [] # type: ignore
|
new_cues: List[Subtitle] = []
|
||||||
current_cue: srt.Subtitle = self.cues[0] # type: ignore
|
current_cue: Subtitle = self.cues[0]
|
||||||
for cue in self.cues[1:]:
|
for cue in self.cues[1:]:
|
||||||
if len(current_cue.content.split()) < words:
|
if len(current_cue.content.split()) < words:
|
||||||
current_cue = srt.Subtitle(
|
current_cue = Subtitle(
|
||||||
index=current_cue.index,
|
index=current_cue.index,
|
||||||
start=current_cue.start,
|
start=current_cue.start,
|
||||||
end=cue.end,
|
end=cue.end,
|
||||||
content=current_cue.content + " " + cue.content,
|
content=f"{current_cue.content} {cue.content}",
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
new_cues.append(current_cue)
|
new_cues.append(current_cue)
|
||||||
@@ -76,7 +76,7 @@ class SubMaker:
|
|||||||
Returns:
|
Returns:
|
||||||
str: The SRT formatted subtitles.
|
str: The SRT formatted subtitles.
|
||||||
"""
|
"""
|
||||||
return srt.compose(self.cues) # type: ignore
|
return compose(self.cues)
|
||||||
|
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
return self.get_srt()
|
return self.get_srt()
|
||||||
|
|||||||
@@ -134,7 +134,7 @@ async def amain() -> None:
|
|||||||
if args.file in ("-", "/dev/stdin"):
|
if args.file in ("-", "/dev/stdin"):
|
||||||
args.text = sys.stdin.read()
|
args.text = sys.stdin.read()
|
||||||
else:
|
else:
|
||||||
with open(args.file, "r", encoding="utf-8") as file:
|
with open(args.file, encoding="utf-8") as file:
|
||||||
args.text = file.read()
|
args.text = file.read()
|
||||||
|
|
||||||
if args.text is not None:
|
if args.text is not None:
|
||||||
|
|||||||
Reference in New Issue
Block a user