Update edge-tts to version 4.0.11
* Add pylint check for lint.sh
* Change the overlapping default from 5 seconds to 1 second for SubMaker and util.py
* Default to WordBoundary for edge-playback (from SentenceBoundary)
* Drop SentenceBoundary support (it never worked properly and needed too many hacks)
- It has not actually been supported by Azure's official SDK for a few years
already
* Stop attempting to correct broken offsets sent back to us by Azure
- Fixes never work properly because the AI voice sometimes takes odd pauses
at the start and sometimes doesn't. It's never predictable and
cannot be fixed on the library's end.
- The solution is for Microsoft to fix the integer overflow bug they are facing
in the {Word,Sentence}Boundary offsets. It doesn't affect us until we reach
30-minute-long TTS anyway.
* Have edge-tts --list-voices use the configured HTTP proxy
* More misc changes and fixes
This commit is contained in:
1
lint.sh
1
lint.sh
@@ -1,2 +1,3 @@
|
|||||||
find src examples -name '*.py' | xargs black
|
find src examples -name '*.py' | xargs black
|
||||||
find src examples -name '*.py' | xargs isort
|
find src examples -name '*.py' | xargs isort
|
||||||
|
find src examples -name '*.py' | xargs pylint
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[metadata]
|
[metadata]
|
||||||
name = edge-tts
|
name = edge-tts
|
||||||
version = 4.0.10
|
version = 4.0.11
|
||||||
author = rany
|
author = rany
|
||||||
author_email = ranygh@riseup.net
|
author_email = ranygh@riseup.net
|
||||||
description = Microsoft Edge's TTS
|
description = Microsoft Edge's TTS
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ def main():
|
|||||||
with subprocess.Popen(
|
with subprocess.Popen(
|
||||||
[
|
[
|
||||||
"edge-tts",
|
"edge-tts",
|
||||||
"--boundary-type=2",
|
"--boundary-type=1",
|
||||||
f"--write-media={media.name}",
|
f"--write-media={media.name}",
|
||||||
f"--write-subtitles={subtitle.name}",
|
f"--write-subtitles={subtitle.name}",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -55,8 +55,8 @@ def remove_incompatible_characters(string):
|
|||||||
|
|
||||||
string = list(string)
|
string = list(string)
|
||||||
|
|
||||||
for idx in range(len(string)): # pylint: disable=consider-using-enumerate
|
for idx, char in enumerate(string):
|
||||||
code = ord(string[idx])
|
code = ord(char)
|
||||||
if (0 <= code <= 8) or (11 <= code <= 12) or (14 <= code <= 31):
|
if (0 <= code <= 8) or (11 <= code <= 12) or (14 <= code <= 31):
|
||||||
string[idx] = " "
|
string[idx] = " "
|
||||||
|
|
||||||
@@ -193,7 +193,7 @@ def ssml_headers_plus_data(request_id, timestamp, ssml):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class Communicate: # pylint: disable=too-few-public-methods
|
class Communicate:
|
||||||
"""
|
"""
|
||||||
Class for communicating with the service.
|
Class for communicating with the service.
|
||||||
"""
|
"""
|
||||||
@@ -215,7 +215,7 @@ class Communicate: # pylint: disable=too-few-public-methods
|
|||||||
volume="+0%",
|
volume="+0%",
|
||||||
customspeak=False,
|
customspeak=False,
|
||||||
proxy=None,
|
proxy=None,
|
||||||
): # pylint: disable=too-many-arguments, too-many-locals
|
):
|
||||||
"""
|
"""
|
||||||
Runs the Communicate class.
|
Runs the Communicate class.
|
||||||
|
|
||||||
@@ -234,14 +234,14 @@ class Communicate: # pylint: disable=too-few-public-methods
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
word_boundary = False
|
word_boundary = False
|
||||||
sentence_boundary = False
|
|
||||||
|
|
||||||
if boundary_type > 0:
|
if boundary_type > 0:
|
||||||
word_boundary = True
|
word_boundary = True
|
||||||
if boundary_type > 1:
|
if boundary_type > 1:
|
||||||
sentence_boundary = True
|
raise ValueError(
|
||||||
|
"Invalid boundary type. SentenceBoundary is no longer supported."
|
||||||
|
)
|
||||||
|
|
||||||
sentence_boundary = str(sentence_boundary).lower()
|
|
||||||
word_boundary = str(word_boundary).lower()
|
word_boundary = str(word_boundary).lower()
|
||||||
|
|
||||||
if not customspeak:
|
if not customspeak:
|
||||||
@@ -262,12 +262,8 @@ class Communicate: # pylint: disable=too-few-public-methods
|
|||||||
if isinstance(messages, str):
|
if isinstance(messages, str):
|
||||||
messages = [messages]
|
messages = [messages]
|
||||||
|
|
||||||
|
|
||||||
# Variables for the loop
|
# Variables for the loop
|
||||||
download = False
|
download = False
|
||||||
current_subtitle = ""
|
|
||||||
first_offset = None
|
|
||||||
last_offset = None
|
|
||||||
async with aiohttp.ClientSession(trust_env=True) as session:
|
async with aiohttp.ClientSession(trust_env=True) as session:
|
||||||
async with session.ws_connect(
|
async with session.ws_connect(
|
||||||
f"{WSS_URL}&ConnectionId={connect_id()}",
|
f"{WSS_URL}&ConnectionId={connect_id()}",
|
||||||
@@ -304,7 +300,7 @@ class Communicate: # pylint: disable=too-few-public-methods
|
|||||||
"Content-Type:application/json; charset=utf-8\r\n"
|
"Content-Type:application/json; charset=utf-8\r\n"
|
||||||
"Path:speech.config\r\n\r\n"
|
"Path:speech.config\r\n\r\n"
|
||||||
'{"context":{"synthesis":{"audio":{"metadataoptions":{'
|
'{"context":{"synthesis":{"audio":{"metadataoptions":{'
|
||||||
f'"sentenceBoundaryEnabled":{sentence_boundary},'
|
f'"sentenceBoundaryEnabled":false,'
|
||||||
f'"wordBoundaryEnabled":{word_boundary}}},"outputFormat":"{codec}"'
|
f'"wordBoundaryEnabled":{word_boundary}}},"outputFormat":"{codec}"'
|
||||||
"}}}}\r\n"
|
"}}}}\r\n"
|
||||||
)
|
)
|
||||||
@@ -326,12 +322,6 @@ class Communicate: # pylint: disable=too-few-public-methods
|
|||||||
|
|
||||||
# Begin listening for the response.
|
# Begin listening for the response.
|
||||||
async for received in websocket:
|
async for received in websocket:
|
||||||
if received.type in (
|
|
||||||
aiohttp.WSMsgType.CLOSED,
|
|
||||||
aiohttp.WSMsgType.ERROR,
|
|
||||||
):
|
|
||||||
break
|
|
||||||
|
|
||||||
if received.type == aiohttp.WSMsgType.TEXT:
|
if received.type == aiohttp.WSMsgType.TEXT:
|
||||||
parameters, data = get_headers_and_data(received.data)
|
parameters, data = get_headers_and_data(received.data)
|
||||||
if (
|
if (
|
||||||
@@ -358,12 +348,15 @@ class Communicate: # pylint: disable=too-few-public-methods
|
|||||||
metadata_duration = metadata["Metadata"][0]["Data"][
|
metadata_duration = metadata["Metadata"][0]["Data"][
|
||||||
"Duration"
|
"Duration"
|
||||||
]
|
]
|
||||||
except KeyError:
|
except KeyError as exception:
|
||||||
metadata_duration = 0
|
raise ValueError(
|
||||||
|
"The metadata doesn't contain a Duration field. "
|
||||||
|
+ "This usually happens when SentenceBoundary metadata type is sent."
|
||||||
|
) from exception
|
||||||
metadata_text = metadata["Metadata"][0]["Data"]["text"][
|
metadata_text = metadata["Metadata"][0]["Data"]["text"][
|
||||||
"Text"
|
"Text"
|
||||||
]
|
]
|
||||||
if boundary_type == 1:
|
if metadata_type == "WordBoundary":
|
||||||
yield (
|
yield (
|
||||||
[
|
[
|
||||||
metadata_offset,
|
metadata_offset,
|
||||||
@@ -372,31 +365,32 @@ class Communicate: # pylint: disable=too-few-public-methods
|
|||||||
metadata_text,
|
metadata_text,
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
|
elif metadata_type == "SentenceBoundary":
|
||||||
|
raise NotImplementedError(
|
||||||
|
"SentenceBoundary is not supported due to being broken."
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
if metadata_type == "WordBoundary":
|
raise NotImplementedError(
|
||||||
if current_subtitle:
|
f"Unknown metadata type: {metadata_type}"
|
||||||
current_subtitle += " "
|
)
|
||||||
current_subtitle += metadata_text
|
elif (
|
||||||
if first_offset is None:
|
"Path" in parameters
|
||||||
first_offset = metadata_offset
|
and parameters["Path"] == "response"
|
||||||
last_offset = [
|
):
|
||||||
metadata_offset,
|
# TODO: implement this:
|
||||||
metadata_duration,
|
"""
|
||||||
]
|
X-RequestId:xxxxxxxxxxxxxxxxxxxxxxxxx
|
||||||
elif metadata_type == "SentenceBoundary":
|
Content-Type:application/json; charset=utf-8
|
||||||
if current_subtitle:
|
Path:response
|
||||||
yield (
|
|
||||||
[
|
|
||||||
first_offset,
|
|
||||||
sum(last_offset) - first_offset,
|
|
||||||
],
|
|
||||||
current_subtitle,
|
|
||||||
None,
|
|
||||||
)
|
|
||||||
current_subtitle = ""
|
|
||||||
first_offset = None
|
|
||||||
last_offset = None
|
|
||||||
|
|
||||||
|
{"context":{"serviceTag":"yyyyyyyyyyyyyyyyyyy"},"audio":{"type":"inline","streamId":"zzzzzzzzzzzzzzzzz"}}
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
"The response from the service is not recognized.\n"
|
||||||
|
+ received.data
|
||||||
|
)
|
||||||
elif received.type == aiohttp.WSMsgType.BINARY:
|
elif received.type == aiohttp.WSMsgType.BINARY:
|
||||||
if download:
|
if download:
|
||||||
yield (
|
yield (
|
||||||
@@ -406,10 +400,8 @@ class Communicate: # pylint: disable=too-few-public-methods
|
|||||||
received.data.split(b"Path:audio\r\n")[1:]
|
received.data.split(b"Path:audio\r\n")[1:]
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
if current_subtitle:
|
else:
|
||||||
yield (
|
raise ValueError(
|
||||||
[first_offset, sum(last_offset) - first_offset],
|
"The service sent a binary message, but we are not expecting one."
|
||||||
current_subtitle,
|
)
|
||||||
None,
|
|
||||||
)
|
|
||||||
await websocket.close()
|
await websocket.close()
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import aiohttp
|
|||||||
from .constants import VOICE_LIST
|
from .constants import VOICE_LIST
|
||||||
|
|
||||||
|
|
||||||
async def list_voices():
|
async def list_voices(proxy=None):
|
||||||
"""
|
"""
|
||||||
List all available voices and their attributes.
|
List all available voices and their attributes.
|
||||||
|
|
||||||
@@ -36,6 +36,7 @@ async def list_voices():
|
|||||||
"Accept-Encoding": "gzip, deflate, br",
|
"Accept-Encoding": "gzip, deflate, br",
|
||||||
"Accept-Language": "en-US,en;q=0.9",
|
"Accept-Language": "en-US,en;q=0.9",
|
||||||
},
|
},
|
||||||
|
proxy=proxy,
|
||||||
) as url:
|
) as url:
|
||||||
data = json.loads(await url.text())
|
data = json.loads(await url.text())
|
||||||
return data
|
return data
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ class SubMaker:
|
|||||||
SubMaker class
|
SubMaker class
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, overlapping=5):
|
def __init__(self, overlapping=1):
|
||||||
"""
|
"""
|
||||||
SubMaker constructor.
|
SubMaker constructor.
|
||||||
|
|
||||||
@@ -48,7 +48,6 @@ class SubMaker:
|
|||||||
subtitles should overlap.
|
subtitles should overlap.
|
||||||
"""
|
"""
|
||||||
self.subs_and_offset = []
|
self.subs_and_offset = []
|
||||||
self.broken_offset = 0
|
|
||||||
self.overlapping = overlapping * (10**7)
|
self.overlapping = overlapping * (10**7)
|
||||||
|
|
||||||
def create_sub(self, timestamp, text):
|
def create_sub(self, timestamp, text):
|
||||||
@@ -64,13 +63,6 @@ class SubMaker:
|
|||||||
None
|
None
|
||||||
"""
|
"""
|
||||||
timestamp[1] += timestamp[0]
|
timestamp[1] += timestamp[0]
|
||||||
|
|
||||||
if len(self.subs_and_offset) >= 2:
|
|
||||||
if self.subs_and_offset[-2][1] >= timestamp[0] + self.broken_offset:
|
|
||||||
self.broken_offset = self.subs_and_offset[-2][1]
|
|
||||||
timestamp[0] += self.broken_offset
|
|
||||||
timestamp[1] += self.broken_offset
|
|
||||||
|
|
||||||
self.subs_and_offset.append(timestamp)
|
self.subs_and_offset.append(timestamp)
|
||||||
self.subs_and_offset.append(text)
|
self.subs_and_offset.append(text)
|
||||||
|
|
||||||
|
|||||||
@@ -10,11 +10,11 @@ import sys
|
|||||||
from edge_tts import Communicate, SubMaker, list_voices
|
from edge_tts import Communicate, SubMaker, list_voices
|
||||||
|
|
||||||
|
|
||||||
async def _list_voices():
|
async def _list_voices(proxy):
|
||||||
"""
|
"""
|
||||||
List available voices.
|
List available voices.
|
||||||
"""
|
"""
|
||||||
for idx, voice in enumerate(await list_voices()):
|
for idx, voice in enumerate(await list_voices(proxy=proxy)):
|
||||||
if idx != 0:
|
if idx != 0:
|
||||||
print()
|
print()
|
||||||
|
|
||||||
@@ -112,13 +112,13 @@ async def _main():
|
|||||||
"-O",
|
"-O",
|
||||||
"--overlapping",
|
"--overlapping",
|
||||||
help="overlapping subtitles in seconds",
|
help="overlapping subtitles in seconds",
|
||||||
default=5,
|
default=1,
|
||||||
type=float,
|
type=float,
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-b",
|
"-b",
|
||||||
"--boundary-type",
|
"--boundary-type",
|
||||||
help="set boundary type for subtitles. Default 0 for none. Set 1 for word_boundary, 2 for sentence_boundary",
|
help="set boundary type for subtitles. Default 0 for none. Set 1 for word_boundary.",
|
||||||
default=0,
|
default=0,
|
||||||
type=int,
|
type=int,
|
||||||
)
|
)
|
||||||
@@ -136,7 +136,7 @@ async def _main():
|
|||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.list_voices:
|
if args.list_voices:
|
||||||
await _list_voices()
|
await _list_voices(args.proxy)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
if args.text is not None or args.file is not None:
|
if args.text is not None or args.file is not None:
|
||||||
|
|||||||
Reference in New Issue
Block a user