drop custom SSML support
This commit is contained in:
28
README.md
28
README.md
@@ -61,35 +61,11 @@ You must first check the available voices with the `--list-voices` option:
|
|||||||
|
|
||||||
### Custom SSML
|
### Custom SSML
|
||||||
|
|
||||||
It is possible to send Microsoft's text-to-speech servers a custom SSML document which would allow greater customization of the speech.
|
Support for custom SSML was removed in 5.0.0 because Microsoft has taken steps to prevent it from working. You cannot use custom SSML anymore.
|
||||||
|
|
||||||
Information about the SSML format can be found here on Microsoft's own website: https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-synthesis-markup
|
|
||||||
|
|
||||||
As a short example, suppose you want to apply the following SSML document and play it back using `edge-tts`:
|
|
||||||
|
|
||||||
```
|
|
||||||
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"
|
|
||||||
xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="en-US">
|
|
||||||
<voice name="en-US-AriaNeural">
|
|
||||||
<mstts:express-as style="cheerful">
|
|
||||||
That'd be just amazing!
|
|
||||||
</mstts:express-as>
|
|
||||||
</voice>
|
|
||||||
</speak>
|
|
||||||
```
|
|
||||||
|
|
||||||
It would be easiest to do the following:
|
|
||||||
|
|
||||||
1. Create a file called `custom_ssml.xml` with the above content.
|
|
||||||
2. Run the following command:
|
|
||||||
|
|
||||||
$ edge-tts --custom-ssml --file custom_ssml.xml --write-media amazing.mp3
|
|
||||||
|
|
||||||
3. Voila!
|
|
||||||
|
|
||||||
### Changing pitch, rate, volume, etc.
|
### Changing pitch, rate, volume, etc.
|
||||||
|
|
||||||
It is possible to make minor changes to the generated speech without resorting to custom SSML. However, note that you cannot use the `--custom-ssml` option together with the `--pitch`, `--rate`, `--volume`, etc. options.
|
It is possible to make minor changes to the generated speech.
|
||||||
|
|
||||||
$ edge-tts --pitch=-10Hz --text "Hello, world!" --write-media hello_with_pitch_down.mp3
|
$ edge-tts --pitch=-10Hz --text "Hello, world!" --write-media hello_with_pitch_down.mp3
|
||||||
$ edge-tts --rate=0.5 --text "Hello, world!" --write-media hello_with_rate_halved.mp3
|
$ edge-tts --rate=0.5 --text "Hello, world!" --write-media hello_with_rate_halved.mp3
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[metadata]
|
[metadata]
|
||||||
name = edge-tts
|
name = edge-tts
|
||||||
version = 4.0.11
|
version = 5.0.0
|
||||||
author = rany
|
author = rany
|
||||||
author_email = ranygh@riseup.net
|
author_email = ranygh@riseup.net
|
||||||
description = Microsoft Edge's TTS
|
description = Microsoft Edge's TTS
|
||||||
|
|||||||
@@ -213,7 +213,6 @@ class Communicate:
|
|||||||
pitch="+0Hz",
|
pitch="+0Hz",
|
||||||
rate="+0%",
|
rate="+0%",
|
||||||
volume="+0%",
|
volume="+0%",
|
||||||
customspeak=False,
|
|
||||||
proxy=None,
|
proxy=None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
@@ -223,11 +222,10 @@ class Communicate:
|
|||||||
messages (str or list): A list of SSML strings or a single text.
|
messages (str or list): A list of SSML strings or a single text.
|
||||||
boundery_type (int): The type of boundary to use. 0 for none, 1 for word_boundary, 2 for sentence_boundary.
|
boundery_type (int): The type of boundary to use. 0 for none, 1 for word_boundary, 2 for sentence_boundary.
|
||||||
codec (str): The codec to use.
|
codec (str): The codec to use.
|
||||||
voice (str): The voice to use (only applicable to non-customspeak).
|
voice (str): The voice to use.
|
||||||
pitch (str): The pitch to use (only applicable to non-customspeak).
|
pitch (str): The pitch to use.
|
||||||
rate (str): The rate to use (only applicable to non-customspeak).
|
rate (str): The rate to use.
|
||||||
volume (str): The volume to use (only applicable to non-customspeak).
|
volume (str): The volume to use.
|
||||||
customspeak (bool): Whether to create the SSML or treat the messages as SSML.
|
|
||||||
|
|
||||||
Yields:
|
Yields:
|
||||||
tuple: The subtitle offset, subtitle, and audio data.
|
tuple: The subtitle offset, subtitle, and audio data.
|
||||||
@@ -244,23 +242,19 @@ class Communicate:
|
|||||||
|
|
||||||
word_boundary = str(word_boundary).lower()
|
word_boundary = str(word_boundary).lower()
|
||||||
|
|
||||||
if not customspeak:
|
websocket_max_size = 2**16
|
||||||
websocket_max_size = 2**16
|
overhead_per_message = (
|
||||||
overhead_per_message = (
|
len(
|
||||||
len(
|
ssml_headers_plus_data(
|
||||||
ssml_headers_plus_data(
|
connect_id(), self.date, mkssml("", voice, pitch, rate, volume)
|
||||||
connect_id(), self.date, mkssml("", voice, pitch, rate, volume)
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
+ 50
|
|
||||||
) # margin of error
|
|
||||||
messages = split_text_by_byte_length(
|
|
||||||
escape(remove_incompatible_characters(messages)),
|
|
||||||
websocket_max_size - overhead_per_message,
|
|
||||||
)
|
)
|
||||||
else:
|
+ 50
|
||||||
if isinstance(messages, str):
|
) # margin of error
|
||||||
messages = [messages]
|
messages = split_text_by_byte_length(
|
||||||
|
escape(remove_incompatible_characters(messages)),
|
||||||
|
websocket_max_size - overhead_per_message,
|
||||||
|
)
|
||||||
|
|
||||||
# Variables for the loop
|
# Variables for the loop
|
||||||
download = False
|
download = False
|
||||||
@@ -307,18 +301,13 @@ class Communicate:
|
|||||||
# Send the request to the service.
|
# Send the request to the service.
|
||||||
await websocket.send_str(request)
|
await websocket.send_str(request)
|
||||||
# Send the message itself.
|
# Send the message itself.
|
||||||
if not customspeak:
|
await websocket.send_str(
|
||||||
await websocket.send_str(
|
ssml_headers_plus_data(
|
||||||
ssml_headers_plus_data(
|
connect_id(),
|
||||||
connect_id(),
|
self.date,
|
||||||
self.date,
|
mkssml(message, voice, pitch, rate, volume),
|
||||||
mkssml(message, voice, pitch, rate, volume),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
await websocket.send_str(
|
|
||||||
ssml_headers_plus_data(connect_id(), self.date, message)
|
|
||||||
)
|
)
|
||||||
|
)
|
||||||
|
|
||||||
# Begin listening for the response.
|
# Begin listening for the response.
|
||||||
async for received in websocket:
|
async for received in websocket:
|
||||||
|
|||||||
@@ -38,7 +38,6 @@ async def _tts(args):
|
|||||||
args.pitch,
|
args.pitch,
|
||||||
args.rate,
|
args.rate,
|
||||||
args.volume,
|
args.volume,
|
||||||
customspeak=args.custom_ssml,
|
|
||||||
proxy=args.proxy,
|
proxy=args.proxy,
|
||||||
):
|
):
|
||||||
if i[2] is not None:
|
if i[2] is not None:
|
||||||
@@ -62,12 +61,6 @@ async def _main():
|
|||||||
group = parser.add_mutually_exclusive_group(required=True)
|
group = parser.add_mutually_exclusive_group(required=True)
|
||||||
group.add_argument("-t", "--text", help="what TTS will say")
|
group.add_argument("-t", "--text", help="what TTS will say")
|
||||||
group.add_argument("-f", "--file", help="same as --text but read from file")
|
group.add_argument("-f", "--file", help="same as --text but read from file")
|
||||||
parser.add_argument(
|
|
||||||
"-z",
|
|
||||||
"--custom-ssml",
|
|
||||||
help="treat text as ssml to send. For more info check https://bit.ly/3fIq13S",
|
|
||||||
action="store_true",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-v",
|
"-v",
|
||||||
"--voice",
|
"--voice",
|
||||||
|
|||||||
Reference in New Issue
Block a user