Some bug fixes and improvements
This commit is contained in:
@@ -10,7 +10,6 @@ ttsmpeg=$(mktemp)
|
|||||||
|
|
||||||
## Cleanup function to kill all processes and remove tmp file
|
## Cleanup function to kill all processes and remove tmp file
|
||||||
quitfunc() {
|
quitfunc() {
|
||||||
# shellcheck disable=SC2046
|
|
||||||
kill -- $(jobs -p)
|
kill -- $(jobs -p)
|
||||||
rm -f -- "${ttsmpeg:?}"
|
rm -f -- "${ttsmpeg:?}"
|
||||||
}
|
}
|
||||||
|
|||||||
44
edge-tts.py
44
edge-tts.py
@@ -9,6 +9,7 @@ import asyncio
|
|||||||
import ssl
|
import ssl
|
||||||
import websockets
|
import websockets
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
import logging
|
||||||
from email.utils import formatdate
|
from email.utils import formatdate
|
||||||
from xml.sax.saxutils import escape
|
from xml.sax.saxutils import escape
|
||||||
|
|
||||||
@@ -17,8 +18,6 @@ trustedClientToken = '6A5AA1D4EAFF4E9FB37E23D68491D6F4'
|
|||||||
wssUrl = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=' + trustedClientToken
|
wssUrl = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=' + trustedClientToken
|
||||||
voiceList = 'https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=' + trustedClientToken
|
voiceList = 'https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=' + trustedClientToken
|
||||||
|
|
||||||
def debug(msg, fd=sys.stderr):
|
|
||||||
if DEBUG: print(msg, file=fd)
|
|
||||||
def terminator(signo, stack_frame): sys.exit()
|
def terminator(signo, stack_frame): sys.exit()
|
||||||
signal.signal(signal.SIGINT, terminator)
|
signal.signal(signal.SIGINT, terminator)
|
||||||
signal.signal(signal.SIGTERM, terminator)
|
signal.signal(signal.SIGTERM, terminator)
|
||||||
@@ -37,12 +36,12 @@ def removeIncompatibleControlChars(s):
|
|||||||
|
|
||||||
def list_voices():
|
def list_voices():
|
||||||
with urllib.request.urlopen(voiceList) as url:
|
with urllib.request.urlopen(voiceList) as url:
|
||||||
debug("Loading json from %s" % voiceList)
|
logging.debug("Loading json from %s" % voiceList)
|
||||||
data = json.loads(url.read().decode('utf-8'))
|
data = json.loads(url.read().decode('utf-8'))
|
||||||
debug("JSON Loaded")
|
logging.debug("JSON Loaded")
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def mkssmlmsg(text="", voice="", pitchString="", rateString="", volumeString="", customspeak=False):
|
def mkssmlmsg(text="", voice="en-US-AriaNeural", pitchString="+0Hz", rateString="+0%", volumeString="+0%", customspeak=False):
|
||||||
message='X-RequestId:'+connectId()+'\r\nContent-Type:application/ssml+xml\r\n'
|
message='X-RequestId:'+connectId()+'\r\nContent-Type:application/ssml+xml\r\n'
|
||||||
message+='X-Timestamp:'+formatdate()+'Z\r\nPath:ssml\r\n\r\n'
|
message+='X-Timestamp:'+formatdate()+'Z\r\nPath:ssml\r\n\r\n'
|
||||||
if customspeak:
|
if customspeak:
|
||||||
@@ -52,18 +51,18 @@ def mkssmlmsg(text="", voice="", pitchString="", rateString="", volumeString="",
|
|||||||
message+="<voice name='" + voice + "'>" + "<prosody pitch='" + pitchString + "' rate ='" + rateString + "' volume='" + volumeString + "'>" + text + '</prosody></voice></speak>'
|
message+="<voice name='" + voice + "'>" + "<prosody pitch='" + pitchString + "' rate ='" + rateString + "' volume='" + volumeString + "'>" + text + '</prosody></voice></speak>'
|
||||||
return message
|
return message
|
||||||
|
|
||||||
async def run_tts(msg, sentenceBoundaryEnabled="", wordBoundaryEnabled="", codec=""):
|
async def run_tts(msg, sentenceBoundaryEnabled="false", wordBoundaryEnabled="false", codec="audio-24khz-48kbitrate-mono-mp3"):
|
||||||
debug("Doing %s!" % msg)
|
logging.debug("Doing %s!" % msg)
|
||||||
async with websockets.connect(wssUrl, ssl=ssl_context) as ws:
|
async with websockets.connect(wssUrl, ssl=ssl_context) as ws:
|
||||||
message='X-Timestamp:'+formatdate()+'\r\nContent-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n'
|
message='X-Timestamp:'+formatdate()+'\r\nContent-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n'
|
||||||
message+='{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"'+sentenceBoundaryEnabled+'","wordBoundaryEnabled":"'+wordBoundaryEnabled+'"},"outputFormat":"' + codec + '"}}}}\r\n'
|
message+='{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"'+sentenceBoundaryEnabled+'","wordBoundaryEnabled":"'+wordBoundaryEnabled+'"},"outputFormat":"' + codec + '"}}}}\r\n'
|
||||||
await ws.send(message)
|
await ws.send(message)
|
||||||
debug("> %s" % message)
|
logging.debug("> %s" % message)
|
||||||
await ws.send(msg)
|
await ws.send(msg)
|
||||||
debug("> %s" % msg)
|
logging.debug("> %s" % msg)
|
||||||
async for recv in ws:
|
async for recv in ws:
|
||||||
recv = recv.encode('utf-8') if type(recv) is not bytes else recv
|
recv = recv.encode('utf-8') if type(recv) is not bytes else recv
|
||||||
debug("< %s" % recv)
|
logging.debug("< %s" % recv)
|
||||||
if b'turn.end' in recv:
|
if b'turn.end' in recv:
|
||||||
await ws.close()
|
await ws.close()
|
||||||
elif b'Path:audio\r\n' in recv:
|
elif b'Path:audio\r\n' in recv:
|
||||||
@@ -98,6 +97,13 @@ async def main():
|
|||||||
group = parser.add_mutually_exclusive_group(required=True)
|
group = parser.add_mutually_exclusive_group(required=True)
|
||||||
group.add_argument('-t', '--text', help='what TTS will say')
|
group.add_argument('-t', '--text', help='what TTS will say')
|
||||||
group.add_argument('-f', '--file', help='same as --text but read from file')
|
group.add_argument('-f', '--file', help='same as --text but read from file')
|
||||||
|
parser.add_argument(
|
||||||
|
"-L",
|
||||||
|
"--log-level",
|
||||||
|
default=logging.CRITICAL,
|
||||||
|
type=lambda x: getattr(logging, x),
|
||||||
|
help="Configure the logging level."
|
||||||
|
)
|
||||||
parser.add_argument('-z', '--custom-ssml', help='treat text as ssml to send. For more info check https://bit.ly/3fIq13S', action='store_true')
|
parser.add_argument('-z', '--custom-ssml', help='treat text as ssml to send. For more info check https://bit.ly/3fIq13S', action='store_true')
|
||||||
parser.add_argument('-v', '--voice', help='voice for TTS. Default: en-US-AriaNeural', default='en-US-AriaNeural')
|
parser.add_argument('-v', '--voice', help='voice for TTS. Default: en-US-AriaNeural', default='en-US-AriaNeural')
|
||||||
parser.add_argument('-c', '--codec', help="codec format. Default: audio-24khz-48kbitrate-mono-mp3. Another choice is webm-24khz-16bit-mono-opus", default='audio-24khz-48kbitrate-mono-mp3')
|
parser.add_argument('-c', '--codec', help="codec format. Default: audio-24khz-48kbitrate-mono-mp3. Another choice is webm-24khz-16bit-mono-opus", default='audio-24khz-48kbitrate-mono-mp3')
|
||||||
@@ -105,21 +111,19 @@ async def main():
|
|||||||
parser.add_argument('-p', '--pitch', help="set TTS pitch. Default +0Hz, For more info check https://bit.ly/3eAE5Nx", default="+0Hz")
|
parser.add_argument('-p', '--pitch', help="set TTS pitch. Default +0Hz, For more info check https://bit.ly/3eAE5Nx", default="+0Hz")
|
||||||
parser.add_argument('-r', '--rate', help="set TTS rate. Default +0%%. For more info check https://bit.ly/3eAE5Nx", default="+0%")
|
parser.add_argument('-r', '--rate', help="set TTS rate. Default +0%%. For more info check https://bit.ly/3eAE5Nx", default="+0%")
|
||||||
parser.add_argument('-V', '--volume', help="set TTS volume. Default +0%%. For more info check https://bit.ly/3eAE5Nx", default="+0%")
|
parser.add_argument('-V', '--volume', help="set TTS volume. Default +0%%. For more info check https://bit.ly/3eAE5Nx", default="+0%")
|
||||||
parser.add_argument('-s', '--enable-sentence-boundary', help="enable sentence boundary (not implemented but set)", action='store_true')
|
parser.add_argument('-s', '--enable-sentence-boundary', help="enable sentence boundary (not implemented but settable)", action='store_true')
|
||||||
parser.add_argument('-w', '--enable-word-boundary', help="enable word boundary (not implemented but set)", action='store_true')
|
parser.add_argument('-w', '--enable-word-boundary', help="enable word boundary (not implemented but settable)", action='store_true')
|
||||||
parser.add_argument('-D', '--debug', help="some debugging", action='store_true')
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
global DEBUG
|
logging.basicConfig(level=args.log_level)
|
||||||
DEBUG = args.debug
|
|
||||||
if (args.text or args.file) is not None:
|
if (args.text or args.file) is not None:
|
||||||
if args.file is not None:
|
if args.file is not None:
|
||||||
# we need to use sys.stdin.read() because some devices
|
# we need to use sys.stdin.read() because some devices
|
||||||
# like Windows and Termux don't have a /dev/stdin.
|
# like Windows and Termux don't have a /dev/stdin.
|
||||||
if args.file == "/dev/stdin":
|
if args.file == "/dev/stdin":
|
||||||
debug("stdin detected, reading natively from stdin")
|
logging.debug("stdin detected, reading natively from stdin")
|
||||||
args.text = sys.stdin.read()
|
args.text = sys.stdin.read()
|
||||||
else:
|
else:
|
||||||
debug("reading from %s" % args.file)
|
logging.debug("reading from %s" % args.file)
|
||||||
with open(args.file, 'r') as file:
|
with open(args.file, 'r') as file:
|
||||||
args.text = file.read()
|
args.text = file.read()
|
||||||
sentenceBoundaryEnabled = 'true' if args.enable_sentence_boundary else 'false'
|
sentenceBoundaryEnabled = 'true' if args.enable_sentence_boundary else 'false'
|
||||||
@@ -128,7 +132,7 @@ async def main():
|
|||||||
async for i in run_tts(mkssmlmsg(text=args.text, customspeak=True), sentenceBoundaryEnabled, wordBoundaryEnabled, args.codec):
|
async for i in run_tts(mkssmlmsg(text=args.text, customspeak=True), sentenceBoundaryEnabled, wordBoundaryEnabled, args.codec):
|
||||||
sys.stdout.buffer.write(i)
|
sys.stdout.buffer.write(i)
|
||||||
else:
|
else:
|
||||||
overhead = len(mkssmlmsg().encode('utf-8'))
|
overhead = len(mkssmlmsg('', args.voice, args.pitch, args.rate, args.volume).encode('utf-8'))
|
||||||
wsmax = 65536 - overhead
|
wsmax = 65536 - overhead
|
||||||
for text in _minimize(escape(removeIncompatibleControlChars(args.text)), b" ", wsmax):
|
for text in _minimize(escape(removeIncompatibleControlChars(args.text)), b" ", wsmax):
|
||||||
async for i in run_tts(mkssmlmsg(text.decode('utf-8'), args.voice, args.pitch, args.rate, args.volume), sentenceBoundaryEnabled, wordBoundaryEnabled, args.codec):
|
async for i in run_tts(mkssmlmsg(text.decode('utf-8'), args.voice, args.pitch, args.rate, args.volume), sentenceBoundaryEnabled, wordBoundaryEnabled, args.codec):
|
||||||
@@ -138,9 +142,9 @@ async def main():
|
|||||||
for voice in list_voices():
|
for voice in list_voices():
|
||||||
if seperator: print()
|
if seperator: print()
|
||||||
for key in voice.keys():
|
for key in voice.keys():
|
||||||
debug("Processing key %s" % key)
|
logging.debug("Processing key %s" % key)
|
||||||
if key in ["Name", "SuggestedCodec", "FriendlyName", "Status"]:
|
if key in ["Name", "SuggestedCodec", "FriendlyName", "Status"]:
|
||||||
debug("Key %s skipped" % key)
|
logging.debug("Key %s skipped" % key)
|
||||||
continue
|
continue
|
||||||
print("%s: %s" % ("Name" if key == "ShortName" else key, voice[key]))
|
print("%s: %s" % ("Name" if key == "ShortName" else key, voice[key]))
|
||||||
seperator = True
|
seperator = True
|
||||||
|
|||||||
Reference in New Issue
Block a user