More improvements
This commit is contained in:
50
edge-tts.py
50
edge-tts.py
@@ -12,10 +12,10 @@ import unicodedata
|
|||||||
from email.utils import formatdate
|
from email.utils import formatdate
|
||||||
from xml.sax.saxutils import escape
|
from xml.sax.saxutils import escape
|
||||||
|
|
||||||
trustedClientToken = '6A5AA1D4EAFF4E9FB37E23D68491D6F4'
|
|
||||||
ssl_context = ssl.create_default_context()
|
ssl_context = ssl.create_default_context()
|
||||||
|
trustedClientToken = '6A5AA1D4EAFF4E9FB37E23D68491D6F4'
|
||||||
|
wssUrl = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=' + trustedClientToken
|
||||||
voiceList = 'https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=' + trustedClientToken
|
voiceList = 'https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=' + trustedClientToken
|
||||||
wsUrl = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=' + trustedClientToken
|
|
||||||
|
|
||||||
def debug(msg, fd=sys.stderr):
|
def debug(msg, fd=sys.stderr):
|
||||||
if DEBUG: print(msg, file=fd)
|
if DEBUG: print(msg, file=fd)
|
||||||
@@ -26,7 +26,7 @@ def connectId(): return str(uuid.uuid4()).replace("-", "")
|
|||||||
def removeIncompatibleControlChars(s):
|
def removeIncompatibleControlChars(s):
|
||||||
output = []
|
output = []
|
||||||
for ch in s:
|
for ch in s:
|
||||||
# We consider these control characters as whitespace
|
# We consider that these control characters are whitespace
|
||||||
if ch in ['\t','\n','\r']:
|
if ch in ['\t','\n','\r']:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
@@ -35,6 +35,29 @@ def removeIncompatibleControlChars(s):
|
|||||||
output += [ ch ]
|
output += [ ch ]
|
||||||
return "".join(output)
|
return "".join(output)
|
||||||
|
|
||||||
|
# From https://github.com/pndurette/gTTS/blob/master/gtts/utils.py
|
||||||
|
def _minimize(the_string, delim, max_size):
|
||||||
|
# Remove `delim` from start of `the_string`
|
||||||
|
# i.e. prevent a recursive infinite loop on `the_string[0:0]`
|
||||||
|
# if `the_string` starts with `delim` and is larger than `max_size`
|
||||||
|
if the_string.startswith(delim):
|
||||||
|
the_string = the_string[len(delim):]
|
||||||
|
|
||||||
|
if len(the_string) > max_size:
|
||||||
|
try:
|
||||||
|
# Find the highest index of `delim` in `the_string[0:max_size]`
|
||||||
|
# i.e. `the_string` will be cut in half on `delim` index
|
||||||
|
idx = the_string.rindex(delim, 0, max_size)
|
||||||
|
except ValueError:
|
||||||
|
# `delim` not found in `the_string`, index becomes `max_size`
|
||||||
|
# i.e. `the_string` will be cut in half arbitrarily on `max_size`
|
||||||
|
idx = max_size
|
||||||
|
# Call itself again for `the_string[idx:]`
|
||||||
|
return [the_string[:idx]] + \
|
||||||
|
_minimize(the_string[idx:], delim, max_size)
|
||||||
|
else:
|
||||||
|
return [the_string]
|
||||||
|
|
||||||
def list_voices():
|
def list_voices():
|
||||||
with urllib.request.urlopen(voiceList) as url:
|
with urllib.request.urlopen(voiceList) as url:
|
||||||
debug("Loading json from %s" % voiceList)
|
debug("Loading json from %s" % voiceList)
|
||||||
@@ -51,7 +74,7 @@ def list_voices():
|
|||||||
print()
|
print()
|
||||||
|
|
||||||
async def run_tts():
|
async def run_tts():
|
||||||
async with websockets.connect(wsUrl, ssl=ssl_context) as ws:
|
async with websockets.connect(wssUrl, ssl=ssl_context) as ws:
|
||||||
message='X-Timestamp:'+formatdate()+'\r\nContent-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n'
|
message='X-Timestamp:'+formatdate()+'\r\nContent-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n'
|
||||||
message+='{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"'+sentenceBoundaryEnabled+'","wordBoundaryEnabled":"'+wordBoundaryEnabled+'"},"outputFormat":"' + codec + '"}}}}\r\n'
|
message+='{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"'+sentenceBoundaryEnabled+'","wordBoundaryEnabled":"'+wordBoundaryEnabled+'"},"outputFormat":"' + codec + '"}}}}\r\n'
|
||||||
await ws.send(message)
|
await ws.send(message)
|
||||||
@@ -84,6 +107,7 @@ if __name__ == "__main__":
|
|||||||
parser.add_argument('-V', '--volume', help="set TTS volume. Default +0%%. For more info check https://bit.ly/3eAE5Nx", default="+0%")
|
parser.add_argument('-V', '--volume', help="set TTS volume. Default +0%%. For more info check https://bit.ly/3eAE5Nx", default="+0%")
|
||||||
parser.add_argument('-s', '--enable-sentence-boundary', help="enable sentence boundary (not implemented but set)", action='store_true')
|
parser.add_argument('-s', '--enable-sentence-boundary', help="enable sentence boundary (not implemented but set)", action='store_true')
|
||||||
parser.add_argument('-w', '--disable-word-boundary', help="disable word boundary (not implemented but set)", action='store_false')
|
parser.add_argument('-w', '--disable-word-boundary', help="disable word boundary (not implemented but set)", action='store_false')
|
||||||
|
parser.add_argument('-S', '--dont-split-sentences', help="sends entire text as is (careful because limit is unknown)", action='store_true')
|
||||||
parser.add_argument('-D', '--debug', help="some debugging", action='store_true')
|
parser.add_argument('-D', '--debug', help="some debugging", action='store_true')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
DEBUG = args.debug
|
DEBUG = args.debug
|
||||||
@@ -106,7 +130,23 @@ if __name__ == "__main__":
|
|||||||
volumeString = args.volume
|
volumeString = args.volume
|
||||||
sentenceBoundaryEnabled = 'true' if args.enable_sentence_boundary else 'false'
|
sentenceBoundaryEnabled = 'true' if args.enable_sentence_boundary else 'false'
|
||||||
wordBoundaryEnabled = 'true' if args.disable_word_boundary else 'false'
|
wordBoundaryEnabled = 'true' if args.disable_word_boundary else 'false'
|
||||||
text = removeIncompatibleControlChars(args.text)
|
# Websocket max is 65536; let's say that overhead is approx. 5k
|
||||||
|
max_size = 65536 - 5000
|
||||||
|
if not args.dont_split_sentences:
|
||||||
|
try:
    # nltk is only needed when sentence splitting is requested, so it is
    # imported here instead of at the top of the file.
    from nltk.tokenize import sent_tokenize
    debug("Was able to load nltk module")
except Exception as e:
    print("You need nltk for sentence splitting.", file=sys.stderr)
    print("If you can't install it you could use the --dont-split-sentences flag.", file=sys.stderr)
    # Python 3 exceptions have no `.message` attribute; format the exception
    # itself so this diagnostic cannot raise AttributeError before exiting.
    debug("Exception was %s %s" % (e, e.args))
    sys.exit(1)
|
||||||
|
for text in _minimize(" ".join(sent_tokenize(removeIncompatibleControlChars(args.text))), " ", max_size):
|
||||||
|
debug ("Sent %s to be TTSed!" % text)
|
||||||
|
asyncio.get_event_loop().run_until_complete(run_tts())
|
||||||
|
else:
|
||||||
|
for text in _minimize(removeIncompatibleControlChars(args.text), " ", max_size):
|
||||||
|
debug ("Sent %s to be TTSed!" % text)
|
||||||
asyncio.get_event_loop().run_until_complete(run_tts())
|
asyncio.get_event_loop().run_until_complete(run_tts())
|
||||||
elif args.list_voices:
|
elif args.list_voices:
|
||||||
list_voices()
|
list_voices()
|
||||||
|
|||||||
Reference in New Issue
Block a user