get rid of nltk dependency
This commit is contained in:
120
edge-tts.py
120
edge-tts.py
@@ -35,8 +35,65 @@ def removeIncompatibleControlChars(s):
|
|||||||
output += [ ch ]
|
output += [ ch ]
|
||||||
return "".join(output)
|
return "".join(output)
|
||||||
|
|
||||||
# From https://github.com/pndurette/gTTS/blob/master/gtts/utils.py
|
def list_voices():
    """Download the voice catalogue and print each voice's fields to stdout.

    The JSON document at ``voiceList`` is fetched and decoded, then every
    entry is printed as ``key: value`` lines preceded by a blank line.
    The keys ``Name``, ``SuggestedCodec``, ``FriendlyName`` and ``Status``
    are not printed, and ``ShortName`` is shown under the label ``Name``.
    """
    # NOTE(review): nesting was reconstructed from a view without
    # indentation; the trailing print() is assumed to run once, after the
    # loop over all voices -- confirm against the real file.
    hidden_keys = ["Name", "SuggestedCodec", "FriendlyName", "Status"]
    with urllib.request.urlopen(voiceList) as response:
        debug("Loading json from %s" % voiceList)
        voices = json.loads(response.read().decode())
        debug("JSON Loaded")
    for entry in voices:
        print()
        for field, value in entry.items():
            debug("Processing key %s" % field)
            if field in hidden_keys:
                debug("Key %s skipped" % field)
                continue
            label = "Name" if field == "ShortName" else field
            print("%s: %s" % (label, value))
    print()
|
||||||
|
|
||||||
|
def mkssmlmsg(text=""):
    """Build the complete SSML request frame for ``text``.

    The frame consists of the ``X-RequestId``, ``Content-Type``,
    ``X-Timestamp`` and ``Path:ssml`` header lines, a blank line, and a
    ``<speak>`` document whose ``<voice>`` and ``<prosody>`` attributes come
    from the module-level ``voice``, ``pitchString``, ``rateString`` and
    ``volumeString`` values.

    Args:
        text: Content placed inside the ``<prosody>`` element. It is inserted
            verbatim (no escaping happens here). Defaults to an empty
            string, which yields the frame with no spoken content.

    Returns:
        str: The assembled request frame.
    """
    request_headers = (
        'X-RequestId:' + connectId() + '\r\nContent-Type:application/ssml+xml\r\n'
        + 'X-Timestamp:' + formatdate() + 'Z\r\nPath:ssml\r\n\r\n'
    )
    prosody_open = (
        "<prosody pitch='" + pitchString
        + "' rate ='" + rateString
        + "' volume='" + volumeString + "'>"
    )
    ssml_parts = [
        "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>",
        "<voice name='" + voice + "'>",
        prosody_open,
        text,
        '</prosody></voice></speak>',
    ]
    return request_headers + "".join(ssml_parts)
|
||||||
|
|
||||||
|
async def run_tts(msg):
    """Send one prepared request over a new websocket and stream audio to stdout.

    A ``speech.config`` frame built from the module-level
    ``sentenceBoundaryEnabled``, ``wordBoundaryEnabled`` and ``codec`` values
    is sent first, followed by ``msg``. Frames are then read until one
    containing ``turn.end`` arrives; for every frame containing the
    ``Path:audio`` header line, the bytes after that line are written to
    ``sys.stdout.buffer``.

    Args:
        msg: The request frame to send after the configuration frame (the
            ``__main__`` block passes the result of ``mkssmlmsg(text)``).
    """
    debug("Doing %s!" % msg)
    audio_marker = b'Path:audio\r\n'
    async with websockets.connect(wssUrl, ssl=ssl_context) as ws:
        message='X-Timestamp:'+formatdate()+'\r\nContent-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n'
        message+='{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"'+sentenceBoundaryEnabled+'","wordBoundaryEnabled":"'+wordBoundaryEnabled+'"},"outputFormat":"' + codec + '"}}}}\r\n'
        await ws.send(message)
        debug("> %s" % message)
        await ws.send(msg)
        debug("> %s" % msg)
        while True:
            recv = await ws.recv()
            # Text frames arrive as str; normalise so the checks below can
            # work on bytes for both frame types.
            if not isinstance(recv, bytes):
                recv = recv.encode()
            debug("< %s" % recv)
            if b'turn.end' in recv:
                break
            # Split only on the FIRST marker: the payload after the header is
            # arbitrary binary audio and may itself contain the marker bytes,
            # in which case an unbounded split()[1] would drop audio data.
            _, found, audio = recv.partition(audio_marker)
            if found:
                sys.stdout.buffer.write(audio)
|
||||||
|
|
||||||
|
# From https://github.com/pndurette/gTTS/blob/6d9309f05b3ad26ca356654732f3b5b9c3bec538/gtts/utils.py#L13-L54
|
||||||
def _minimize(the_string, delim, max_size):
|
def _minimize(the_string, delim, max_size):
|
||||||
|
"""Recursively split a string in the largest chunks
|
||||||
|
possible from the highest position of a delimiter all the way
|
||||||
|
to a maximum size
|
||||||
|
Args:
|
||||||
|
the_string (string): The string to split.
|
||||||
|
delim (string): The delimiter to split on.
|
||||||
|
max_size (int): The maximum size of a chunk.
|
||||||
|
Returns:
|
||||||
|
list: the minimized string in tokens
|
||||||
|
Every chunk size will be at minimum ``the_string[0:idx]`` where ``idx``
|
||||||
|
is the highest index of ``delim`` found in ``the_string``; and at maximum
|
||||||
|
``the_string[0:max_size]`` if no ``delim`` was found in ``the_string``.
|
||||||
|
In the latter case, the split will occur at ``the_string[max_size]``
|
||||||
|
which can be any character. The function runs itself again on the rest of
|
||||||
|
``the_string`` (``the_string[idx:]``) until no chunk is larger than
|
||||||
|
``max_size``.
|
||||||
|
"""
|
||||||
# Remove `delim` from start of `the_string`
|
# Remove `delim` from start of `the_string`
|
||||||
# i.e. prevent a recursive infinite loop on `the_string[0:0]`
|
# i.e. prevent a recursive infinite loop on `the_string[0:0]`
|
||||||
# if `the_string` starts with `delim` and is larger than `max_size`
|
# if `the_string` starts with `delim` and is larger than `max_size`
|
||||||
@@ -58,42 +115,6 @@ def _minimize(the_string, delim, max_size):
|
|||||||
else:
|
else:
|
||||||
return [the_string]
|
return [the_string]
|
||||||
|
|
||||||
def list_voices():
    """Fetch the voice list from ``voiceList`` and print it to stdout.

    Each voice is printed as ``key: value`` lines after a blank line. The
    keys ``Name``, ``SuggestedCodec``, ``FriendlyName`` and ``Status`` are
    omitted, and ``ShortName`` is displayed under the label ``Name``.
    """
    # NOTE(review): indentation was not visible in this view; the final
    # print() is assumed to follow the loop over voices -- confirm.
    skipped = ["Name", "SuggestedCodec", "FriendlyName", "Status"]
    with urllib.request.urlopen(voiceList) as reply:
        debug("Loading json from %s" % voiceList)
        catalogue = json.loads(reply.read().decode())
        debug("JSON Loaded")
    for record in catalogue:
        print()
        for name, content in record.items():
            debug("Processing key %s" % name)
            if name in skipped:
                debug("Key %s skipped" % name)
                continue
            shown = "Name" if name == "ShortName" else name
            print("%s: %s" % (shown, content))
    print()
|
|
||||||
|
|
||||||
async def run_tts():
    """Synthesize the module-level ``text`` and stream the audio to stdout.

    Opens a websocket, sends a ``speech.config`` frame built from the
    module-level ``sentenceBoundaryEnabled``, ``wordBoundaryEnabled`` and
    ``codec`` values, then sends an SSML frame containing ``escape(text)``
    with the ``voice``, ``pitchString``, ``rateString`` and ``volumeString``
    settings. Frames are read until one containing ``turn.end`` arrives; for
    every frame containing the ``Path:audio`` header line, the bytes after
    that line are written to ``sys.stdout.buffer``.
    """
    audio_marker = b'Path:audio\r\n'
    async with websockets.connect(wssUrl, ssl=ssl_context) as ws:
        message='X-Timestamp:'+formatdate()+'\r\nContent-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n'
        message+='{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"'+sentenceBoundaryEnabled+'","wordBoundaryEnabled":"'+wordBoundaryEnabled+'"},"outputFormat":"' + codec + '"}}}}\r\n'
        await ws.send(message)
        debug("> %s" % message)
        message='X-RequestId:'+connectId()+'\r\nContent-Type:application/ssml+xml\r\n'
        message+='X-Timestamp:'+formatdate()+'Z\r\nPath:ssml\r\n\r\n'
        message+="<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>"
        message+="<voice name='" + voice + "'>" + "<prosody pitch='" + pitchString + "' rate ='" + rateString + "' volume='" + volumeString + "'>" + escape(text) + '</prosody></voice></speak>'
        await ws.send(message)
        debug("> %s" % message)
        while True:
            recv = await ws.recv()
            # Text frames arrive as str; normalise so the checks below can
            # work on bytes for both frame types.
            if not isinstance(recv, bytes):
                recv = recv.encode()
            debug("< %s" % recv)
            if b'turn.end' in recv:
                break
            # Split only on the FIRST marker: the binary audio payload may
            # itself contain the marker bytes, and an unbounded split()[1]
            # would then silently drop part of the audio.
            _, found, audio = recv.partition(audio_marker)
            if found:
                sys.stdout.buffer.write(audio)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser(description="Microsoft Edge's Online TTS Reader")
|
parser = argparse.ArgumentParser(description="Microsoft Edge's Online TTS Reader")
|
||||||
group = parser.add_mutually_exclusive_group(required=True)
|
group = parser.add_mutually_exclusive_group(required=True)
|
||||||
@@ -107,7 +128,6 @@ if __name__ == "__main__":
|
|||||||
parser.add_argument('-V', '--volume', help="set TTS volume. Default +0%%. For more info check https://bit.ly/3eAE5Nx", default="+0%")
|
parser.add_argument('-V', '--volume', help="set TTS volume. Default +0%%. For more info check https://bit.ly/3eAE5Nx", default="+0%")
|
||||||
parser.add_argument('-s', '--enable-sentence-boundary', help="enable sentence boundary (not implemented but set)", action='store_true')
|
parser.add_argument('-s', '--enable-sentence-boundary', help="enable sentence boundary (not implemented but set)", action='store_true')
|
||||||
parser.add_argument('-w', '--enable-word-boundary', help="enable word boundary (not implemented but set)", action='store_true')
|
parser.add_argument('-w', '--enable-word-boundary', help="enable word boundary (not implemented but set)", action='store_true')
|
||||||
parser.add_argument('-S', '--dont-split-sentences', help="sends entire text as is (careful because limit is unknown)", action='store_true')
|
|
||||||
parser.add_argument('-D', '--debug', help="some debugging", action='store_true')
|
parser.add_argument('-D', '--debug', help="some debugging", action='store_true')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
DEBUG = args.debug
|
DEBUG = args.debug
|
||||||
@@ -130,23 +150,11 @@ if __name__ == "__main__":
|
|||||||
volumeString = args.volume
|
volumeString = args.volume
|
||||||
sentenceBoundaryEnabled = 'true' if args.enable_sentence_boundary else 'false'
|
sentenceBoundaryEnabled = 'true' if args.enable_sentence_boundary else 'false'
|
||||||
wordBoundaryEnabled = 'true' if args.enable_word_boundary else 'false'
|
wordBoundaryEnabled = 'true' if args.enable_word_boundary else 'false'
|
||||||
# Websocket max is 65536, lets say that overhead is approx. 5k
|
# https://hpbn.co/websocket/ says client must also send a masking key,
|
||||||
max_size = 65536 - 5000
|
# which adds an extra 4 bytes to the header, resulting in 6–14 bytes of overhead
|
||||||
if not args.dont_split_sentences:
|
overhead = len(mkssmlmsg()) + 14
|
||||||
try:
|
wsmax = 65536 - overhead
|
||||||
from nltk.tokenize import sent_tokenize
|
for text in _minimize(escape(removeIncompatibleControlChars(args.text)), " ", wsmax):
|
||||||
debug("Was able to load nltk module")
|
asyncio.get_event_loop().run_until_complete(run_tts(mkssmlmsg(text)))
|
||||||
except Exception as e:
|
|
||||||
print("You need nltk for sentence splitting.", file=sys.stderr)
|
|
||||||
print("If you can't install it you could use the --dont-split-sentences flag.", file=sys.stderr)
|
|
||||||
debug("Exception was %s %s" % (e.message, e.args))
|
|
||||||
sys.exit(1)
|
|
||||||
for text in _minimize(" ".join(sent_tokenize(removeIncompatibleControlChars(args.text))), " ", max_size):
|
|
||||||
debug ("Sent %s to be TTSed!" % text)
|
|
||||||
asyncio.get_event_loop().run_until_complete(run_tts())
|
|
||||||
else:
|
|
||||||
for text in _minimize(removeIncompatibleControlChars(args.text), " ", max_size):
|
|
||||||
debug ("Sent %s to be TTSed!" % text)
|
|
||||||
asyncio.get_event_loop().run_until_complete(run_tts())
|
|
||||||
elif args.list_voices:
|
elif args.list_voices:
|
||||||
list_voices()
|
list_voices()
|
||||||
|
|||||||
Reference in New Issue
Block a user