Switch to aiohttp

This commit is contained in:
rany
2021-06-18 02:29:43 +03:00
parent a38cacce87
commit 66511837ab
2 changed files with 67 additions and 62 deletions

View File

@@ -1,6 +1,6 @@
[metadata] [metadata]
name = edge-tts name = edge-tts
version = 2.0.9 version = 2.1.0
author = rany author = rany
author_email = ranygh@riseup.net author_email = ranygh@riseup.net
description = Microsoft Edge's TTS description = Microsoft Edge's TTS

View File

@@ -5,15 +5,13 @@ import uuid
import argparse import argparse
import asyncio import asyncio
import ssl import ssl
import websockets
import logging import logging
import httpx
import time import time
import math import math
import aiohttp
from xml.sax.saxutils import escape from xml.sax.saxutils import escape
# Default variables # Default variables
ssl_context = ssl.create_default_context()
trustedClientToken = '6A5AA1D4EAFF4E9FB37E23D68491D6F4' trustedClientToken = '6A5AA1D4EAFF4E9FB37E23D68491D6F4'
wssUrl = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=' + trustedClientToken wssUrl = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=' + trustedClientToken
voiceList = 'https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=' + trustedClientToken voiceList = 'https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=' + trustedClientToken
@@ -55,23 +53,24 @@ def mktimestamp(ns):
# Return loaded JSON data of list of Edge's voices # Return loaded JSON data of list of Edge's voices
# NOTE: It's not the total list of available voices. # NOTE: It's not the total list of available voices.
# This is only what is presented in the UI. # This is only what is presented in the UI.
async def list_voices():
    """Fetch and decode the voice list that Edge presents in its UI.

    Issues a GET request to ``voiceList`` with Edge-style browser headers
    and parses the response body as JSON.

    NOTE: this is not the complete set of available voices, only what is
    presented in the UI.

    Returns:
        The decoded JSON document (the caller iterates it and reads each
        entry's keys).
    """
    log = logging.getLogger("edgeTTS.list_voices")

    # Headers copied from what an Edge 91 browser sends for this request.
    request_headers = {
        'Authority': 'speech.platform.bing.com',
        'Sec-CH-UA': "\" Not;A Brand\";v=\"99\", \"Microsoft Edge\";v=\"91\", \"Chromium\";v=\"91\"",
        'Sec-CH-UA-Mobile': '?0',
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
        'Accept': '*/*',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9'
    }

    # trust_env=True: let aiohttp honour proxy settings from the environment.
    async with aiohttp.ClientSession(trust_env=True) as http:
        async with http.get(voiceList, headers=request_headers) as response:
            log.debug("Loading json from %s", voiceList)
            body = await response.text()
            voices = json.loads(body)
            log.debug("JSON Loaded")
    return voices
class SubMaker: class SubMaker:
@@ -132,50 +131,56 @@ class Communicate:
if type(msgs) is str: if type(msgs) is str:
msgs = [msgs] msgs = [msgs]
async with websockets.connect( async with aiohttp.ClientSession(trust_env=True) as session:
wssUrl + "&ConnectionId=" + connectId(), async with session.ws_connect(
ssl=ssl_context, wssUrl + "&ConnectionId=" + connectId(),
compression="deflate", compress = 15,
extra_headers={ autoclose = True,
"Pragma": "no-cache", autoping = True,
"Origin": "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold", headers={
"Accept-Encoding": "gzip, deflate, br", "Pragma": "no-cache",
"Accept-Language": "en-US,en;q=0.9", "Origin": "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41", "Accept-Encoding": "gzip, deflate, br",
"Cache-Control": "no-cache" "Accept-Language": "en-US,en;q=0.9",
} "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
) as ws: "Cache-Control": "no-cache"
for msg in msgs: }
self.date = formatdate() # Each message needs to have its send date ) as ws:
for msg in msgs:
self.date = formatdate() # Each message needs to have its send date
if not customspeak: if not customspeak:
msg = self.mkssmlmsg(msg.decode('utf-8'), voice, pitch, rate, volume, customspeak=False) msg = self.mkssmlmsg(msg.decode('utf-8'), voice, pitch, rate, volume, customspeak=False)
else: else:
msg = self.mkssmlmsg(msg, customspeak=True) msg = self.mkssmlmsg(msg, customspeak=True)
message='X-Timestamp:'+self.date+'\r\nContent-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n' message='X-Timestamp:'+self.date+'\r\nContent-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n'
message+='{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"'+sentenceBoundary+'","wordBoundaryEnabled":"'+wordBoundary+'"},"outputFormat":"' + codec + '"}}}}\r\n' message+='{"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"'+sentenceBoundary+'","wordBoundaryEnabled":"'+wordBoundary+'"},"outputFormat":"' + codec + '"}}}}\r\n'
await ws.send(message) await ws.send_str(message)
await ws.send(msg) await ws.send_str(msg)
download = False download = False
async for recv in ws: async for recv in ws:
if type(recv) is str: if recv.type in (aiohttp.WSMsgType.CLOSED, aiohttp.WSMsgType.ERROR):
if 'turn.start' in recv:
download = True
elif 'turn.end' in recv:
download = False
break break
elif 'audio.metadata' in recv:
#print("".join(recv.split('Path:audio.metadata\r\n\r\n')[1:]), file=sys.stderr)
metadata = json.loads("".join(recv.split('Path:audio.metadata\r\n\r\n')[1:]))
text = metadata['Metadata'][0]['Data']['text']['Text']
offset = metadata['Metadata'][0]['Data']['Offset']
yield [ offset, text, None ]
elif type(recv) is bytes:
if download:
yield [ None, None, b"".join(recv.split(b'Path:audio\r\n')[1:]) ]
await ws.close() if recv.type == aiohttp.WSMsgType.TEXT:
if 'turn.start' in recv.data:
download = True
elif 'turn.end' in recv.data:
download = False
break
elif 'audio.metadata' in recv.data:
#print("".join(recv.split('Path:audio.metadata\r\n\r\n')[1:]), file=sys.stderr)
metadata = json.loads("".join(recv.split('Path:audio.metadata\r\n\r\n')[1:]))
text = metadata['Metadata'][0]['Data']['text']['Text']
offset = metadata['Metadata'][0]['Data']['Offset']
yield [ offset, text, None ]
elif recv.type == aiohttp.WSMsgType.BINARY:
if download:
yield [ None, None, b"".join(recv.data.split(b'Path:audio\r\n')[1:]) ]
await ws.close()
# Based on https://github.com/pndurette/gTTS/blob/6d9309f05b3ad26ca356654732f3b5b9c3bec538/gtts/utils.py#L13-L54 # Based on https://github.com/pndurette/gTTS/blob/6d9309f05b3ad26ca356654732f3b5b9c3bec538/gtts/utils.py#L13-L54
# Modified to measure based on bytes rather than number of characters # Modified to measure based on bytes rather than number of characters
@@ -250,7 +255,7 @@ async def _main():
media_file.write(i[2]) media_file.write(i[2])
elif i[0] is not None and i[1] is not None: elif i[0] is not None and i[1] is not None:
subs.createSub(i[0], i[1]) subs.createSub(i[0], i[1])
media_file.close() if args.write_media: media_file.close()
if not subs.subsAndOffset == {}: if not subs.subsAndOffset == {}:
if not args.write_subtitles: if not args.write_subtitles:
sys.stderr.write(subs.generateSubs()) sys.stderr.write(subs.generateSubs())
@@ -260,7 +265,7 @@ async def _main():
subtitle_file.close() subtitle_file.close()
elif args.list_voices: elif args.list_voices:
seperator = False seperator = False
for voice in list_voices(): for voice in await list_voices():
if seperator: print() if seperator: print()
for key in voice.keys(): for key in voice.keys():
logger.debug("Processing key %s" % key) logger.debug("Processing key %s" % key)