Resolve *almost* all pylint complaints and set up pylint

This commit is contained in:
rany2
2023-01-05 07:48:40 +02:00
parent b5b7a42354
commit b68b27103f
12 changed files with 823 additions and 222 deletions

View File

@@ -7,12 +7,29 @@ import json
import re
import time
import uuid
from typing import Any, AsyncGenerator, Dict, Generator, List, Optional, Tuple, Union
from contextlib import nullcontext
from io import TextIOWrapper
from typing import (
Any,
AsyncGenerator,
ContextManager,
Dict,
Generator,
List,
Optional,
Tuple,
Union,
)
from xml.sax.saxutils import escape
import aiohttp
from edge_tts.exceptions import NoAudioReceived, UnexpectedResponse, UnknownResponse
from edge_tts.exceptions import (
NoAudioReceived,
UnexpectedResponse,
UnknownResponse,
WebSocketError,
)
from .constants import WSS_URL
@@ -161,8 +178,6 @@ def date_to_string() -> str:
# without having to use a library. We'll just use UTC and hope for the best.
# For example, right now %Z would return EEST when we need it to return
# Eastern European Summer Time.
#
# return time.strftime("%a %b %d %Y %H:%M:%S GMT%z (%Z)")
return time.strftime(
"%a %b %d %Y %H:%M:%S GMT+0000 (Coordinated Universal Time)", time.gmtime()
)
@@ -185,6 +200,26 @@ def ssml_headers_plus_data(request_id: str, timestamp: str, ssml: str) -> str:
)
def calc_max_mesg_size(voice: str, rate: str, volume: str) -> int:
    """Return the room left for text in a single websocket message.

    A request with an empty text body is rendered for the given voice, rate
    and volume. Its length, plus a fixed margin of error, is the per-message
    overhead that is subtracted from the websocket maximum size (2**16).

    Args:
        voice: Voice passed through to ``mkssml``.
        rate: Rate passed through to ``mkssml``.
        volume: Volume passed through to ``mkssml``.

    Returns:
        int: The maximum message size.
    """
    size_limit: int = 2**16

    # Render the envelope exactly as a real request would be built, but with
    # no text, so that only the fixed overhead is measured.
    empty_request: str = ssml_headers_plus_data(
        connect_id(),
        date_to_string(),
        mkssml("", voice, rate, volume),
    )
    safety_margin: int = 50  # margin of error

    return size_limit - (len(empty_request) + safety_margin)
class Communicate:
"""
Class for communicating with the service.
@@ -206,7 +241,6 @@ class Communicate:
ValueError: If the voice is not valid.
"""
self.text: str = text
self.codec: str = "audio-24khz-48kbitrate-mono-mp3"
self.voice: str = voice
# Possible values for voice are:
# - Microsoft Server Speech Text to Speech Voice (cy-GB, NiaNeural)
@@ -241,158 +275,123 @@ class Communicate:
async def stream(self) -> AsyncGenerator[Dict[str, Any], None]:
    """Streams audio and metadata from the service.

    The text is cleaned, XML-escaped and split into chunks that fit in one
    websocket message. Each chunk is sent over a single websocket connection
    and the replies are yielded as they arrive.

    Yields:
        Dict[str, Any]: Either an audio chunk
            ``{"type": "audio", "data": <bytes>}`` or a word boundary
            ``{"type": "WordBoundary", "offset": ..., "duration": ...,
            "text": ...}``.

    Raises:
        UnknownResponse: If a text message carries an unrecognized ``Path``
            or an unrecognized metadata type.
        UnexpectedResponse: If binary data arrives outside of a turn.
        WebSocketError: If the websocket reports an error message.
        NoAudioReceived: If a chunk of text produced no audio at all.
    """
    texts = split_text_by_byte_length(
        escape(remove_incompatible_characters(self.text)),
        calc_max_mesg_size(self.voice, self.rate, self.volume),
    )

    async with aiohttp.ClientSession(trust_env=True) as session, session.ws_connect(
        f"{WSS_URL}&ConnectionId={connect_id()}",
        compress=15,
        autoclose=True,
        autoping=True,
        proxy=self.proxy,
        headers={
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
            "Origin": "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "en-US,en;q=0.9",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
            " (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
        },
    ) as websocket:
        for text in texts:
            # download_audio indicates whether we should be expecting audio
            # data, this is so that we avoid getting binary data from the
            # websocket and falsely thinking it's audio data.
            download_audio = False

            # audio_was_received indicates whether we have received audio data
            # from the websocket. This is so we can raise an exception if we
            # don't receive any audio data.
            audio_was_received = False

            # Each message needs to have the proper date.
            date = date_to_string()

            # Prepare the request to be sent to the service.
            #
            # Note sentenceBoundaryEnabled and wordBoundaryEnabled are actually supposed
            # to be booleans, but Edge Browser seems to send them as strings.
            #
            # This is a bug in Edge as Azure Cognitive Services actually sends them as
            # bool and not string. For now I will send them as bool unless it causes
            # any problems.
            #
            # Only the first fragment is an f-string; the JSON fragments are
            # plain literals, so their braces must NOT be doubled.
            await websocket.send_str(
                f"X-Timestamp:{date}\r\n"
                "Content-Type:application/json; charset=utf-8\r\n"
                "Path:speech.config\r\n\r\n"
                '{"context":{"synthesis":{"audio":{"metadataoptions":{'
                '"sentenceBoundaryEnabled":false,"wordBoundaryEnabled":true},'
                '"outputFormat":"audio-24khz-48kbitrate-mono-mp3"'
                "}}}}\r\n"
            )

            await websocket.send_str(
                ssml_headers_plus_data(
                    connect_id(),
                    date,
                    mkssml(text, self.voice, self.rate, self.volume),
                )
            )

            async for received in websocket:
                if received.type == aiohttp.WSMsgType.TEXT:
                    parameters, data = get_headers_and_data(received.data)
                    path = parameters.get("Path")
                    if path == "turn.start":
                        download_audio = True
                    elif path == "turn.end":
                        download_audio = False
                        break  # End of audio data
                    elif path == "audio.metadata":
                        for meta_obj in json.loads(data)["Metadata"]:
                            meta_type = meta_obj["Type"]
                            if meta_type == "WordBoundary":
                                yield {
                                    "type": meta_type,
                                    "offset": meta_obj["Data"]["Offset"],
                                    "duration": meta_obj["Data"]["Duration"],
                                    "text": meta_obj["Data"]["text"]["Text"],
                                }
                            elif meta_type == "SessionEnd":
                                continue
                            else:
                                raise UnknownResponse(
                                    f"Unknown metadata type: {meta_type}"
                                )
                    elif path == "response":
                        pass
                    else:
                        raise UnknownResponse(
                            "The response from the service is not recognized.\n"
                            + received.data
                        )
                elif received.type == aiohttp.WSMsgType.BINARY:
                    if not download_audio:
                        raise UnexpectedResponse(
                            "We received a binary message, but we are not expecting one."
                        )

                    # Keep everything after the first "Path:audio\r\n" marker.
                    # partition() yields b"" when the marker is absent, which
                    # matches joining an empty split()[1:].
                    yield {
                        "type": "audio",
                        "data": received.data.partition(b"Path:audio\r\n")[2],
                    }
                    audio_was_received = True
                elif received.type == aiohttp.WSMsgType.ERROR:
                    raise WebSocketError(received.data)

            if not audio_was_received:
                raise NoAudioReceived(
                    "No audio was received. Please verify that your parameters are correct."
                )
async def save(
self,
audio_fname: Union[str, bytes],
@@ -401,24 +400,23 @@ class Communicate:
"""
Save the audio and metadata to the specified files.
"""
written_audio = False
try:
audio = open(audio_fname, "wb")
metadata = None
if metadata_fname is not None:
metadata = open(metadata_fname, "w", encoding="utf-8")
written_audio: bool = False
metadata: Union[TextIOWrapper, ContextManager[None]] = (
open(metadata_fname, "w", encoding="utf-8")
if metadata_fname is not None
else nullcontext()
)
with metadata, open(audio_fname, "wb") as audio:
async for message in self.stream():
if message["type"] == "audio":
audio.write(message["data"])
written_audio = True
elif metadata is not None and message["type"] == "WordBoundary":
elif (
isinstance(metadata, TextIOWrapper)
and message["type"] == "WordBoundary"
):
json.dump(message, metadata)
metadata.write("\n")
finally:
audio.close()
if metadata is not None:
metadata.close()
if not written_audio:
raise NoAudioReceived(