Refactor communicate for better readability

Also improve performance on larger documents.

Signed-off-by: rany2 <rany2@riseup.net>
This commit is contained in:
rany2
2024-02-16 18:36:20 +02:00
parent df6bac8b54
commit c9bf4247a8
2 changed files with 224 additions and 179 deletions

View File

@@ -5,6 +5,10 @@
# only in one or another interpreter, leading to false positives when analysed. # only in one or another interpreter, leading to false positives when analysed.
analyse-fallback-blocks=no analyse-fallback-blocks=no
# Clear in-memory caches upon conclusion of linting. Useful if running pylint
# in a server-like mode.
clear-cache-post-run=no
# Load and enable all available extensions. Use --list-extensions to see a list # Load and enable all available extensions. Use --list-extensions to see a list
# of all available extensions. # of all available extensions.
#enable-all-extensions= #enable-all-extensions=
@@ -46,8 +50,8 @@ ignore=CVS
# Add files or directories matching the regular expressions patterns to the # Add files or directories matching the regular expressions patterns to the
# ignore-list. The regex matches against paths and can be in Posix or Windows # ignore-list. The regex matches against paths and can be in Posix or Windows
# format. Because '\' represents the directory delimiter on Windows systems, it # format. Because '\\' represents the directory delimiter on Windows systems,
# can't be used as an escape character. # it can't be used as an escape character.
ignore-paths= ignore-paths=
# Files or directories matching the regular expression patterns are skipped. # Files or directories matching the regular expression patterns are skipped.
@@ -84,11 +88,17 @@ persistent=yes
# Minimum Python version to use for version dependent checks. Will default to # Minimum Python version to use for version dependent checks. Will default to
# the version used to run pylint. # the version used to run pylint.
py-version=3.10 py-version=3.11
# Discover python modules and packages in the file system subtree. # Discover python modules and packages in the file system subtree.
recursive=no recursive=no
# Add paths to the list of the source roots. Supports globbing patterns. The
# source root is an absolute path or a path relative to the current working
# directory used to determine a package namespace for modules located under the
# source root.
source-roots=
# When enabled, pylint would attempt to guess common misconfiguration and emit # When enabled, pylint would attempt to guess common misconfiguration and emit
# user-friendly hints instead of false-positive error messages. # user-friendly hints instead of false-positive error messages.
suggestion-mode=yes suggestion-mode=yes
@@ -224,6 +234,10 @@ no-docstring-rgx=^_
# These decorators are taken in consideration only for invalid-name. # These decorators are taken in consideration only for invalid-name.
property-classes=abc.abstractproperty property-classes=abc.abstractproperty
# Regular expression matching correct type alias names. If left empty, type
# alias names will be checked with the set naming style.
#typealias-rgx=
# Regular expression matching correct type variable names. If left empty, type # Regular expression matching correct type variable names. If left empty, type
# variable names will be checked with the set naming style. # variable names will be checked with the set naming style.
#typevar-rgx= #typevar-rgx=
@@ -246,21 +260,18 @@ check-protected-access-in-special-methods=no
defining-attr-methods=__init__, defining-attr-methods=__init__,
__new__, __new__,
setUp, setUp,
asyncSetUp,
__post_init__ __post_init__
# List of member names, which should be excluded from the protected access # List of member names, which should be excluded from the protected access
# warning. # warning.
exclude-protected=_asdict, exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit
_fields,
_replace,
_source,
_make
# List of valid names for the first argument in a class method. # List of valid names for the first argument in a class method.
valid-classmethod-first-arg=cls valid-classmethod-first-arg=cls
# List of valid names for the first argument in a metaclass class method. # List of valid names for the first argument in a metaclass class method.
valid-metaclass-classmethod-first-arg=cls valid-metaclass-classmethod-first-arg=mcs
[DESIGN] [DESIGN]
@@ -274,7 +285,7 @@ exclude-too-few-public-methods=
ignored-parents= ignored-parents=
# Maximum number of arguments for function / method. # Maximum number of arguments for function / method.
max-args=5 max-args=10
# Maximum number of attributes for a class (see R0902). # Maximum number of attributes for a class (see R0902).
max-attributes=7 max-attributes=7
@@ -307,8 +318,7 @@ min-public-methods=2
[EXCEPTIONS] [EXCEPTIONS]
# Exceptions that will emit a warning when caught. # Exceptions that will emit a warning when caught.
overgeneral-exceptions=builtins.BaseException, overgeneral-exceptions=builtins.BaseException,builtins.Exception
builtins.Exception
[FORMAT] [FORMAT]
@@ -327,7 +337,7 @@ indent-after-paren=4
indent-string=' ' indent-string=' '
# Maximum number of characters on a single line. # Maximum number of characters on a single line.
max-line-length=240 max-line-length=100
# Maximum number of lines in a module. # Maximum number of lines in a module.
max-module-lines=1000 max-module-lines=1000
@@ -347,6 +357,9 @@ single-line-if-stmt=no
# one. # one.
allow-any-import-level= allow-any-import-level=
# Allow explicit reexports by alias from a package __init__.
allow-reexport-from-package=no
# Allow wildcard imports from modules that define __all__. # Allow wildcard imports from modules that define __all__.
allow-wildcard-with-all=no allow-wildcard-with-all=no
@@ -408,14 +421,24 @@ confidence=HIGH,
# --enable=similarities". If you want to run only the classes checker, but have # --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use "--disable=all --enable=classes # no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W". # --disable=W".
disable=duplicate-code, disable=raw-checker-failed,
bad-inline-option,
locally-disabled,
file-ignored,
suppressed-message,
useless-suppression,
deprecated-pragma,
use-symbolic-message-instead,
use-implicit-booleaness-not-comparison-to-string,
use-implicit-booleaness-not-comparison-to-zero,
duplicate-code,
consider-using-with consider-using-with
# Enable the message, report, category or checker with the given id(s). You can # Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option # either give multiple identifier separated by comma (,) or put this option
# multiple time (only on the command line, not in the configuration file where # multiple time (only on the command line, not in the configuration file where
# it should appear only once). See also the "--disable" option for examples. # it should appear only once). See also the "--disable" option for examples.
enable=c-extension-no-member enable=
[METHOD_ARGS] [METHOD_ARGS]
@@ -461,8 +484,9 @@ evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor
# used to format the message information. See doc for all details. # used to format the message information. See doc for all details.
msg-template= msg-template=
# Set the output format. Available formats are text, parseable, colorized, json # Set the output format. Available formats are: text, parseable, colorized,
# and msvs (visual studio). You can also give a reporter class, e.g. # json2 (improved json format), json (old json format) and msvs (visual
# studio). You can also give a reporter class, e.g.
# mypackage.mymodule.MyReporterClass. # mypackage.mymodule.MyReporterClass.
#output-format= #output-format=
@@ -496,8 +520,8 @@ min-similarity-lines=4
# Limits count of emitted suggestions for spelling mistakes. # Limits count of emitted suggestions for spelling mistakes.
max-spelling-suggestions=4 max-spelling-suggestions=4
# Spelling dictionary name. Available dictionaries: none. To make it work, # Spelling dictionary name. No available dictionaries : You need to install
# install the 'python-enchant' package. # both the python package and the system dependency for enchant to work.
spelling-dict= spelling-dict=
# List of comma separated words that should be considered directives if they # List of comma separated words that should be considered directives if they

View File

@@ -229,6 +229,25 @@ class Communicate:
Class for communicating with the service. Class for communicating with the service.
""" """
@staticmethod
def validate_string_param(param_name: str, param_value: str, pattern: str) -> str:
    """
    Validates the given string parameter based on type and pattern.

    The entire value has to match the pattern. ``re.fullmatch`` is used
    rather than ``re.match`` because ``$`` also matches just before a
    trailing newline, so ``re.match(r"^[+-]\\d+%$", "+0%\\n")`` would
    succeed and let the newline through.

    Args:
        param_name (str): The name of the parameter (used in error messages).
        param_value (str): The value of the parameter.
        pattern (str): The regular expression the whole value must match.

    Returns:
        str: The validated parameter, unchanged.

    Raises:
        TypeError: If param_value is not a str.
        ValueError: If param_value does not match pattern in its entirety.
    """
    if not isinstance(param_value, str):
        raise TypeError(f"{param_name} must be str")
    if re.fullmatch(pattern, param_value) is None:
        raise ValueError(f"Invalid {param_name} '{param_value}'.")
    return param_value
def __init__( def __init__(
self, self,
text: str, text: str,
@@ -238,6 +257,7 @@ class Communicate:
volume: str = "+0%", volume: str = "+0%",
pitch: str = "+0Hz", pitch: str = "+0Hz",
proxy: Optional[str] = None, proxy: Optional[str] = None,
receive_timeout: int = 5,
): ):
""" """
Initializes the Communicate class. Initializes the Communicate class.
@@ -270,190 +290,191 @@ class Communicate:
+ f" ({lang}-{region}, {name})" + f" ({lang}-{region}, {name})"
) )
if ( self.voice = self.validate_string_param(
re.match( "voice",
r"^Microsoft Server Speech Text to Speech Voice \(.+,.+\)$", self.voice,
self.voice, r"^Microsoft Server Speech Text to Speech Voice \(.+,.+\)$",
) )
is None self.rate = self.validate_string_param("rate", rate, r"^[+-]\d+%$")
): self.volume = self.validate_string_param("volume", volume, r"^[+-]\d+%$")
raise ValueError(f"Invalid voice '{voice}'.") self.pitch = self.validate_string_param("pitch", pitch, r"^[+-]\d+Hz$")
if not isinstance(rate, str):
raise TypeError("rate must be str")
if re.match(r"^[+-]\d+%$", rate) is None:
raise ValueError(f"Invalid rate '{rate}'.")
self.rate: str = rate
if not isinstance(volume, str):
raise TypeError("volume must be str")
if re.match(r"^[+-]\d+%$", volume) is None:
raise ValueError(f"Invalid volume '{volume}'.")
self.volume: str = volume
if not isinstance(pitch, str):
raise TypeError("pitch must be str")
if re.match(r"^[+-]\d+Hz$", pitch) is None:
raise ValueError(f"Invalid pitch '{pitch}'.")
self.pitch: str = pitch
if proxy is not None and not isinstance(proxy, str): if proxy is not None and not isinstance(proxy, str):
raise TypeError("proxy must be str") raise TypeError("proxy must be str")
self.proxy: Optional[str] = proxy self.proxy: Optional[str] = proxy
if not isinstance(receive_timeout, int):
raise TypeError("receive_timeout must be int")
self.receive_timeout: int = receive_timeout
async def stream(self) -> AsyncGenerator[Dict[str, Any], None]: async def stream(self) -> AsyncGenerator[Dict[str, Any], None]:
"""Streams audio and metadata from the service.""" """Streams audio and metadata from the service."""
async def send_command_request() -> None:
    """Send the ``speech.config`` message over the open websocket."""
    # The JSON body lives in plain (non-f) string literals, so its braces
    # are written as-is and need no f-string escaping.
    #
    # Author's note carried over: sentenceBoundaryEnabled and
    # wordBoundaryEnabled are meant to be booleans. The Edge browser
    # reportedly sends them as strings, whereas Azure Cognitive Services
    # uses real booleans; they are sent as JSON booleans here unless that
    # turns out to cause problems.
    config_body = (
        '{"context":{"synthesis":{"audio":{"metadataoptions":{'
        '"sentenceBoundaryEnabled":false,"wordBoundaryEnabled":true},'
        '"outputFormat":"audio-24khz-48kbitrate-mono-mp3"'
        "}}}}\r\n"
    )
    # Only the timestamp header is dynamic; it is computed for each call.
    config_headers = (
        f"X-Timestamp:{date_to_string()}\r\n"
        "Content-Type:application/json; charset=utf-8\r\n"
        "Path:speech.config\r\n\r\n"
    )
    await websocket.send_str(config_headers + config_body)
async def send_ssml_request() -> bool:
    """Send the next text chunk from ``texts`` as an SSML request.

    Returns:
        bool: True if a chunk was pulled from ``texts`` and sent, False if
        ``next(texts, None)`` produced None (nothing left to send).
    """
    chunk = next(texts, None)
    if chunk is not None:
        # Evaluate in the same order as the arguments are passed.
        request_id = connect_id()
        timestamp = date_to_string()
        ssml = mkssml(chunk, self.voice, self.rate, self.volume, self.pitch)
        await websocket.send_str(
            ssml_headers_plus_data(request_id, timestamp, ssml)
        )
        return True

    return False
def parse_metadata() -> Dict[str, Any]:
    """Extract the first WordBoundary entry from the current metadata message.

    Reads the enclosing scope's ``data`` (the JSON payload of the
    ``audio.metadata`` message) and ``offset_compensation`` (added to the
    reported offset).

    Returns:
        Dict[str, Any]: A dict with keys ``type``, ``offset``, ``duration``
        and ``text`` describing the word boundary.

    Raises:
        UnknownResponse: If an entry has a type other than WordBoundary or
            SessionEnd.
        UnexpectedResponse: If the message contains no WordBoundary entry.
            Previously this case fell off the end and returned None, which
            the caller then yielded and subscripted.
    """
    # NOTE(review): only the first WordBoundary of a message is returned; any
    # further entries in the same message are not reported.
    for meta_obj in json.loads(data)["Metadata"]:
        meta_type = meta_obj["Type"]
        if meta_type == "WordBoundary":
            boundary = meta_obj["Data"]
            return {
                "type": meta_type,
                "offset": boundary["Offset"] + offset_compensation,
                "duration": boundary["Duration"],
                "text": boundary["text"]["Text"],
            }
        if meta_type != "SessionEnd":
            raise UnknownResponse(f"Unknown metadata type: {meta_type}")
        # SessionEnd carries nothing to report; keep scanning.
    raise UnexpectedResponse("No WordBoundary metadata found")
# Split the text into multiple strings if it is too long for the service.
texts = split_text_by_byte_length( texts = split_text_by_byte_length(
escape(remove_incompatible_characters(self.text)), escape(remove_incompatible_characters(self.text)),
calc_max_mesg_size(self.voice, self.rate, self.volume, self.pitch), calc_max_mesg_size(self.voice, self.rate, self.volume, self.pitch),
) )
final_utterance: Dict[int, int] = {}
prev_idx = -1
shift_time = -1
# Keep track of last duration + offset to calculate the offset
# upon word split.
last_duration_offset = 0
# Current offset compensations.
offset_compensation = 0
# Create a new connection to the service.
ssl_ctx = ssl.create_default_context(cafile=certifi.where()) ssl_ctx = ssl.create_default_context(cafile=certifi.where())
for idx, text in enumerate(texts): async with aiohttp.ClientSession(
async with aiohttp.ClientSession( trust_env=True,
trust_env=True, ) as session, session.ws_connect(
) as session, session.ws_connect( f"{WSS_URL}&ConnectionId={connect_id()}",
f"{WSS_URL}&ConnectionId={connect_id()}", compress=15,
compress=15, proxy=self.proxy,
autoclose=True, receive_timeout=self.receive_timeout,
autoping=True, headers={
proxy=self.proxy, "Pragma": "no-cache",
headers={ "Cache-Control": "no-cache",
"Pragma": "no-cache", "Origin": "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
"Cache-Control": "no-cache", "Accept-Encoding": "gzip, deflate, br",
"Origin": "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold", "Accept-Language": "en-US,en;q=0.9",
"Accept-Encoding": "gzip, deflate, br", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
"Accept-Language": "en-US,en;q=0.9", " (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" },
" (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41", ssl=ssl_ctx,
}, ) as websocket:
ssl=ssl_ctx, # audio_was_received indicates whether we have received audio data
) as websocket: # from the websocket. This is so we can raise an exception if we
# download indicates whether we should be expecting audio data, # don't receive any audio data.
# this is so that we avoid getting binary data from the websocket audio_was_received = False
# and falsely thinking it's audio data.
download_audio = False
# audio_was_received indicates whether we have received audio data # Send the request to the service.
# from the websocket. This is so we can raise an exception if we await send_command_request()
# don't receive any audio data.
audio_was_received = False
# Each message needs to have the proper date. # Send the SSML request to the service.
date = date_to_string() await send_ssml_request()
# Prepare the request to be sent to the service. async for received in websocket:
# if received.type == aiohttp.WSMsgType.TEXT:
# Note sentenceBoundaryEnabled and wordBoundaryEnabled are actually supposed parameters, data = get_headers_and_data(received.data)
# to be booleans, but Edge Browser seems to send them as strings. path = parameters.get(b"Path")
# if path == b"audio.metadata":
# This is a bug in Edge as Azure Cognitive Services actually sends them as # Parse the metadata and yield it.
# bool and not string. For now I will send them as bool unless it causes parsed_metadata = parse_metadata()
# any problems. yield parsed_metadata
#
# Also pay close attention to double { } in request (escape for f-string).
await websocket.send_str(
f"X-Timestamp:{date}\r\n"
"Content-Type:application/json; charset=utf-8\r\n"
"Path:speech.config\r\n\r\n"
'{"context":{"synthesis":{"audio":{"metadataoptions":{'
'"sentenceBoundaryEnabled":false,"wordBoundaryEnabled":true},'
'"outputFormat":"audio-24khz-48kbitrate-mono-mp3"'
"}}}}\r\n"
)
await websocket.send_str( # Update the last duration offset for use by the next SSML request.
ssml_headers_plus_data( last_duration_offset = (
connect_id(), parsed_metadata["offset"] + parsed_metadata["duration"]
date, )
mkssml(text, self.voice, self.rate, self.volume, self.pitch), elif path == b"turn.end":
) # Update the offset compensation for the next SSML request.
) offset_compensation = last_duration_offset
async for received in websocket: # Use average padding typically added by the service
if received.type == aiohttp.WSMsgType.TEXT: # to the end of the audio data. This seems to work pretty
parameters, data = get_headers_and_data(received.data) # well for now, but we might ultimately need to use a
path = parameters.get(b"Path") # more sophisticated method like using ffmpeg to get
if path == b"turn.start": # the actual duration of the audio data.
download_audio = True offset_compensation += 8_750_000
elif path == b"turn.end":
download_audio = False
break # End of audio data
elif path == b"audio.metadata":
for meta_obj in json.loads(data)["Metadata"]:
meta_type = meta_obj["Type"]
if idx != prev_idx:
shift_time = sum(
final_utterance[i] for i in range(idx)
)
prev_idx = idx
if meta_type == "WordBoundary":
final_utterance[idx] = (
meta_obj["Data"]["Offset"]
+ meta_obj["Data"]["Duration"]
# Average padding added by the service
# Alternatively we could use ffmpeg to get value properly
# but I don't want to add an additional dependency
# if this is found to work well enough.
+ 8_750_000
)
yield {
"type": meta_type,
"offset": meta_obj["Data"]["Offset"]
+ shift_time,
"duration": meta_obj["Data"]["Duration"],
"text": meta_obj["Data"]["text"]["Text"],
}
elif meta_type == "SessionEnd":
continue
else:
raise UnknownResponse(
f"Unknown metadata type: {meta_type}"
)
elif path == b"response":
pass
else:
raise UnknownResponse(
"The response from the service is not recognized.\n"
+ received.data
)
elif received.type == aiohttp.WSMsgType.BINARY:
if not download_audio:
raise UnexpectedResponse(
"We received a binary message, but we are not expecting one."
)
if len(received.data) < 2: # Send the next SSML request to the service.
raise UnexpectedResponse( if not await send_ssml_request():
"We received a binary message, but it is missing the header length." break
) elif path in (b"response", b"turn.start"):
pass
# See: https://github.com/microsoft/cognitive-services-speech-sdk-js/blob/d071d11/src/common.speech/WebsocketMessageFormatter.ts#L46 else:
header_length = int.from_bytes(received.data[:2], "big") raise UnknownResponse(
if len(received.data) < header_length + 2: "The response from the service is not recognized.\n"
raise UnexpectedResponse( + received.data
"We received a binary message, but it is missing the audio data." )
) elif received.type == aiohttp.WSMsgType.BINARY:
if len(received.data) < 2:
yield { raise UnexpectedResponse(
"type": "audio", "We received a binary message, but it is missing the header length."
"data": received.data[header_length + 2 :],
}
audio_was_received = True
elif received.type == aiohttp.WSMsgType.ERROR:
raise WebSocketError(
received.data if received.data else "Unknown error"
) )
if not audio_was_received: header_length = int.from_bytes(received.data[:2], "big")
raise NoAudioReceived( if len(received.data) < header_length + 2:
"No audio was received. Please verify that your parameters are correct." raise UnexpectedResponse(
"We received a binary message, but it is missing the audio data."
)
audio_was_received = True
yield {
"type": "audio",
"data": received.data[header_length + 2 :],
}
elif received.type == aiohttp.WSMsgType.ERROR:
raise WebSocketError(
received.data if received.data else "Unknown error"
) )
if not audio_was_received:
raise NoAudioReceived(
"No audio was received. Please verify that your parameters are correct."
)
async def save( async def save(
self, self,
audio_fname: Union[str, bytes], audio_fname: Union[str, bytes],