Refactor communicate for better readability
Also improve performance on larger documents.

Signed-off-by: rany2 <rany2@riseup.net>
This commit is contained in:
62
pylintrc
62
pylintrc
@@ -5,6 +5,10 @@
|
|||||||
# only in one or another interpreter, leading to false positives when analysed.
|
# only in one or another interpreter, leading to false positives when analysed.
|
||||||
analyse-fallback-blocks=no
|
analyse-fallback-blocks=no
|
||||||
|
|
||||||
|
# Clear in-memory caches upon conclusion of linting. Useful if running pylint
|
||||||
|
# in a server-like mode.
|
||||||
|
clear-cache-post-run=no
|
||||||
|
|
||||||
# Load and enable all available extensions. Use --list-extensions to see a list
|
# Load and enable all available extensions. Use --list-extensions to see a list
|
||||||
# of all available extensions.
|
# of all available extensions.
|
||||||
#enable-all-extensions=
|
#enable-all-extensions=
|
||||||
@@ -46,8 +50,8 @@ ignore=CVS
|
|||||||
|
|
||||||
# Add files or directories matching the regular expressions patterns to the
|
# Add files or directories matching the regular expressions patterns to the
|
||||||
# ignore-list. The regex matches against paths and can be in Posix or Windows
|
# ignore-list. The regex matches against paths and can be in Posix or Windows
|
||||||
# format. Because '\' represents the directory delimiter on Windows systems, it
|
# format. Because '\\' represents the directory delimiter on Windows systems,
|
||||||
# can't be used as an escape character.
|
# it can't be used as an escape character.
|
||||||
ignore-paths=
|
ignore-paths=
|
||||||
|
|
||||||
# Files or directories matching the regular expression patterns are skipped.
|
# Files or directories matching the regular expression patterns are skipped.
|
||||||
@@ -84,11 +88,17 @@ persistent=yes
|
|||||||
|
|
||||||
# Minimum Python version to use for version dependent checks. Will default to
|
# Minimum Python version to use for version dependent checks. Will default to
|
||||||
# the version used to run pylint.
|
# the version used to run pylint.
|
||||||
py-version=3.10
|
py-version=3.11
|
||||||
|
|
||||||
# Discover python modules and packages in the file system subtree.
|
# Discover python modules and packages in the file system subtree.
|
||||||
recursive=no
|
recursive=no
|
||||||
|
|
||||||
|
# Add paths to the list of the source roots. Supports globbing patterns. The
|
||||||
|
# source root is an absolute path or a path relative to the current working
|
||||||
|
# directory used to determine a package namespace for modules located under the
|
||||||
|
# source root.
|
||||||
|
source-roots=
|
||||||
|
|
||||||
# When enabled, pylint would attempt to guess common misconfiguration and emit
|
# When enabled, pylint would attempt to guess common misconfiguration and emit
|
||||||
# user-friendly hints instead of false-positive error messages.
|
# user-friendly hints instead of false-positive error messages.
|
||||||
suggestion-mode=yes
|
suggestion-mode=yes
|
||||||
@@ -224,6 +234,10 @@ no-docstring-rgx=^_
|
|||||||
# These decorators are taken in consideration only for invalid-name.
|
# These decorators are taken in consideration only for invalid-name.
|
||||||
property-classes=abc.abstractproperty
|
property-classes=abc.abstractproperty
|
||||||
|
|
||||||
|
# Regular expression matching correct type alias names. If left empty, type
|
||||||
|
# alias names will be checked with the set naming style.
|
||||||
|
#typealias-rgx=
|
||||||
|
|
||||||
# Regular expression matching correct type variable names. If left empty, type
|
# Regular expression matching correct type variable names. If left empty, type
|
||||||
# variable names will be checked with the set naming style.
|
# variable names will be checked with the set naming style.
|
||||||
#typevar-rgx=
|
#typevar-rgx=
|
||||||
@@ -246,21 +260,18 @@ check-protected-access-in-special-methods=no
|
|||||||
defining-attr-methods=__init__,
|
defining-attr-methods=__init__,
|
||||||
__new__,
|
__new__,
|
||||||
setUp,
|
setUp,
|
||||||
|
asyncSetUp,
|
||||||
__post_init__
|
__post_init__
|
||||||
|
|
||||||
# List of member names, which should be excluded from the protected access
|
# List of member names, which should be excluded from the protected access
|
||||||
# warning.
|
# warning.
|
||||||
exclude-protected=_asdict,
|
exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit
|
||||||
_fields,
|
|
||||||
_replace,
|
|
||||||
_source,
|
|
||||||
_make
|
|
||||||
|
|
||||||
# List of valid names for the first argument in a class method.
|
# List of valid names for the first argument in a class method.
|
||||||
valid-classmethod-first-arg=cls
|
valid-classmethod-first-arg=cls
|
||||||
|
|
||||||
# List of valid names for the first argument in a metaclass class method.
|
# List of valid names for the first argument in a metaclass class method.
|
||||||
valid-metaclass-classmethod-first-arg=cls
|
valid-metaclass-classmethod-first-arg=mcs
|
||||||
|
|
||||||
|
|
||||||
[DESIGN]
|
[DESIGN]
|
||||||
@@ -274,7 +285,7 @@ exclude-too-few-public-methods=
|
|||||||
ignored-parents=
|
ignored-parents=
|
||||||
|
|
||||||
# Maximum number of arguments for function / method.
|
# Maximum number of arguments for function / method.
|
||||||
max-args=5
|
max-args=10
|
||||||
|
|
||||||
# Maximum number of attributes for a class (see R0902).
|
# Maximum number of attributes for a class (see R0902).
|
||||||
max-attributes=7
|
max-attributes=7
|
||||||
@@ -307,8 +318,7 @@ min-public-methods=2
|
|||||||
[EXCEPTIONS]
|
[EXCEPTIONS]
|
||||||
|
|
||||||
# Exceptions that will emit a warning when caught.
|
# Exceptions that will emit a warning when caught.
|
||||||
overgeneral-exceptions=builtins.BaseException,
|
overgeneral-exceptions=builtins.BaseException,builtins.Exception
|
||||||
builtins.Exception
|
|
||||||
|
|
||||||
|
|
||||||
[FORMAT]
|
[FORMAT]
|
||||||
@@ -327,7 +337,7 @@ indent-after-paren=4
|
|||||||
indent-string=' '
|
indent-string=' '
|
||||||
|
|
||||||
# Maximum number of characters on a single line.
|
# Maximum number of characters on a single line.
|
||||||
max-line-length=240
|
max-line-length=100
|
||||||
|
|
||||||
# Maximum number of lines in a module.
|
# Maximum number of lines in a module.
|
||||||
max-module-lines=1000
|
max-module-lines=1000
|
||||||
@@ -347,6 +357,9 @@ single-line-if-stmt=no
|
|||||||
# one.
|
# one.
|
||||||
allow-any-import-level=
|
allow-any-import-level=
|
||||||
|
|
||||||
|
# Allow explicit reexports by alias from a package __init__.
|
||||||
|
allow-reexport-from-package=no
|
||||||
|
|
||||||
# Allow wildcard imports from modules that define __all__.
|
# Allow wildcard imports from modules that define __all__.
|
||||||
allow-wildcard-with-all=no
|
allow-wildcard-with-all=no
|
||||||
|
|
||||||
@@ -408,14 +421,24 @@ confidence=HIGH,
|
|||||||
# --enable=similarities". If you want to run only the classes checker, but have
|
# --enable=similarities". If you want to run only the classes checker, but have
|
||||||
# no Warning level messages displayed, use "--disable=all --enable=classes
|
# no Warning level messages displayed, use "--disable=all --enable=classes
|
||||||
# --disable=W".
|
# --disable=W".
|
||||||
disable=duplicate-code,
|
disable=raw-checker-failed,
|
||||||
|
bad-inline-option,
|
||||||
|
locally-disabled,
|
||||||
|
file-ignored,
|
||||||
|
suppressed-message,
|
||||||
|
useless-suppression,
|
||||||
|
deprecated-pragma,
|
||||||
|
use-symbolic-message-instead,
|
||||||
|
use-implicit-booleaness-not-comparison-to-string,
|
||||||
|
use-implicit-booleaness-not-comparison-to-zero,
|
||||||
|
duplicate-code,
|
||||||
consider-using-with
|
consider-using-with
|
||||||
|
|
||||||
# Enable the message, report, category or checker with the given id(s). You can
|
# Enable the message, report, category or checker with the given id(s). You can
|
||||||
# either give multiple identifiers separated by commas (,) or put this option
|
# either give multiple identifiers separated by commas (,) or put this option
|
||||||
# multiple times (only on the command line, not in the configuration file where
|
# multiple times (only on the command line, not in the configuration file where
|
||||||
# it should appear only once). See also the "--disable" option for examples.
|
# it should appear only once). See also the "--disable" option for examples.
|
||||||
enable=c-extension-no-member
|
enable=
|
||||||
|
|
||||||
|
|
||||||
[METHOD_ARGS]
|
[METHOD_ARGS]
|
||||||
@@ -461,8 +484,9 @@ evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor
|
|||||||
# used to format the message information. See doc for all details.
|
# used to format the message information. See doc for all details.
|
||||||
msg-template=
|
msg-template=
|
||||||
|
|
||||||
# Set the output format. Available formats are text, parseable, colorized, json
|
# Set the output format. Available formats are: text, parseable, colorized,
|
||||||
# and msvs (visual studio). You can also give a reporter class, e.g.
|
# json2 (improved json format), json (old json format) and msvs (visual
|
||||||
|
# studio). You can also give a reporter class, e.g.
|
||||||
# mypackage.mymodule.MyReporterClass.
|
# mypackage.mymodule.MyReporterClass.
|
||||||
#output-format=
|
#output-format=
|
||||||
|
|
||||||
@@ -496,8 +520,8 @@ min-similarity-lines=4
|
|||||||
# Limits count of emitted suggestions for spelling mistakes.
|
# Limits count of emitted suggestions for spelling mistakes.
|
||||||
max-spelling-suggestions=4
|
max-spelling-suggestions=4
|
||||||
|
|
||||||
# Spelling dictionary name. Available dictionaries: none. To make it work,
|
# Spelling dictionary name. No available dictionaries : You need to install
|
||||||
# install the 'python-enchant' package.
|
# both the python package and the system dependency for enchant to work.
|
||||||
spelling-dict=
|
spelling-dict=
|
||||||
|
|
||||||
# List of comma separated words that should be considered directives if they
|
# List of comma separated words that should be considered directives if they
|
||||||
|
|||||||
@@ -229,6 +229,25 @@ class Communicate:
|
|||||||
Class for communicating with the service.
|
Class for communicating with the service.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def validate_string_param(param_name: str, param_value: str, pattern: str) -> str:
|
||||||
|
"""
|
||||||
|
Validates the given string parameter based on type and pattern.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
param_name (str): The name of the parameter.
|
||||||
|
param_value (str): The value of the parameter.
|
||||||
|
pattern (str): The pattern to validate the parameter against.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: The validated parameter.
|
||||||
|
"""
|
||||||
|
if not isinstance(param_value, str):
|
||||||
|
raise TypeError(f"{param_name} must be str")
|
||||||
|
if re.match(pattern, param_value) is None:
|
||||||
|
raise ValueError(f"Invalid {param_name} '{param_value}'.")
|
||||||
|
return param_value
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
text: str,
|
text: str,
|
||||||
@@ -238,6 +257,7 @@ class Communicate:
|
|||||||
volume: str = "+0%",
|
volume: str = "+0%",
|
||||||
pitch: str = "+0Hz",
|
pitch: str = "+0Hz",
|
||||||
proxy: Optional[str] = None,
|
proxy: Optional[str] = None,
|
||||||
|
receive_timeout: int = 5,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Initializes the Communicate class.
|
Initializes the Communicate class.
|
||||||
@@ -270,190 +290,191 @@ class Communicate:
|
|||||||
+ f" ({lang}-{region}, {name})"
|
+ f" ({lang}-{region}, {name})"
|
||||||
)
|
)
|
||||||
|
|
||||||
if (
|
self.voice = self.validate_string_param(
|
||||||
re.match(
|
"voice",
|
||||||
r"^Microsoft Server Speech Text to Speech Voice \(.+,.+\)$",
|
self.voice,
|
||||||
self.voice,
|
r"^Microsoft Server Speech Text to Speech Voice \(.+,.+\)$",
|
||||||
)
|
)
|
||||||
is None
|
self.rate = self.validate_string_param("rate", rate, r"^[+-]\d+%$")
|
||||||
):
|
self.volume = self.validate_string_param("volume", volume, r"^[+-]\d+%$")
|
||||||
raise ValueError(f"Invalid voice '{voice}'.")
|
self.pitch = self.validate_string_param("pitch", pitch, r"^[+-]\d+Hz$")
|
||||||
|
|
||||||
if not isinstance(rate, str):
|
|
||||||
raise TypeError("rate must be str")
|
|
||||||
if re.match(r"^[+-]\d+%$", rate) is None:
|
|
||||||
raise ValueError(f"Invalid rate '{rate}'.")
|
|
||||||
self.rate: str = rate
|
|
||||||
|
|
||||||
if not isinstance(volume, str):
|
|
||||||
raise TypeError("volume must be str")
|
|
||||||
if re.match(r"^[+-]\d+%$", volume) is None:
|
|
||||||
raise ValueError(f"Invalid volume '{volume}'.")
|
|
||||||
self.volume: str = volume
|
|
||||||
|
|
||||||
if not isinstance(pitch, str):
|
|
||||||
raise TypeError("pitch must be str")
|
|
||||||
if re.match(r"^[+-]\d+Hz$", pitch) is None:
|
|
||||||
raise ValueError(f"Invalid pitch '{pitch}'.")
|
|
||||||
self.pitch: str = pitch
|
|
||||||
|
|
||||||
if proxy is not None and not isinstance(proxy, str):
|
if proxy is not None and not isinstance(proxy, str):
|
||||||
raise TypeError("proxy must be str")
|
raise TypeError("proxy must be str")
|
||||||
self.proxy: Optional[str] = proxy
|
self.proxy: Optional[str] = proxy
|
||||||
|
|
||||||
|
if not isinstance(receive_timeout, int):
|
||||||
|
raise TypeError("receive_timeout must be int")
|
||||||
|
self.receive_timeout: int = receive_timeout
|
||||||
|
|
||||||
async def stream(self) -> AsyncGenerator[Dict[str, Any], None]:
|
async def stream(self) -> AsyncGenerator[Dict[str, Any], None]:
|
||||||
"""Streams audio and metadata from the service."""
|
"""Streams audio and metadata from the service."""
|
||||||
|
|
||||||
|
async def send_command_request() -> None:
|
||||||
|
"""Sends the request to the service."""
|
||||||
|
|
||||||
|
# Prepare the request to be sent to the service.
|
||||||
|
#
|
||||||
|
# Note sentenceBoundaryEnabled and wordBoundaryEnabled are actually supposed
|
||||||
|
# to be booleans, but Edge Browser seems to send them as strings.
|
||||||
|
#
|
||||||
|
# This is a bug in Edge as Azure Cognitive Services actually sends them as
|
||||||
|
# bool and not string. For now I will send them as bool unless it causes
|
||||||
|
# any problems.
|
||||||
|
#
|
||||||
|
# Also pay close attention to double { } in request (escape for f-string).
|
||||||
|
await websocket.send_str(
|
||||||
|
f"X-Timestamp:{date_to_string()}\r\n"
|
||||||
|
"Content-Type:application/json; charset=utf-8\r\n"
|
||||||
|
"Path:speech.config\r\n\r\n"
|
||||||
|
'{"context":{"synthesis":{"audio":{"metadataoptions":{'
|
||||||
|
'"sentenceBoundaryEnabled":false,"wordBoundaryEnabled":true},'
|
||||||
|
'"outputFormat":"audio-24khz-48kbitrate-mono-mp3"'
|
||||||
|
"}}}}\r\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
async def send_ssml_request() -> bool:
|
||||||
|
"""Sends the SSML request to the service."""
|
||||||
|
|
||||||
|
# Get the next string from the generator.
|
||||||
|
text = next(texts, None)
|
||||||
|
|
||||||
|
# If there are no more strings, return False.
|
||||||
|
if text is None:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Send the request to the service and return True.
|
||||||
|
await websocket.send_str(
|
||||||
|
ssml_headers_plus_data(
|
||||||
|
connect_id(),
|
||||||
|
date_to_string(),
|
||||||
|
mkssml(text, self.voice, self.rate, self.volume, self.pitch),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return True
|
||||||
|
|
||||||
|
def parse_metadata():
|
||||||
|
for meta_obj in json.loads(data)["Metadata"]:
|
||||||
|
meta_type = meta_obj["Type"]
|
||||||
|
if meta_type == "WordBoundary":
|
||||||
|
current_offset = meta_obj["Data"]["Offset"] + offset_compensation
|
||||||
|
current_duration = meta_obj["Data"]["Duration"]
|
||||||
|
return {
|
||||||
|
"type": meta_type,
|
||||||
|
"offset": current_offset,
|
||||||
|
"duration": current_duration,
|
||||||
|
"text": meta_obj["Data"]["text"]["Text"],
|
||||||
|
}
|
||||||
|
elif meta_type in ("SessionEnd",):
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
raise UnknownResponse(f"Unknown metadata type: {meta_type}")
|
||||||
|
|
||||||
|
# Split the text into multiple strings if it is too long for the service.
|
||||||
texts = split_text_by_byte_length(
|
texts = split_text_by_byte_length(
|
||||||
escape(remove_incompatible_characters(self.text)),
|
escape(remove_incompatible_characters(self.text)),
|
||||||
calc_max_mesg_size(self.voice, self.rate, self.volume, self.pitch),
|
calc_max_mesg_size(self.voice, self.rate, self.volume, self.pitch),
|
||||||
)
|
)
|
||||||
final_utterance: Dict[int, int] = {}
|
|
||||||
prev_idx = -1
|
|
||||||
shift_time = -1
|
|
||||||
|
|
||||||
|
# Keep track of last duration + offset to calculate the offset
|
||||||
|
# upon word split.
|
||||||
|
last_duration_offset = 0
|
||||||
|
|
||||||
|
# Current offset compensations.
|
||||||
|
offset_compensation = 0
|
||||||
|
|
||||||
|
# Create a new connection to the service.
|
||||||
ssl_ctx = ssl.create_default_context(cafile=certifi.where())
|
ssl_ctx = ssl.create_default_context(cafile=certifi.where())
|
||||||
for idx, text in enumerate(texts):
|
async with aiohttp.ClientSession(
|
||||||
async with aiohttp.ClientSession(
|
trust_env=True,
|
||||||
trust_env=True,
|
) as session, session.ws_connect(
|
||||||
) as session, session.ws_connect(
|
f"{WSS_URL}&ConnectionId={connect_id()}",
|
||||||
f"{WSS_URL}&ConnectionId={connect_id()}",
|
compress=15,
|
||||||
compress=15,
|
proxy=self.proxy,
|
||||||
autoclose=True,
|
receive_timeout=self.receive_timeout,
|
||||||
autoping=True,
|
headers={
|
||||||
proxy=self.proxy,
|
"Pragma": "no-cache",
|
||||||
headers={
|
"Cache-Control": "no-cache",
|
||||||
"Pragma": "no-cache",
|
"Origin": "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
|
||||||
"Cache-Control": "no-cache",
|
"Accept-Encoding": "gzip, deflate, br",
|
||||||
"Origin": "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
|
"Accept-Language": "en-US,en;q=0.9",
|
||||||
"Accept-Encoding": "gzip, deflate, br",
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
||||||
"Accept-Language": "en-US,en;q=0.9",
|
" (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
|
||||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
},
|
||||||
" (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36 Edg/91.0.864.41",
|
ssl=ssl_ctx,
|
||||||
},
|
) as websocket:
|
||||||
ssl=ssl_ctx,
|
# audio_was_received indicates whether we have received audio data
|
||||||
) as websocket:
|
# from the websocket. This is so we can raise an exception if we
|
||||||
# download indicates whether we should be expecting audio data,
|
# don't receive any audio data.
|
||||||
# this is so what we avoid getting binary data from the websocket
|
audio_was_received = False
|
||||||
# and falsely thinking it's audio data.
|
|
||||||
download_audio = False
|
|
||||||
|
|
||||||
# audio_was_received indicates whether we have received audio data
|
# Send the request to the service.
|
||||||
# from the websocket. This is so we can raise an exception if we
|
await send_command_request()
|
||||||
# don't receive any audio data.
|
|
||||||
audio_was_received = False
|
|
||||||
|
|
||||||
# Each message needs to have the proper date.
|
# Send the SSML request to the service.
|
||||||
date = date_to_string()
|
await send_ssml_request()
|
||||||
|
|
||||||
# Prepare the request to be sent to the service.
|
async for received in websocket:
|
||||||
#
|
if received.type == aiohttp.WSMsgType.TEXT:
|
||||||
# Note sentenceBoundaryEnabled and wordBoundaryEnabled are actually supposed
|
parameters, data = get_headers_and_data(received.data)
|
||||||
# to be booleans, but Edge Browser seems to send them as strings.
|
path = parameters.get(b"Path")
|
||||||
#
|
if path == b"audio.metadata":
|
||||||
# This is a bug in Edge as Azure Cognitive Services actually sends them as
|
# Parse the metadata and yield it.
|
||||||
# bool and not string. For now I will send them as bool unless it causes
|
parsed_metadata = parse_metadata()
|
||||||
# any problems.
|
yield parsed_metadata
|
||||||
#
|
|
||||||
# Also pay close attention to double { } in request (escape for f-string).
|
|
||||||
await websocket.send_str(
|
|
||||||
f"X-Timestamp:{date}\r\n"
|
|
||||||
"Content-Type:application/json; charset=utf-8\r\n"
|
|
||||||
"Path:speech.config\r\n\r\n"
|
|
||||||
'{"context":{"synthesis":{"audio":{"metadataoptions":{'
|
|
||||||
'"sentenceBoundaryEnabled":false,"wordBoundaryEnabled":true},'
|
|
||||||
'"outputFormat":"audio-24khz-48kbitrate-mono-mp3"'
|
|
||||||
"}}}}\r\n"
|
|
||||||
)
|
|
||||||
|
|
||||||
await websocket.send_str(
|
# Update the last duration offset for use by the next SSML request.
|
||||||
ssml_headers_plus_data(
|
last_duration_offset = (
|
||||||
connect_id(),
|
parsed_metadata["offset"] + parsed_metadata["duration"]
|
||||||
date,
|
)
|
||||||
mkssml(text, self.voice, self.rate, self.volume, self.pitch),
|
elif path == b"turn.end":
|
||||||
)
|
# Update the offset compensation for the next SSML request.
|
||||||
)
|
offset_compensation = last_duration_offset
|
||||||
|
|
||||||
async for received in websocket:
|
# Use average padding typically added by the service
|
||||||
if received.type == aiohttp.WSMsgType.TEXT:
|
# to the end of the audio data. This seems to work pretty
|
||||||
parameters, data = get_headers_and_data(received.data)
|
# well for now, but we might ultimately need to use a
|
||||||
path = parameters.get(b"Path")
|
# more sophisticated method like using ffmpeg to get
|
||||||
if path == b"turn.start":
|
# the actual duration of the audio data.
|
||||||
download_audio = True
|
offset_compensation += 8_750_000
|
||||||
elif path == b"turn.end":
|
|
||||||
download_audio = False
|
|
||||||
break # End of audio data
|
|
||||||
elif path == b"audio.metadata":
|
|
||||||
for meta_obj in json.loads(data)["Metadata"]:
|
|
||||||
meta_type = meta_obj["Type"]
|
|
||||||
if idx != prev_idx:
|
|
||||||
shift_time = sum(
|
|
||||||
final_utterance[i] for i in range(idx)
|
|
||||||
)
|
|
||||||
prev_idx = idx
|
|
||||||
if meta_type == "WordBoundary":
|
|
||||||
final_utterance[idx] = (
|
|
||||||
meta_obj["Data"]["Offset"]
|
|
||||||
+ meta_obj["Data"]["Duration"]
|
|
||||||
# Average padding added by the service
|
|
||||||
# Alternatively we could use ffmpeg to get value properly
|
|
||||||
# but I don't want to add an additional dependency
|
|
||||||
# if this is found to work well enough.
|
|
||||||
+ 8_750_000
|
|
||||||
)
|
|
||||||
yield {
|
|
||||||
"type": meta_type,
|
|
||||||
"offset": meta_obj["Data"]["Offset"]
|
|
||||||
+ shift_time,
|
|
||||||
"duration": meta_obj["Data"]["Duration"],
|
|
||||||
"text": meta_obj["Data"]["text"]["Text"],
|
|
||||||
}
|
|
||||||
elif meta_type == "SessionEnd":
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
raise UnknownResponse(
|
|
||||||
f"Unknown metadata type: {meta_type}"
|
|
||||||
)
|
|
||||||
elif path == b"response":
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
raise UnknownResponse(
|
|
||||||
"The response from the service is not recognized.\n"
|
|
||||||
+ received.data
|
|
||||||
)
|
|
||||||
elif received.type == aiohttp.WSMsgType.BINARY:
|
|
||||||
if not download_audio:
|
|
||||||
raise UnexpectedResponse(
|
|
||||||
"We received a binary message, but we are not expecting one."
|
|
||||||
)
|
|
||||||
|
|
||||||
if len(received.data) < 2:
|
# Send the next SSML request to the service.
|
||||||
raise UnexpectedResponse(
|
if not await send_ssml_request():
|
||||||
"We received a binary message, but it is missing the header length."
|
break
|
||||||
)
|
elif path in (b"response", b"turn.start"):
|
||||||
|
pass
|
||||||
# See: https://github.com/microsoft/cognitive-services-speech-sdk-js/blob/d071d11/src/common.speech/WebsocketMessageFormatter.ts#L46
|
else:
|
||||||
header_length = int.from_bytes(received.data[:2], "big")
|
raise UnknownResponse(
|
||||||
if len(received.data) < header_length + 2:
|
"The response from the service is not recognized.\n"
|
||||||
raise UnexpectedResponse(
|
+ received.data
|
||||||
"We received a binary message, but it is missing the audio data."
|
)
|
||||||
)
|
elif received.type == aiohttp.WSMsgType.BINARY:
|
||||||
|
if len(received.data) < 2:
|
||||||
yield {
|
raise UnexpectedResponse(
|
||||||
"type": "audio",
|
"We received a binary message, but it is missing the header length."
|
||||||
"data": received.data[header_length + 2 :],
|
|
||||||
}
|
|
||||||
audio_was_received = True
|
|
||||||
elif received.type == aiohttp.WSMsgType.ERROR:
|
|
||||||
raise WebSocketError(
|
|
||||||
received.data if received.data else "Unknown error"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if not audio_was_received:
|
header_length = int.from_bytes(received.data[:2], "big")
|
||||||
raise NoAudioReceived(
|
if len(received.data) < header_length + 2:
|
||||||
"No audio was received. Please verify that your parameters are correct."
|
raise UnexpectedResponse(
|
||||||
|
"We received a binary message, but it is missing the audio data."
|
||||||
|
)
|
||||||
|
|
||||||
|
audio_was_received = True
|
||||||
|
yield {
|
||||||
|
"type": "audio",
|
||||||
|
"data": received.data[header_length + 2 :],
|
||||||
|
}
|
||||||
|
elif received.type == aiohttp.WSMsgType.ERROR:
|
||||||
|
raise WebSocketError(
|
||||||
|
received.data if received.data else "Unknown error"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if not audio_was_received:
|
||||||
|
raise NoAudioReceived(
|
||||||
|
"No audio was received. Please verify that your parameters are correct."
|
||||||
|
)
|
||||||
|
|
||||||
async def save(
|
async def save(
|
||||||
self,
|
self,
|
||||||
audio_fname: Union[str, bytes],
|
audio_fname: Union[str, bytes],
|
||||||
|
|||||||
Reference in New Issue
Block a user