make removeIncompatibleControlChars more accurate

This commit is contained in:
rany
2021-06-06 16:38:21 +03:00
parent f216d92ed3
commit fb591c54aa

View File

@@ -7,7 +7,6 @@ import argparse
import asyncio import asyncio
import ssl import ssl
import websockets import websockets
import unicodedata
import logging import logging
import httpx import httpx
from email.utils import formatdate from email.utils import formatdate
@@ -18,17 +17,18 @@ trustedClientToken = '6A5AA1D4EAFF4E9FB37E23D68491D6F4'
# WebSocket endpoint of the read-aloud speech synthesis service; the trusted
# client token is passed as a query parameter.
wssUrl = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=' + trustedClientToken
# HTTPS endpoint for the voice list (presumably what list_voices() queries --
# confirm against its body); uses the same token.
voiceList = 'https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=' + trustedClientToken
def connectId():
    """Return a fresh random connection id.

    Returns:
        A version-4 UUID rendered as 32 lowercase hexadecimal characters,
        without the dashes of the canonical UUID text form.
    """
    # UUID.hex is the canonical string form with the dashes already removed.
    return uuid.uuid4().hex
# Translation table for removeIncompatibleControlChars: every code point below
# 32 except tab (9), line feed (10) and carriage return (13) maps to a space.
_INCOMPATIBLE_CONTROL_CHARS = {
    code: " " for code in range(32) if code not in (9, 10, 13)
}


def removeIncompatibleControlChars(s):
    """Replace incompatible control characters in *s* with spaces.

    Code points 0-8, 11-12 and 14-31 are each replaced by a single space, so
    the result always has the same length as the input. Tab, line feed,
    carriage return and every character from code point 32 upwards are kept
    unchanged.

    Args:
        s: The text to clean (a ``str``).

    Returns:
        A new string with the affected characters replaced by spaces.
    """
    # The table is built once at import time; str.translate then performs the
    # whole substitution in a single pass instead of a per-character loop.
    return s.translate(_INCOMPATIBLE_CONTROL_CHARS)
def list_voices(): def list_voices():