# Post-patch form of removeIncompatibleControlChars from edge-tts.py
# (commit 185db5fe1c32, "Unicode considers newlines as control characters").
def removeIncompatibleControlChars(s):
    """Return *s* with Unicode "C" (Other) category characters removed.

    Any character whose Unicode general category abbreviation starts with
    "C" is dropped, with three exceptions: tab, line feed and carriage
    return are always kept. Unicode classifies those three as control
    characters too, but here they are treated as ordinary whitespace, so
    filtering purely on the category would wrongly strip newlines.

    Args:
        s: The text to filter (any iterable of single characters works).

    Returns:
        A new string holding the surviving characters in original order.
    """
    # Tested before the category lookup so these three never reach it.
    whitespace_controls = ("\t", "\n", "\r")
    return "".join(
        ch
        for ch in s
        if ch in whitespace_controls
        or not unicodedata.category(ch).startswith("C")
    )