Fix bug with split_text_by_byte_length: split at the last space within the byte limit, or at the limit itself when no space is found

This commit is contained in:
rany
2021-12-07 22:36:03 +02:00
parent 4fcecddaf0
commit cbc6ca1b11

View File

@@ -105,21 +105,24 @@ def split_text_by_byte_length(text, byte_length):
if isinstance(text, str): if isinstance(text, str):
text = text.encode("utf-8") text = text.encode("utf-8")
split_text = [] words = []
current_string = b"" while len(text) > byte_length:
for character in iter_bytes(text): # Find the last space in the string
if len(current_string) + len(character) <= byte_length: last_space = text.rfind(b" ", 0, byte_length)
current_string += character if last_space == -1:
# No space found, just split at the byte length
words.append(text[:byte_length])
text = text[byte_length:]
else: else:
split_text.append(current_string) # Split at the last space
current_string = character words.append(text[:last_space])
if split_text[-1].find(b" ") != -1: text = text[last_space:]
while split_text[-1][-1] != b" ": words.append(text)
current_string = split_text[-1][-1] + current_string
split_text[-1] = split_text[-1][:-1] # Remove empty strings from the list
if current_string != b"": words = [word for word in words if word]
split_text.append(current_string) # Return the list
return split_text return words
def mkssml(text, voice, pitch, rate, volume): def mkssml(text, voice, pitch, rate, volume):