WebVTT isn't a very common format in the first place, and attempting to make WordBoundary events play nice with the input text is very hard. Instead, we now just display the word that the TTS is saying at a given time. In the future, we could try to enable SentenceBoundary, but there is a risk that it will be banned by Microsoft as it is not used by Microsoft Edge itself.

Closes: https://github.com/rany2/edge-tts/issues/118
Closes: https://github.com/rany2/edge-tts/issues/171
Closes: https://github.com/rany2/edge-tts/issues/229
Closes: https://github.com/rany2/edge-tts/issues/234

Signed-off-by: rany <rany2@riseup.net>
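For reference, each WordBoundary chunk produced during streaming carries the spoken word together with an offset and duration that the service reports in 100-nanosecond ticks. The sketch below shows roughly what turning one such chunk into a single one-word SRT cue involves; the helper names are illustrative only and not part of edge-tts. The full example file follows it.

from datetime import timedelta


def ticks_to_srt_time(ticks: int) -> str:
    """Convert a 100-nanosecond tick count into an SRT timestamp (HH:MM:SS,mmm)."""
    td = timedelta(microseconds=ticks / 10)
    total_seconds = int(td.total_seconds())
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    millis = td.microseconds // 1000
    return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"


def word_boundary_to_srt_cue(index: int, chunk: dict) -> str:
    """Format one WordBoundary chunk as a single SRT cue containing one word."""
    start = ticks_to_srt_time(chunk["offset"])
    end = ticks_to_srt_time(chunk["offset"] + chunk["duration"])
    return f"{index}\n{start} --> {end}\n{chunk['text']}\n"


# A made-up WordBoundary chunk, shaped like the ones yielded during streaming.
print(word_boundary_to_srt_cue(1, {"offset": 1_000_000, "duration": 5_000_000, "text": "Hello"}))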
#!/usr/bin/env python3

"""
Streaming TTS example with subtitles.

This example is similar to the example basic_audio_streaming.py, but it shows
how to use WordBoundary events to create subtitles using SubMaker.
"""

import asyncio

import edge_tts

TEXT = "Hello World!"
VOICE = "en-GB-SoniaNeural"
OUTPUT_FILE = "test.mp3"
SRT_FILE = "test.srt"


async def amain() -> None:
    """Main function"""
    communicate = edge_tts.Communicate(TEXT, VOICE)
    submaker = edge_tts.SubMaker()
    with open(OUTPUT_FILE, "wb") as file:
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                # Write the streamed MP3 data to the output file.
                file.write(chunk["data"])
            elif chunk["type"] == "WordBoundary":
                # Hand each WordBoundary event to SubMaker so it can build SRT cues.
                submaker.feed(chunk)

    with open(SRT_FILE, "w", encoding="utf-8") as file:
        file.write(submaker.get_srt())


if __name__ == "__main__":
    asyncio.run(amain())
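The example above emits one cue per word. If longer cues are preferred, consecutive word cues can be merged before being written out; the standalone sketch below illustrates the idea without relying on any SubMaker helper (the function name and cue tuple layout are assumptions for illustration).

from typing import List, Tuple

# (start_ticks, end_ticks, text) -- same 100-nanosecond units the service reports.
Cue = Tuple[int, int, str]


def merge_word_cues(cues: List[Cue], words_per_cue: int = 10) -> List[Cue]:
    """Group consecutive one-word cues into cues of up to `words_per_cue` words."""
    merged: List[Cue] = []
    for i in range(0, len(cues), words_per_cue):
        group = cues[i : i + words_per_cue]
        start = group[0][0]  # start time of the first word in the group
        end = group[-1][1]  # end time of the last word in the group
        text = " ".join(word for _, _, word in group)
        merged.append((start, end, text))
    return merged

Grouping by a fixed word count keeps the logic simple; a real implementation might also split on sentence punctuation or cap the duration of each cue.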