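Update media processing: route course.py and voice2txt transcription through convert_media (renamed from main; now returns the transcript and saves it as *_transcript.md)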
This commit is contained in:
13
course.py
13
course.py
@@ -10,12 +10,10 @@ from threading import Thread
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from fileconvert import mp4_to_mp3, mp4_to_wav
|
|
||||||
from headers import headers
|
from headers import headers
|
||||||
|
|
||||||
from logging_config import setup_logging
|
from logging_config import setup_logging
|
||||||
# from video_voice_process import process_audio_file
|
from transcribe_media import convert_media
|
||||||
from sense_voice_process import process_audio_file
|
|
||||||
|
|
||||||
logger = setup_logging()
|
logger = setup_logging()
|
||||||
|
|
||||||
@@ -225,14 +223,7 @@ def download_course_contents(course_ids, course_ids_dict):
|
|||||||
# 构建完整的 MP4 文件路径
|
# 构建完整的 MP4 文件路径
|
||||||
mp4_file = os.path.join(root, file)
|
mp4_file = os.path.join(root, file)
|
||||||
if (not exist_md_file) and mp4_file is not None:
|
if (not exist_md_file) and mp4_file is not None:
|
||||||
mp4_to_mp3(mp4_file)
|
convert_media(mp4_file)
|
||||||
# 调用 mp4_to_wav 函数进行转换
|
|
||||||
wav_file = mp4_to_wav(mp4_file)
|
|
||||||
if wav_file is not None:
|
|
||||||
try:
|
|
||||||
process_audio_file(wav_file)
|
|
||||||
except:
|
|
||||||
print('process_audio_file fail')
|
|
||||||
|
|
||||||
|
|
||||||
def get_course():
|
def get_course():
|
||||||
|
|||||||
BIN
courses.db
BIN
courses.db
Binary file not shown.
@@ -12,6 +12,7 @@ from gradio_client import Client, handle_file
|
|||||||
import json
|
import json
|
||||||
from logging_config import setup_logging
|
from logging_config import setup_logging
|
||||||
from sense_voice_process import short_audio_process
|
from sense_voice_process import short_audio_process
|
||||||
|
from transcribe_media import convert_media
|
||||||
|
|
||||||
# 读取配置文件
|
# 读取配置文件
|
||||||
config = configparser.ConfigParser()
|
config = configparser.ConfigParser()
|
||||||
@@ -84,7 +85,7 @@ def voice2txt(voice_path):
|
|||||||
# api_name="/model_inference"
|
# api_name="/model_inference"
|
||||||
# )
|
# )
|
||||||
# logger.info(text)
|
# logger.info(text)
|
||||||
text = short_audio_process(voice_path)
|
text = convert_media(voice_path)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -48,7 +48,7 @@ def transcribe_audio_funasr(audio_path, device="cuda:0"):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def main(file_path):
|
def convert_media(file_path):
|
||||||
try:
|
try:
|
||||||
audio_file = extract_or_convert_audio(file_path)
|
audio_file = extract_or_convert_audio(file_path)
|
||||||
transcript = transcribe_audio_funasr(audio_file)
|
transcript = transcribe_audio_funasr(audio_file)
|
||||||
@@ -56,13 +56,15 @@ def main(file_path):
|
|||||||
print(transcript)
|
print(transcript)
|
||||||
|
|
||||||
# ✅ Save transcript to disk
|
# ✅ Save transcript to disk
|
||||||
output_path = os.path.splitext(file_path)[0] + "_transcript.txt"
|
output_path = os.path.splitext(file_path)[0] + "_transcript.md"
|
||||||
with open(output_path, "w", encoding="utf-8") as f:
|
with open(output_path, "w", encoding="utf-8") as f:
|
||||||
f.write(transcript)
|
f.write(transcript)
|
||||||
|
|
||||||
print(f"✅ Transcript saved to: {output_path}")
|
print(f"✅ Transcript saved to: {output_path}")
|
||||||
|
return transcript
|
||||||
finally:
|
finally:
|
||||||
if os.path.exists("processed_audio.wav"):
|
if os.path.exists("processed_audio.wav"):
|
||||||
os.remove("processed_audio.wav")
|
os.remove("processed_audio.wav")
|
||||||
|
|
||||||
main("./course/676/mp4/20250413142836-第36期茶话会-视频-1.mp4")
|
if __name__ == '__main__':
|
||||||
|
convert_media("./course/676/mp4/20250413142836-第36期茶话会-视频-1.mp4")
|
||||||
|
|||||||
Reference in New Issue
Block a user