diff --git a/config.ini b/config.ini index 74b02a4..96097e2 100755 --- a/config.ini +++ b/config.ini @@ -5,4 +5,7 @@ sort= max_download_threads = 5 max_retry_attempts = 3 -authorization_token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiIxMDAwMDgzNDciLCJleHAiOjE3NDI2MDc3NDAsImp0aSI6IjEwMjM4YTJmLTBiN2QtNDIwNi1iNDU2LTQ1MTRiMjFjZGM4MyIsImlhdCI6MTc0MDAxNTc0MCwiaXNzIjoiYXBwdXNlciIsInVpZCI6ImJlMmViOGIyLTFhOTItNGVmMC05ZDAwLTA1YTlkN2E2OWRiMiIsInNjaGVtZSI6Imp3dGhzIiwic2lkIjoiZDliZjIzNjQtNjVhYi00ZWNkLThhZjctY2MzMDcxODU0M2M5In0.05GqlG4rhwwlbuQUfEHlHTB-vAz2lOh5JCVlQ6j4V8s \ No newline at end of file +authorization_token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiIxMDAwMDgzNDciLCJleHAiOjE3NDI2MDc3NDAsImp0aSI6IjEwMjM4YTJmLTBiN2QtNDIwNi1iNDU2LTQ1MTRiMjFjZGM4MyIsImlhdCI6MTc0MDAxNTc0MCwiaXNzIjoiYXBwdXNlciIsInVpZCI6ImJlMmViOGIyLTFhOTItNGVmMC05ZDAwLTA1YTlkN2E2OWRiMiIsInNjaGVtZSI6Imp3dGhzIiwic2lkIjoiZDliZjIzNjQtNjVhYi00ZWNkLThhZjctY2MzMDcxODU0M2M5In0.05GqlG4rhwwlbuQUfEHlHTB-vAz2lOh5JCVlQ6j4V8s + +voice2txt_url=https://api.siliconflow.cn/v1/audio/transcriptions +voice_token=sk-vksrlpckcpttnpjgftpgwytmiipjmvhyzmnffhbhjpahbfiq \ No newline at end of file diff --git a/course.py b/course.py index e3a4d10..c629077 100755 --- a/course.py +++ b/course.py @@ -154,8 +154,8 @@ def get_course(): else: response = requests.get(f'https://bandu-api.songy.info/v2/courses/{course_id}/contents', headers=headers) contents_data = response.json() - with open(json_filename, 'w', encoding='utf-8') as json_file: - json.dump(contents_data, json_file, ensure_ascii=False, indent=4) + with open(json_filename, 'w', encoding='utf-8') as save_json_file: + json.dump(contents_data, save_json_file, ensure_ascii=False, indent=4) for item in contents_data['data']: cursor.execute(''' @@ -242,7 +242,7 @@ def get_course(): folder_path = os.path.join(course_id_folder, folder_name) if not os.path.exists(folder_path): os.makedirs(folder_path) - move_file = os.path.join(folder_path, attachment['name']); + move_file = os.path.join(folder_path, attachment['name']) shutil.move(filename, move_file) # 保存category为text的content到TXT文件 @@ -253,21 +253,27 @@ def get_course(): txt_file.write(content + '\n') # 处理mp4文件 mp4_folder = os.path.join(course_id_folder, 'mp4') + mp4_file = None + exist_md_file = False if os.path.exists(mp4_folder): # 遍历指定文件夹内的所有文件和子文件夹 for root, dirs, files in os.walk(mp4_folder): for file in files: + # 检查是否已经存在 + if file.lower().endswith('.md'): + exist_md_file = True # 检查文件扩展名是否为.mp4 if file.lower().endswith('.mp4'): # 构建完整的 MP4 文件路径 mp4_file = os.path.join(root, file) - # 调用 mp4_to_wav 函数进行转换 - wav_file = convert_mp4(mp4_file) - if wav_file is not None: - try: - process_audio_file(wav_file) - except: - print('process_audio_file fail') + if (not exist_md_file) and mp4_file is not None: + # 调用 mp4_to_wav 函数进行转换 + wav_file = convert_mp4(mp4_file) + if wav_file is not None: + try: + process_audio_file(wav_file) + except: + print('process_audio_file fail') if __name__ == '__main__': diff --git a/courses.db b/courses.db index 3000ddf..f1296b1 100755 Binary files a/courses.db and b/courses.db differ diff --git a/monitor b/monitor index bd25bd3..b1d647b 160000 --- a/monitor +++ b/monitor @@ -1 +1 @@ -Subproject commit bd25bd3ec0b655b8994dfd568abbfa52b22d1baa +Subproject commit b1d647b3b89691b8373d6fb6806711614281fd02 diff --git a/requirements.txt b/requirements.txt index cfd7b1c..33d5171 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ -requests -pydub \ No newline at end of file +requests~=2.32.3 +pydub~=0.25.1 +gradio_client~=1.7.2 \ No newline at end of file diff --git a/video_voice_process.py b/video_voice_process.py index 4d8b98e..b3be43a 100644 --- a/video_voice_process.py +++ b/video_voice_process.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import configparser import logging import os import shutil @@ -6,11 +7,20 @@ import threading from concurrent.futures import ThreadPoolExecutor, as_completed import requests +from gradio_client import Client, handle_file from pydub import AudioSegment from pydub.silence import split_on_silence -# 创建一个锁对象 -file_write_lock = threading.Lock() +use_remote_api = False +process_workers = 5 if use_remote_api else 2 + +config = configparser.ConfigParser() +config.read('config.ini') +token = config['DEFAULT']['voice_token'] +url = config['DEFAULT']['voice2txt_url'] +headers = { + "Authorization": f'Bearer {token}' +} # 配置日志 @@ -44,6 +54,8 @@ def process_audio_file(audio_file_path): wav_filename = os.path.splitext(os.path.basename(audio_file_path))[0] # 生成对应的 WAV 文件路径 md_file = os.path.join(wav_dir, f"{wav_filename}.md") + # 创建一个锁对象 + file_write_lock = threading.Lock() try: audio_file = AudioSegment.from_wav(audio_file_path) @@ -57,45 +69,11 @@ def process_audio_file(audio_file_path): else: new_audio_chunks.append(chunk) - url = "https://api.siliconflow.cn/v1/audio/transcriptions" - headers = { - "Authorization": "Bearer sk-lakndqcjlmtukekcliwkkryaxquifduhvzgcnlhofzvofllv" - } - sentences = [] # 用于存储所有句子的列表 - def send_request(chunk, index): - try: - audio_part_path = os.path.join('media', f"{file_name_without_extension}_chunk_{index}.wav") - chunk.export(audio_part_path, format="wav") - logging.info(f'Exported chunk file {audio_part_path} for {file_name_with_extension}') - - multipart_form_data = { - 'file': (os.path.basename(audio_part_path), open(audio_part_path, 'rb')), - 'model': (None, 'FunAudioLLM/SenseVoiceSmall') - } - - response = requests.post(url, files=multipart_form_data, headers=headers) - result = response.json() - text = result["text"] - print(text) - return index, text # 返回索引和文本 - except Exception as e: - logging.error(f'Error processing {file_name_with_extension}, chunk {index}: {str(e)}') - # 将出错的音频片段复制到error文件夹 - error_dir = os.path.join(os.getcwd(), 'media', 'error') - if not os.path.exists(error_dir): - os.makedirs(error_dir) - error_path = os.path.join(error_dir, f"{file_name_without_extension}_chunk_{index}.wav") - shutil.copy(audio_part_path, error_path) - logging.error(f'Copied request failed chunk file {file_name_with_extension} to {error_path}') - return index, "" # 返回空文本 - finally: - if os.path.exists(audio_part_path): - os.remove(audio_part_path) - - with ThreadPoolExecutor(max_workers=5) as executor: - futures = {executor.submit(send_request, chunk, i): i for i, chunk in enumerate(new_audio_chunks)} + with ThreadPoolExecutor(max_workers=process_workers) as executor: + futures = {executor.submit(send_request, chunk, i, file_name_without_extension): i for i, chunk in + enumerate(new_audio_chunks)} for future in as_completed(futures): index = futures[future] @@ -110,22 +88,63 @@ def process_audio_file(audio_file_path): for sentence in sorted(sentences, key=lambda x: x[0]): # 根据索引排序 markdown_content += f"{sentence[1]}\n\n" - with file_write_lock: # 确保文件写入操作的线程安全 + # with file_write_lock: # 确保文件写入操作的线程安全 # md_file_path = os.path.join('media', file_name_without_extension + '.md') - with open(md_file, "w", encoding="utf-8") as f: - f.write(markdown_content) + with open(md_file, "w", encoding="utf-8") as f: + f.write(markdown_content) logging.info(f"Finished processing {file_name_with_extension}") except Exception as e: logging.error(f"Failed to process {file_name_with_extension}: {str(e)}") +def send_request(chunk, index, file_name_without_extension): + audio_part_path = os.path.join('media', f"{file_name_without_extension}_chunk_{index}.wav") + chunk.export(audio_part_path, format="wav") + logging.info(f'Exported chunk file {audio_part_path} for {file_name_without_extension}') + try: + if use_remote_api: + multipart_form_data = { + 'file': (os.path.basename(audio_part_path), open(audio_part_path, 'rb')), + 'model': (None, 'FunAudioLLM/SenseVoiceSmall') + } + + response = requests.post(url, files=multipart_form_data, headers=headers) + result = response.json() + text = result["text"] + print(text) + + else: + client = Client("http://192.168.31.3:7860/") + text = client.predict( + input_wav=handle_file(audio_part_path), + language="zh", + api_name="/model_inference" + ) + print(text) + + return index, text # 返回索引和文本 + except Exception as e: + logging.error(f'Error processing {file_name_without_extension}, chunk {index}: {str(e)}') + # 将出错的音频片段复制到error文件夹 + error_dir = os.path.join(os.getcwd(), 'media', 'error') + if not os.path.exists(error_dir): + os.makedirs(error_dir) + error_path = os.path.join(error_dir, f"{file_name_without_extension}_chunk_{index}.wav") + shutil.copy(audio_part_path, error_path) + logging.error(f'Copied request failed chunk file {file_name_without_extension} to {error_path}') + return index, "" # 返回空文本 + finally: + if os.path.exists(audio_part_path): + os.remove(audio_part_path) + + def main(): all_files = os.listdir('media') audio_files = [file for file in all_files if file.endswith('.wav')] print(audio_files) - with ThreadPoolExecutor(max_workers=5) as executor: + with ThreadPoolExecutor(max_workers=process_workers) as executor: for audio_file in audio_files: audio_file_path = os.path.join('media', audio_file) executor.submit(process_audio_file, audio_file_path)