update voice process use gradio

2025-03-05 12:48:47 +08:00
parent 8cf2db5dbc
commit 5b5570ccc8
6 changed files with 86 additions and 57 deletions
--- a/config.ini
+++ b/config.ini
@@ -5,4 +5,7 @@ sort=

 max_download_threads = 5
 max_retry_attempts = 3
-authorization_token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiIxMDAwMDgzNDciLCJleHAiOjE3NDI2MDc3NDAsImp0aSI6IjEwMjM4YTJmLTBiN2QtNDIwNi1iNDU2LTQ1MTRiMjFjZGM4MyIsImlhdCI6MTc0MDAxNTc0MCwiaXNzIjoiYXBwdXNlciIsInVpZCI6ImJlMmViOGIyLTFhOTItNGVmMC05ZDAwLTA1YTlkN2E2OWRiMiIsInNjaGVtZSI6Imp3dGhzIiwic2lkIjoiZDliZjIzNjQtNjVhYi00ZWNkLThhZjctY2MzMDcxODU0M2M5In0.05GqlG4rhwwlbuQUfEHlHTB-vAz2lOh5JCVlQ6j4V8s
+authorization_token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiIxMDAwMDgzNDciLCJleHAiOjE3NDI2MDc3NDAsImp0aSI6IjEwMjM4YTJmLTBiN2QtNDIwNi1iNDU2LTQ1MTRiMjFjZGM4MyIsImlhdCI6MTc0MDAxNTc0MCwiaXNzIjoiYXBwdXNlciIsInVpZCI6ImJlMmViOGIyLTFhOTItNGVmMC05ZDAwLTA1YTlkN2E2OWRiMiIsInNjaGVtZSI6Imp3dGhzIiwic2lkIjoiZDliZjIzNjQtNjVhYi00ZWNkLThhZjctY2MzMDcxODU0M2M5In0.05GqlG4rhwwlbuQUfEHlHTB-vAz2lOh5JCVlQ6j4V8s
+
+voice2txt_url=https://api.siliconflow.cn/v1/audio/transcriptions
+voice_token=sk-vksrlpckcpttnpjgftpgwytmiipjmvhyzmnffhbhjpahbfiq
--- a/course.py
+++ b/course.py
@@ -154,8 +154,8 @@ def get_course():
        else:
            response = requests.get(f'https://bandu-api.songy.info/v2/courses/{course_id}/contents', headers=headers)
            contents_data = response.json()
-            with open(json_filename, 'w', encoding='utf-8') as json_file:
-                json.dump(contents_data, json_file, ensure_ascii=False, indent=4)
+            with open(json_filename, 'w', encoding='utf-8') as save_json_file:
+                json.dump(contents_data, save_json_file, ensure_ascii=False, indent=4)

        for item in contents_data['data']:
            cursor.execute('''
@@ -242,7 +242,7 @@ def get_course():
                    folder_path = os.path.join(course_id_folder, folder_name)
                    if not os.path.exists(folder_path):
                        os.makedirs(folder_path)
-                    move_file = os.path.join(folder_path, attachment['name']);
+                    move_file = os.path.join(folder_path, attachment['name'])
                    shutil.move(filename, move_file)

        # 保存category为text的content到TXT文件
@@ -253,21 +253,27 @@ def get_course():
                    txt_file.write(content + '\n')
        # 处理mp4文件
        mp4_folder = os.path.join(course_id_folder, 'mp4')
+        mp4_file = None
+        exist_md_file = False
        if os.path.exists(mp4_folder):
            # 遍历指定文件夹内的所有文件和子文件夹
            for root, dirs, files in os.walk(mp4_folder):
                for file in files:
+                    # 检查是否已经存在
+                    if file.lower().endswith('.md'):
+                        exist_md_file = True
                    # 检查文件扩展名是否为.mp4
                    if file.lower().endswith('.mp4'):
                        # 构建完整的 MP4 文件路径
                        mp4_file = os.path.join(root, file)
-                        # 调用 mp4_to_wav 函数进行转换
-                        wav_file = convert_mp4(mp4_file)
-                        if wav_file is not None:
-                            try:
-                                process_audio_file(wav_file)
-                            except:
-                                print('process_audio_file fail')
+        if (not exist_md_file) and mp4_file is not None:
+            # 调用 mp4_to_wav 函数进行转换
+            wav_file = convert_mp4(mp4_file)
+            if wav_file is not None:
+                try:
+                    process_audio_file(wav_file)
+                except:
+                    print('process_audio_file fail')


 if __name__ == '__main__':
--- a/courses.db
+++ b/courses.db
--- a/2
+++ b/2
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
-requests
-pydub
+requests~=2.32.3
+pydub~=0.25.1
+gradio_client~=1.7.2
--- a/video_voice_process.py
+++ b/video_voice_process.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+import configparser
 import logging
 import os
 import shutil
@@ -6,11 +7,20 @@ import threading
 from concurrent.futures import ThreadPoolExecutor, as_completed

 import requests
+from gradio_client import Client, handle_file
 from pydub import AudioSegment
 from pydub.silence import split_on_silence

-# 创建一个锁对象
-file_write_lock = threading.Lock()
+# Backend switch: True posts chunks to the HTTP transcription API configured
+# in config.ini, False sends them through the Gradio client instead.
+use_remote_api = False
+# Worker count passed to ThreadPoolExecutor(max_workers=...).
+process_workers = 5 if use_remote_api else 2
+
+# 'config.ini' is a relative path, so it is resolved against the current
+# working directory; ConfigParser.read() silently skips a missing file.
+config = configparser.ConfigParser()
+config.read('config.ini')
+# The remote credentials are only used when use_remote_api is enabled, so a
+# config.ini without them must not break importing this module.
+token = config.get('DEFAULT', 'voice_token', fallback='')
+url = config.get('DEFAULT', 'voice2txt_url', fallback='')
+if use_remote_api and not (token and url):
+    raise KeyError("config.ini must define voice_token and voice2txt_url when use_remote_api is enabled")
+headers = {
+    "Authorization": f'Bearer {token}'
+}


 # 配置日志
@@ -44,6 +54,8 @@ def process_audio_file(audio_file_path):
    wav_filename = os.path.splitext(os.path.basename(audio_file_path))[0]
    # 生成对应的 WAV 文件路径
    md_file = os.path.join(wav_dir, f"{wav_filename}.md")
+    # 创建一个锁对象
+    file_write_lock = threading.Lock()

    try:
        audio_file = AudioSegment.from_wav(audio_file_path)
@@ -57,45 +69,11 @@ def process_audio_file(audio_file_path):
            else:
                new_audio_chunks.append(chunk)

-        url = "https://api.siliconflow.cn/v1/audio/transcriptions"
-        headers = {
-            "Authorization": "Bearer sk-lakndqcjlmtukekcliwkkryaxquifduhvzgcnlhofzvofllv"
-        }
-
        sentences = []  # 用于存储所有句子的列表

-        def send_request(chunk, index):
-            try:
-                audio_part_path = os.path.join('media', f"{file_name_without_extension}_chunk_{index}.wav")
-                chunk.export(audio_part_path, format="wav")
-                logging.info(f'Exported chunk file {audio_part_path} for {file_name_with_extension}')
-
-                multipart_form_data = {
-                    'file': (os.path.basename(audio_part_path), open(audio_part_path, 'rb')),
-                    'model': (None, 'FunAudioLLM/SenseVoiceSmall')
-                }
-
-                response = requests.post(url, files=multipart_form_data, headers=headers)
-                result = response.json()
-                text = result["text"]
-                print(text)
-                return index, text  # 返回索引和文本
-            except Exception as e:
-                logging.error(f'Error processing {file_name_with_extension}, chunk {index}: {str(e)}')
-                # 将出错的音频片段复制到error文件夹
-                error_dir = os.path.join(os.getcwd(), 'media', 'error')
-                if not os.path.exists(error_dir):
-                    os.makedirs(error_dir)
-                error_path = os.path.join(error_dir, f"{file_name_without_extension}_chunk_{index}.wav")
-                shutil.copy(audio_part_path, error_path)
-                logging.error(f'Copied request failed chunk file {file_name_with_extension} to {error_path}')
-                return index, ""  # 返回空文本
-            finally:
-                if os.path.exists(audio_part_path):
-                    os.remove(audio_part_path)
-
-        with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = {executor.submit(send_request, chunk, i): i for i, chunk in enumerate(new_audio_chunks)}
+        with ThreadPoolExecutor(max_workers=process_workers) as executor:
+            futures = {executor.submit(send_request, chunk, i, file_name_without_extension): i for i, chunk in
+                       enumerate(new_audio_chunks)}

            for future in as_completed(futures):
                index = futures[future]
@@ -110,22 +88,63 @@ def process_audio_file(audio_file_path):
        for sentence in sorted(sentences, key=lambda x: x[0]):  # 根据索引排序
            markdown_content += f"{sentence[1]}\n\n"

-        with file_write_lock:  # 确保文件写入操作的线程安全
+        # with file_write_lock:  # 确保文件写入操作的线程安全
            # md_file_path = os.path.join('media', file_name_without_extension + '.md')
-            with open(md_file, "w", encoding="utf-8") as f:
-                f.write(markdown_content)
+        with open(md_file, "w", encoding="utf-8") as f:
+            f.write(markdown_content)

        logging.info(f"Finished processing {file_name_with_extension}")
    except Exception as e:
        logging.error(f"Failed to process {file_name_with_extension}: {str(e)}")


+def send_request(chunk, index, file_name_without_extension):
+    """Transcribe one audio chunk and return ``(index, text)``.
+
+    The chunk is exported to a temporary WAV file under ``media`` and sent
+    either to the HTTP transcription endpoint (when ``use_remote_api`` is
+    true) or to the ``/model_inference`` endpoint through the Gradio client.
+    The temporary file is always removed afterwards.
+
+    Args:
+        chunk: Audio segment to transcribe; must provide ``export()``.
+        index: Position of the chunk, returned unchanged so the caller can
+            restore the original order.
+        file_name_without_extension: Base name used for the temporary file,
+            the error copy and the log messages.
+
+    Returns:
+        ``(index, text)`` on success, or ``(index, "")`` when the request
+        fails; the failed chunk is copied to ``media/error`` first.
+
+    Note:
+        The export happens before the ``try`` block, so an export failure
+        propagates to the caller instead of being converted to ``""``.
+    """
+    audio_part_path = os.path.join('media', f"{file_name_without_extension}_chunk_{index}.wav")
+    chunk.export(audio_part_path, format="wav")
+    logging.info(f'Exported chunk file {audio_part_path} for {file_name_without_extension}')
+    try:
+        if use_remote_api:
+            # Close the upload handle before the ``finally`` cleanup runs: an
+            # open handle leaks a descriptor and blocks os.remove() on Windows.
+            with open(audio_part_path, 'rb') as audio_part:
+                multipart_form_data = {
+                    'file': (os.path.basename(audio_part_path), audio_part),
+                    'model': (None, 'FunAudioLLM/SenseVoiceSmall')
+                }
+
+                response = requests.post(url, files=multipart_form_data, headers=headers)
+            result = response.json()
+            text = result["text"]
+            print(text)
+
+        else:
+            client = Client("http://192.168.31.3:7860/")
+            text = client.predict(
+                input_wav=handle_file(audio_part_path),
+                language="zh",
+                api_name="/model_inference"
+            )
+            print(text)
+
+        return index, text  # return the index together with the text
+    except Exception as e:
+        logging.error(f'Error processing {file_name_without_extension}, chunk {index}: {str(e)}')
+        # Keep a copy of the failed chunk in the error folder for inspection.
+        error_dir = os.path.join(os.getcwd(), 'media', 'error')
+        if not os.path.exists(error_dir):
+            os.makedirs(error_dir)
+        error_path = os.path.join(error_dir, f"{file_name_without_extension}_chunk_{index}.wav")
+        shutil.copy(audio_part_path, error_path)
+        logging.error(f'Copied request failed chunk file {file_name_without_extension} to {error_path}')
+        return index, ""  # empty text marks a failed chunk
+    finally:
+        if os.path.exists(audio_part_path):
+            os.remove(audio_part_path)
+
+
 def main():
    all_files = os.listdir('media')
    audio_files = [file for file in all_files if file.endswith('.wav')]
    print(audio_files)

-    with ThreadPoolExecutor(max_workers=5) as executor:
+    with ThreadPoolExecutor(max_workers=process_workers) as executor:
        for audio_file in audio_files:
            audio_file_path = os.path.join('media', audio_file)
            executor.submit(process_audio_file, audio_file_path)