Change audio-to-text transcription to use the local service

2025-03-10 15:32:05 +08:00
parent 05b8744a9d
commit 3d363ff1fd
3 changed files with 17 additions and 36 deletions
--- a/courses.db
+++ b/courses.db
--- a/markdown_generator.py
+++ b/markdown_generator.py
@@ -7,6 +7,7 @@ from concurrent.futures import ThreadPoolExecutor
 from os import makedirs

 import requests
+from gradio_client import Client, handle_file

 import json

@@ -14,15 +15,6 @@ import json
 config = configparser.ConfigParser()
 config.read('config.ini')
 max_download_threads = int(config['DEFAULT']['max_download_threads'])
-# start_course_id = int(config['DEFAULT']['start_course_id'])
-
-# 转译url
-trans_url = 'https://api.siliconflow.cn/v1/audio/transcriptions'
-
-headers = {
-    "Authorization": "Bearer sk-lakndqcjlmtukekcliwkkryaxquifduhvzgcnlhofzvofllv",
-    # "Content-Type": "multipart/form-data"
-}

 # 设置日志配置
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -55,9 +47,9 @@ def create_audio_transcriptions_table(db_path):
    try:
        # 执行创建表的SQL语句
        cursor.execute(CREATE_TABLE_SQL)
-        print("表audio_transcriptions创建成功。")
+        logging.info("表audio_transcriptions创建成功。")
    except sqlite3.Error as e:
-        print(f"创建表时出错: {e}")
+        logging.error(f"创建表时出错: {e}")
    finally:
        # 关闭数据库连接
        conn.close()
@@ -67,8 +59,6 @@ def create_audio_transcriptions_table(db_path):
 db_path = 'courses.db'  # 数据库文件路径


-# create_audio_transcriptions_table(db_path)
-
 # 下载音频文件
 def download_file(url, local_path):
    try:
@@ -87,23 +77,15 @@ def download_file(url, local_path):

 # 调用api将语音转换为文本
 def voice2txt(voice_path):
-    url = trans_url
-    multipart_form_data = {
-        'file': ('audio.mp3', open(voice_path, 'rb')),
-        'model': (None, 'FunAudioLLM/SenseVoiceSmall')
-    }
-    response = requests.request("POST", url, files=multipart_form_data, headers=headers)
-    # 检查请求是否成功
-    if response.status_code == 200:
-        # 解析JSON响应
-        data = response.json()

-        # 提取text的值
-        text_value = data.get('text', None)  # 使用get方法可以避免KeyError，如果'text'键不存在则返回None
-        logging.info(f"Text value: {text_value}")
-        return text_value
-    else:
-        print('请求失败，状态码：', response.status_code)
+    client = Client("http://192.168.31.3:7860/")
+    text = client.predict(
+        input_wav=handle_file(voice_path),
+        language="zh",
+        api_name="/model_inference"
+    )
+    logging.info(text)
+    return text


 # 保存文本到数据库
@@ -199,9 +181,9 @@ def get_content():
    max_course_id = cursor.execute('SELECT id FROM courses ORDER BY id DESC LIMIT 1')  # 获取数据库中最大的课程ID
    if max_course_id:
        max_course_id = max_course_id.fetchone()[0]
-        print(f"The maximum course ID is {max_course_id}")
+        logging.info(f"The maximum course ID is {max_course_id}")
    else:
-        print("No courses found in the database.")
+        logging.info("No courses found in the database.")
        max_course_id = 11
    start_course_id = max_course_id - 5

@@ -219,7 +201,7 @@ def get_content():

    # 先请求全部的链接获取数据，并将获取到的课程信息保存到数据库中
    for course_id in course_ids:
-        print(f"Processing course ID: {course_id}")
+        logging.info(f"Processing course ID: {course_id}")

        json_filename = os.path.join('json', f'{course_id}.json')
        # copy_json_file_name = os.path.join('data', 'json', f'{course_ids_dict[course_id]}.json').replace('?', '？')
@@ -228,17 +210,16 @@ def get_content():
        # md_file_name = os.path.join('data', 'markdown', f'{course_ids_dict[course_id]}.md')
        md_file_name = os.path.join('course', f'{course_id}', f'{course_ids_dict[course_id]}.md')
        if os.path.exists(json_filename):
-            print(f"Course {course_id} JSON file already exists, using local file.")
+            logging.info(f"Course {course_id} JSON file already exists, using local file.")
            makedirs(f'course/{course_id}/json', exist_ok=True)
            shutil.copy2(json_filename, copy_json_file_name)
            json_to_markdown(copy_json_file_name, md_file_name)
        else:
            continue

-        # logseq_md_file_name = os.path.join('data', 'markdown_logseq', f'{course_ids_dict[course_id]}.md')
        logseq_md_file_name = os.path.join('course', f'{course_id}', f'{course_ids_dict[course_id]}_logseq.md')
        if os.path.exists(json_filename):
-            print(f"Course {course_id} JSON file already exists, using local file.")
+            logging.info(f"Course {course_id} JSON file already exists, using local file.")
            shutil.copy2(json_filename, copy_json_file_name)
            json_to_markdown(copy_json_file_name, logseq_md_file_name, logseq=True)
        else:
--- a/video_voice_process.py
+++ b/video_voice_process.py
@@ -11,7 +11,7 @@ from gradio_client import Client, handle_file
 from pydub import AudioSegment
 from pydub.silence import split_on_silence

-use_remote_api = True
+use_remote_api = False
 process_workers = 5 if use_remote_api else 1

 config = configparser.ConfigParser()