优化代码

2025-03-07 15:10:44 +08:00
parent 5b5570ccc8
commit 05b8744a9d
3 changed files with 40 additions and 52 deletions
--- a/course.py
+++ b/course.py
@@ -11,7 +11,17 @@ from threading import Thread
 import requests

 from headers import headers
+import logging
 from video_voice_process import process_audio_file
+from logging.handlers import RotatingFileHandler
+
+# 配置日志
+logging.basicConfig(level=logging.INFO,
+                    format='%(asctime)s - %(levelname)s - %(message)s',
+                    handlers=[
+                        logging.StreamHandler(),  # 控制台日志
+                        RotatingFileHandler('app.log', maxBytes=1024*1024*5, backupCount=3)  # 日志文件
+                    ])

 # 读取配置文件
 config = configparser.ConfigParser()
@@ -26,13 +36,9 @@ headers['authorization'] = f'Bearer {authorization_token}'

 def download_attachment(attachment, course_id_folder, course_audio_filename, max_retries):
    if attachment['name'] in ["", None] or attachment['name'].endswith(".m3u8"):
-        print("字符串为空")
-        # 找到最后一个斜杠的位置
+        logging.info("字符串为空")
        last_slash_index = attachment['url'].rfind('/')
-
-        # 截取最后一个斜杠之后的所有字符
        download_filename = attachment['url'][last_slash_index + 1:]
-        print(attachment['url'])
    else:
        download_filename = attachment['name']

@@ -40,35 +46,33 @@ def download_attachment(attachment, course_id_folder, course_audio_filename, max
    while attempt < max_retries:
        try:
            url = attachment['url']
-            print(download_filename)
-            print(attachment['name'])

            file_extension = attachment['name'].split('.')[-1].lower()
            if file_extension != 'mp3':
                course_id_folder = os.path.join(course_id_folder, file_extension)
            else:
                if os.path.exists(course_audio_filename):
-                    print(f"File {course_audio_filename} already exists, skipping download.")
+                    logging.info(f"File {course_audio_filename} already exists, skipping download.")
                    return

            filename = os.path.join(course_id_folder, download_filename)

            if os.path.exists(filename):
-                print(f"File {filename} already exists, skipping download.")
+                logging.info(f"File {filename} already exists, skipping download.")
                return

            command = f"aria2c -o {filename} -x 16 -s 16 {url}"
-            print(command)
            subprocess.run(command, shell=True, check=True)
+            logging.info(f"Download Command: {command}")

            return
        except subprocess.CalledProcessError as e:
-            print(f"Failed to download {attachment['name']}: {e}")
+            logging.error(f"Failed to download {attachment['name']}: {e}")
            attempt += 1
            if attempt == max_retries:
-                print(f"Failed to download {attachment['name']} after {max_retries} attempts.")
+                logging.error(f"Failed to download {attachment['name']} after {max_retries} attempts.")
            else:
-                print(f"Retrying {attachment['name']}... ({attempt}/{max_retries})")
+                logging.warning(f"Retrying {attachment['name']}... ({attempt}/{max_retries})")


 def worker(queue, course_id_folder, course_audio_filename, max_retries):
@@ -80,14 +84,10 @@ def worker(queue, course_id_folder, course_audio_filename, max_retries):

 def convert_mp4(mp4_file):
    try:
-        # 获取 MP4 文件所在的目录
        mp4_dir = os.path.dirname(mp4_file)
-        # 获取 MP4 文件的文件名（不包含扩展名）
        mp4_filename = os.path.splitext(os.path.basename(mp4_file))[0]
-        # 生成对应的 WAV 文件路径
        wav_file = os.path.join(mp4_dir, f"{mp4_filename}.wav")

-        # 构建 FFmpeg 命令
        command = [
            'ffmpeg',
            '-y',
@@ -99,56 +99,46 @@ def convert_mp4(mp4_file):
            wav_file
        ]

-        # 执行 FFmpeg 命令
        subprocess.run(command, check=True)
-        print(f"成功将 {mp4_file} 转换为 {wav_file}")
+        logging.info(f"成功将 {mp4_file} 转换为 {wav_file}")
        return wav_file
    except subprocess.CalledProcessError as e:
-        print(f"转换失败: {e}")
+        logging.error(f"转换失败: {e}")
        return None
    except FileNotFoundError:
-        print("未找到 FFmpeg，请确保已安装并配置好 FFmpeg 环境。")
+        logging.error("未找到 FFmpeg，请确保已安装并配置好 FFmpeg 环境。")
        return None

+
 def get_course():
-    # 连接到SQLite数据库
    conn = sqlite3.connect('courses.db')
    cursor = conn.cursor()
-    max_course_id = cursor.execute('SELECT id FROM courses ORDER BY id DESC LIMIT 1')  # 获取数据库中最大的课程ID
+    max_course_id = cursor.execute('SELECT id FROM courses ORDER BY id DESC LIMIT 1')
    if max_course_id:
        max_course_id = max_course_id.fetchone()[0]
-        print(f"The maximum course ID is {max_course_id}")
+        logging.info(f"The maximum course ID is {max_course_id}")
    else:
-        print("No courses found in the database.")
+        logging.info("No courses found in the database.")
        max_course_id = 11
    start_course_id = max_course_id - 5

-    # 查询courses表中的所有课程ID
    cursor.execute('SELECT id, title FROM courses where id >= ?', (start_course_id,))
-    # cursor.execute('SELECT id, title FROM courses where id >= ')
    course_ids_data = cursor.fetchall()
-    print(course_ids_data)
    course_ids = [row[0] for row in course_ids_data]
    course_ids_dict = dict(course_ids_data)
-    print(course_ids_dict)
-    print(course_ids)

-    # 创建json文件夹
    if not os.path.exists('json'):
        os.makedirs('json')

-    # 创建course文件夹
    if not os.path.exists('course'):
        os.makedirs('course')

-    # 先请求全部的链接获取数据，并将获取到的课程信息保存到数据库中
    for course_id in course_ids:
-        # course_id = course_id_tuple[0]
-        print(f"Processing course ID: {course_id}")
+        logging.info(f"Processing course ID: {course_id}")

        json_filename = os.path.join('json', f'{course_id}.json')
        if os.path.exists(json_filename):
-            print(f"Course {course_id} JSON file already exists, using local file.")
+            logging.info(f"Course {course_id} JSON file already exists, using local file.")
            with open(json_filename, 'r', encoding='utf-8') as json_file:
                contents_data = json.load(json_file)
        else:
@@ -171,7 +161,6 @@ def get_course():
    cursor.close()
    conn.close()

-    # 现在所有的课程信息都已经保存到数据库中，开始下载附件和进行后续操作
    for course_id in course_ids:
        course_id_folder = os.path.join('course', str(course_id))

@@ -185,27 +174,23 @@ def get_course():

        attachment_queue = Queue()

-        # 下载所有附件
        for attachment in [item['attachment'] for item in contents_data['data'] if item['attachment']]:
            attachment_queue.put(attachment)

-        # 创建并启动多个下载线程
        threads = []
        for _ in range(max_download_threads):
-            t = Thread(target=worker, args=(attachment_queue, course_id_folder, course_audio_filename, max_retry_attempts))
+            t = Thread(target=worker,
+                       args=(attachment_queue, course_id_folder, course_audio_filename, max_retry_attempts))
            t.start()
            threads.append(t)

-        # 等待所有下载任务完成
        attachment_queue.join()

        for t in threads:
            t.join()

-        # 检查是否存在音频文件
        audio_files = [item for item in contents_data['data'] if item['category'] == 'audio']
        if audio_files:
-            # 合并所有音频文件
            audio_files.sort(key=lambda x: x['order'])

            combined_audio_filename = os.path.join(course_id_folder, 'combined_audio.mp3')
@@ -221,15 +206,13 @@ def get_course():
                shutil.move(combined_audio_filename, course_audio_filename)
                os.remove(text_file)

-                # 删除下载的临时音频文件
                for item in audio_files:
                    audio_file_path = os.path.join(course_id_folder, item['attachment']['name'])
                    try:
                        os.remove(audio_file_path)
                    except:
-                        print('delete file fail')
+                        logging.error('delete file fail')

-        # 整理文件
        for item in contents_data['data']:
            attachment = item['attachment']
            if attachment:
--- a/courses.db
+++ b/courses.db
--- a/video_voice_process.py
+++ b/video_voice_process.py
@@ -11,8 +11,8 @@ from gradio_client import Client, handle_file
 from pydub import AudioSegment
 from pydub.silence import split_on_silence

-use_remote_api = False
-process_workers = 5 if use_remote_api else 2
+use_remote_api = True
+process_workers = 5 if use_remote_api else 1

 config = configparser.ConfigParser()
 config.read('config.ini')
@@ -101,7 +101,7 @@ def process_audio_file(audio_file_path):
 def send_request(chunk, index, file_name_without_extension):
    audio_part_path = os.path.join('media', f"{file_name_without_extension}_chunk_{index}.wav")
    chunk.export(audio_part_path, format="wav")
-    logging.info(f'Exported chunk file {audio_part_path} for {file_name_without_extension}')
+    # logging.info(f'Exported chunk file {audio_part_path} for {file_name_without_extension}')
    try:
        if use_remote_api:
            multipart_form_data = {
@@ -140,13 +140,18 @@ def send_request(chunk, index, file_name_without_extension):


 def main():
-    all_files = os.listdir('media')
-    audio_files = [file for file in all_files if file.endswith('.wav')]
+    # all_files = os.listdir('media')
+    # audio_files = [file for file in all_files if file.endswith('.wav')]
+    audio_files = []
+    for root, dirs, files in os.walk('media'):
+        for file in files:
+            if file.endswith('.wav'):
+                audio_files.append(os.path.join(root, file))
    print(audio_files)

    with ThreadPoolExecutor(max_workers=process_workers) as executor:
        for audio_file in audio_files:
-            audio_file_path = os.path.join('media', audio_file)
+            audio_file_path = os.path.join(audio_file)
            executor.submit(process_audio_file, audio_file_path)