From 05b8744a9dc7dab75c8061d01f9edb9fbea181c2 Mon Sep 17 00:00:00 2001 From: YuanHui <31339626+alsesa@users.noreply.github.com> Date: Fri, 7 Mar 2025 15:10:44 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- course.py | 75 ++++++++++++++++------------------------- courses.db | Bin 2580480 -> 2588672 bytes video_voice_process.py | 17 ++++++---- 3 files changed, 40 insertions(+), 52 deletions(-) diff --git a/course.py b/course.py index c629077..a6701d7 100755 --- a/course.py +++ b/course.py @@ -11,7 +11,17 @@ from threading import Thread import requests from headers import headers +import logging from video_voice_process import process_audio_file +from logging.handlers import RotatingFileHandler + +# 配置日志 +logging.basicConfig(level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + handlers=[ + logging.StreamHandler(), # 控制台日志 + RotatingFileHandler('app.log', maxBytes=1024*1024*5, backupCount=3) # 日志文件 + ]) # 读取配置文件 config = configparser.ConfigParser() @@ -26,13 +36,9 @@ headers['authorization'] = f'Bearer {authorization_token}' def download_attachment(attachment, course_id_folder, course_audio_filename, max_retries): if attachment['name'] in ["", None] or attachment['name'].endswith(".m3u8"): - print("字符串为空") - # 找到最后一个斜杠的位置 + logging.info("字符串为空") last_slash_index = attachment['url'].rfind('/') - - # 截取最后一个斜杠之后的所有字符 download_filename = attachment['url'][last_slash_index + 1:] - print(attachment['url']) else: download_filename = attachment['name'] @@ -40,35 +46,33 @@ def download_attachment(attachment, course_id_folder, course_audio_filename, max while attempt < max_retries: try: url = attachment['url'] - print(download_filename) - print(attachment['name']) file_extension = attachment['name'].split('.')[-1].lower() if file_extension != 'mp3': course_id_folder = os.path.join(course_id_folder, file_extension) else: if os.path.exists(course_audio_filename): - print(f"File {course_audio_filename} already exists, skipping download.") + logging.info(f"File {course_audio_filename} already exists, skipping download.") return filename = os.path.join(course_id_folder, download_filename) if os.path.exists(filename): - print(f"File {filename} already exists, skipping download.") + logging.info(f"File {filename} already exists, skipping download.") return command = f"aria2c -o {filename} -x 16 -s 16 {url}" - print(command) subprocess.run(command, shell=True, check=True) + logging.info(f"Download Command: {command}") return except subprocess.CalledProcessError as e: - print(f"Failed to download {attachment['name']}: {e}") + logging.error(f"Failed to download {attachment['name']}: {e}") attempt += 1 if attempt == max_retries: - print(f"Failed to download {attachment['name']} after {max_retries} attempts.") + logging.error(f"Failed to download {attachment['name']} after {max_retries} attempts.") else: - print(f"Retrying {attachment['name']}... ({attempt}/{max_retries})") + logging.warning(f"Retrying {attachment['name']}... ({attempt}/{max_retries})") def worker(queue, course_id_folder, course_audio_filename, max_retries): @@ -80,14 +84,10 @@ def worker(queue, course_id_folder, course_audio_filename, max_retries): def convert_mp4(mp4_file): try: - # 获取 MP4 文件所在的目录 mp4_dir = os.path.dirname(mp4_file) - # 获取 MP4 文件的文件名(不包含扩展名) mp4_filename = os.path.splitext(os.path.basename(mp4_file))[0] - # 生成对应的 WAV 文件路径 wav_file = os.path.join(mp4_dir, f"{mp4_filename}.wav") - # 构建 FFmpeg 命令 command = [ 'ffmpeg', '-y', @@ -99,56 +99,46 @@ def convert_mp4(mp4_file): wav_file ] - # 执行 FFmpeg 命令 subprocess.run(command, check=True) - print(f"成功将 {mp4_file} 转换为 {wav_file}") + logging.info(f"成功将 {mp4_file} 转换为 {wav_file}") return wav_file except subprocess.CalledProcessError as e: - print(f"转换失败: {e}") + logging.error(f"转换失败: {e}") return None except FileNotFoundError: - print("未找到 FFmpeg,请确保已安装并配置好 FFmpeg 环境。") + logging.error("未找到 FFmpeg,请确保已安装并配置好 FFmpeg 环境。") return None + def get_course(): - # 连接到SQLite数据库 conn = sqlite3.connect('courses.db') cursor = conn.cursor() - max_course_id = cursor.execute('SELECT id FROM courses ORDER BY id DESC LIMIT 1') # 获取数据库中最大的课程ID + max_course_id = cursor.execute('SELECT id FROM courses ORDER BY id DESC LIMIT 1') if max_course_id: max_course_id = max_course_id.fetchone()[0] - print(f"The maximum course ID is {max_course_id}") + logging.info(f"The maximum course ID is {max_course_id}") else: - print("No courses found in the database.") + logging.info("No courses found in the database.") max_course_id = 11 start_course_id = max_course_id - 5 - # 查询courses表中的所有课程ID cursor.execute('SELECT id, title FROM courses where id >= ?', (start_course_id,)) - # cursor.execute('SELECT id, title FROM courses where id >= ') course_ids_data = cursor.fetchall() - print(course_ids_data) course_ids = [row[0] for row in course_ids_data] course_ids_dict = dict(course_ids_data) - print(course_ids_dict) - print(course_ids) - # 创建json文件夹 if not os.path.exists('json'): os.makedirs('json') - # 创建course文件夹 if not os.path.exists('course'): os.makedirs('course') - # 先请求全部的链接获取数据,并将获取到的课程信息保存到数据库中 for course_id in course_ids: - # course_id = course_id_tuple[0] - print(f"Processing course ID: {course_id}") + logging.info(f"Processing course ID: {course_id}") json_filename = os.path.join('json', f'{course_id}.json') if os.path.exists(json_filename): - print(f"Course {course_id} JSON file already exists, using local file.") + logging.info(f"Course {course_id} JSON file already exists, using local file.") with open(json_filename, 'r', encoding='utf-8') as json_file: contents_data = json.load(json_file) else: @@ -171,7 +161,6 @@ def get_course(): cursor.close() conn.close() - # 现在所有的课程信息都已经保存到数据库中,开始下载附件和进行后续操作 for course_id in course_ids: course_id_folder = os.path.join('course', str(course_id)) @@ -185,27 +174,23 @@ def get_course(): attachment_queue = Queue() - # 下载所有附件 for attachment in [item['attachment'] for item in contents_data['data'] if item['attachment']]: attachment_queue.put(attachment) - # 创建并启动多个下载线程 threads = [] for _ in range(max_download_threads): - t = Thread(target=worker, args=(attachment_queue, course_id_folder, course_audio_filename, max_retry_attempts)) + t = Thread(target=worker, + args=(attachment_queue, course_id_folder, course_audio_filename, max_retry_attempts)) t.start() threads.append(t) - # 等待所有下载任务完成 attachment_queue.join() for t in threads: t.join() - # 检查是否存在音频文件 audio_files = [item for item in contents_data['data'] if item['category'] == 'audio'] if audio_files: - # 合并所有音频文件 audio_files.sort(key=lambda x: x['order']) combined_audio_filename = os.path.join(course_id_folder, 'combined_audio.mp3') @@ -221,15 +206,13 @@ def get_course(): shutil.move(combined_audio_filename, course_audio_filename) os.remove(text_file) - # 删除下载的临时音频文件 for item in audio_files: audio_file_path = os.path.join(course_id_folder, item['attachment']['name']) try: os.remove(audio_file_path) except: - print('delete file fail') + logging.error('delete file fail') - # 整理文件 for item in contents_data['data']: attachment = item['attachment'] if attachment: diff --git a/courses.db b/courses.db index f1296b11da1f58079b827893859aaf4f100c73ec..5f744cce5033b64db28c860b099142b8fdd47fc8 100755 GIT binary patch delta 4824 zcma)AYm8l08NKuBgPBfgr#drWr=1z*V$j|0_u0Ee5g#ZZ2qK6e*!`>}0yVATA9W@c zL1Rg=;vx6|iAvN6Rv{zhQHnn_F&ZI6N&MB^P8~EnswPHb&~*zj=iZ5#^W&S`xwGc1 z`#tvh*1oe(eXD17_S-!N&K%g+X!L&O&dulb>^rpMyEm`;Xa89%+VA{qX!lEf{r%_k zpLOunJ
NKKG delta 285 zcmWN_zfQtH00-b+@5&!30tX6ODivB-i0SNNdbC;B)){uhiFt6^s12S5O`-{vE3sackgMIly*=_Rq zB>#qOm49{Yn=nekh=2(c7+8>kG-MzPYmfsQ@~{pCC_)K1l%WC}unASD!4_ V#uVw#au50J+o+