# -*- coding: utf-8 -*-
"""Fetch course contents, download their attachments and post-process them.

For a range of course IDs read from ``courses.db`` the script:

1. fetches (or loads from the local ``json/`` cache) the course contents and
   records them in the ``contents`` table,
2. downloads every attachment with ``aria2c`` using a pool of threads,
3. merges the audio parts of a course into one MP3 with ``ffmpeg``,
4. sorts leftover files into per-extension sub-folders, dumps text contents
   to a TXT file and hands MP4 files to the project's conversion helpers.
"""
import configparser
import json
import os
import shutil
import sqlite3
import subprocess
from queue import Empty, Queue
from threading import Thread

import requests

from fileconvert import mp4_to_mp3, mp4_to_wav
from headers import headers
from logging_config import setup_logging
from video_voice_process import process_audio_file

logger = setup_logging()

# Read the configuration file.
config = configparser.ConfigParser()
config.read('config.ini')

authorization_token = config['DEFAULT']['authorization_token']
max_download_threads = int(config['DEFAULT']['max_download_threads'])
max_retry_attempts = int(config['DEFAULT']['max_retry_attempts'])
download_id = int(config['DEFAULT']['download_id'])

# The imported header dict is shared; add the bearer token to it in place.
headers['authorization'] = f'Bearer {authorization_token}'


def download_attachment(attachment, course_id_folder, course_audio_filename, max_retries):
    """Download one attachment with aria2c, retrying on failure.

    Args:
        attachment: Mapping with at least the keys ``'name'`` and ``'url'``.
        course_id_folder: Folder of the course the attachment belongs to.
        course_audio_filename: Path of the merged course MP3; when it already
            exists, MP3 attachments are not downloaded again.
        max_retries: Maximum number of download attempts.

    Non-MP3 files are stored in a sub-folder named after their extension,
    MP3 files directly in ``course_id_folder``. Nothing is raised when all
    attempts fail; the failure is only logged.
    """
    name = attachment['name']
    url = attachment['url']

    if name in ["", None] or name.endswith(".m3u8"):
        logger.info("字符串为空")
        # Fall back to the last path component of the URL.
        download_filename = url[url.rfind('/') + 1:]
    else:
        download_filename = name

    # A missing name is treated like an empty one (empty extension) instead
    # of crashing on ``None.split``.
    file_extension = (name or '').split('.')[-1].lower()

    # Resolve the target folder once, outside the retry loop, so that a retry
    # does not nest the extension folder one level deeper each time.
    if file_extension != 'mp3':
        target_folder = os.path.join(course_id_folder, file_extension)
    else:
        if os.path.exists(course_audio_filename):
            logger.info(f"File {course_audio_filename} already exists, skipping download.")
            return
        target_folder = course_id_folder

    filename = os.path.join(target_folder, download_filename)
    if os.path.exists(filename):
        logger.info(f"File {filename} already exists, skipping download.")
        return

    # Argument list instead of a shell string: URLs containing '&' or other
    # shell metacharacters must reach aria2c untouched.
    command = ['aria2c', '-o', filename, '-x', '16', '-s', '16', url]

    for attempt in range(1, max_retries + 1):
        try:
            subprocess.run(command, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing aria2c executable.
            logger.error(f"Failed to download {name}: {e}")
            if attempt == max_retries:
                logger.error(f"Failed to download {name} after {max_retries} attempts.")
            else:
                logger.warning(f"Retrying {name}... ({attempt}/{max_retries})")
        else:
            logger.info(f"Download Command: {' '.join(command)}")
            return


def worker(queue, course_id_folder, course_audio_filename, max_retries):
    """Consume attachments from ``queue`` and download them until it is empty.

    Args:
        queue: ``queue.Queue`` of attachment mappings.
        course_id_folder: Passed through to :func:`download_attachment`.
        course_audio_filename: Passed through to :func:`download_attachment`.
        max_retries: Passed through to :func:`download_attachment`.
    """
    while True:
        # ``empty()`` followed by a blocking ``get()`` is racy: another worker
        # may take the last item in between and this thread would block
        # forever. A non-blocking get avoids that.
        try:
            attachment = queue.get_nowait()
        except Empty:
            return
        try:
            download_attachment(attachment, course_id_folder, course_audio_filename, max_retries)
        except Exception as e:  # keep the worker alive; one bad item must not stall the pool
            logger.error(f"Unexpected error while downloading attachment: {e}")
        finally:
            # Always acknowledge the item, otherwise ``Queue.join()`` never returns.
            queue.task_done()


def fetch_course():
    """Load course contents (from cache or the API) and store them in SQLite.

    Returns:
        A tuple ``(course_ids, course_ids_dict)`` where ``course_ids`` is the
        list of selected course IDs and ``course_ids_dict`` maps each ID to
        its title.

    Raises:
        requests.HTTPError: If the contents API answers with an error status.
    """
    conn = sqlite3.connect('courses.db')
    try:
        cursor = conn.cursor()

        # ``execute`` returns the cursor itself (always truthy), so the row
        # has to be fetched before testing for an empty table.
        row = cursor.execute('SELECT id FROM courses ORDER BY id DESC LIMIT 1').fetchone()
        if row:
            max_course_id = row[0]
            logger.info(f"The maximum course ID is {max_course_id}")
        else:
            logger.info("No courses found in the database.")
            # NOTE(review): 11 is taken to be the fallback for an empty
            # table — confirm against the original intent.
            max_course_id = 11

        # Start at the configured ID, but never earlier than the five newest courses.
        start_course_id = max(download_id, max_course_id - 5)

        cursor.execute('SELECT id, title FROM courses where id >= ?', (start_course_id,))
        course_ids_data = cursor.fetchall()
        course_ids = [course_row[0] for course_row in course_ids_data]
        course_ids_dict = dict(course_ids_data)

        os.makedirs('json', exist_ok=True)
        os.makedirs('course', exist_ok=True)

        for course_id in course_ids:
            logger.info(f"Processing course ID: {course_id}")
            json_filename = os.path.join('json', f'{course_id}.json')

            if os.path.exists(json_filename):
                logger.info(f"Course {course_id} JSON file already exists, using local file.")
                with open(json_filename, 'r', encoding='utf-8') as json_file:
                    contents_data = json.load(json_file)
            else:
                response = requests.get(
                    f'https://bandu-api.songy.info/v2/courses/{course_id}/contents',
                    headers=headers,
                    timeout=30,
                )
                # Do not cache an error payload as if it were course data.
                response.raise_for_status()
                contents_data = response.json()
                with open(json_filename, 'w', encoding='utf-8') as save_json_file:
                    json.dump(contents_data, save_json_file, ensure_ascii=False, indent=4)

            for item in contents_data['data']:
                attachment = item['attachment']
                cursor.execute(
                    '''
                    INSERT INTO contents (id, course_id, content, category, audio_order,
                                          attachment_url, mime_type)
                    VALUES (?, ?, ?, ?, ?, ?, ?)
                    ON CONFLICT(id) DO NOTHING
                    ''',
                    (
                        item['id'],
                        course_id,
                        item['content'],
                        item['category'],
                        item['order'],
                        attachment['url'] if attachment else None,
                        attachment['mime_type'] if attachment else None,
                    ),
                )
            # Without a commit the inserted rows are rolled back when the
            # connection is closed.
            conn.commit()

        return course_ids, course_ids_dict
    finally:
        conn.close()


def _merge_course_audio(contents_data, course_id_folder, course_audio_filename):
    """Concatenate the audio parts of a course into ``course_audio_filename``."""
    if os.path.exists(course_audio_filename):
        return

    audio_files = [item for item in contents_data['data'] if item['category'] == 'audio']
    audio_files.sort(key=lambda x: x['order'])
    part_names = [
        item['attachment']['name']
        for item in audio_files
        if item['attachment'] and item['attachment']['name']
    ]
    if not part_names:
        return

    combined_audio_filename = os.path.join(course_id_folder, 'combined_audio.mp3')
    text_file = os.path.join(course_id_folder, 'input_files.txt')

    # UTF-8 so that non-ASCII file names survive regardless of the locale.
    with open(text_file, 'w', encoding='utf-8') as f:
        for part_name in part_names:
            # Escape single quotes for the ffmpeg concat list syntax.
            escaped_name = part_name.replace("'", "'\\''")
            f.write(f"file '{escaped_name}'\n")

    # ``-y`` overwrites a stale output left behind by an earlier failed run
    # instead of waiting for confirmation on stdin.
    ffmpeg_command = [
        'ffmpeg', '-y', '-f', 'concat', '-safe', '0',
        '-i', text_file, '-c', 'copy', combined_audio_filename,
    ]
    try:
        subprocess.run(ffmpeg_command, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # Keep the downloaded parts so that a later run can retry the merge.
        logger.error(f"Failed to merge audio into {course_audio_filename}: {e}")
        return
    finally:
        os.remove(text_file)

    shutil.move(combined_audio_filename, course_audio_filename)

    # The parts are no longer needed once the merged file exists.
    for part_name in part_names:
        try:
            os.remove(os.path.join(course_id_folder, part_name))
        except OSError:
            logger.error('delete file fail')


def _move_attachments_to_extension_folders(contents_data, course_id_folder):
    """Move non-MP3 attachments lying in the course folder into ``<ext>/``."""
    for item in contents_data['data']:
        attachment = item['attachment']
        if not attachment or not attachment['name']:
            continue
        name = attachment['name']
        filename = os.path.join(course_id_folder, name)
        # ``isfile`` rather than ``exists``: a name that equals an extension
        # folder must not cause the folder to be moved into itself.
        if not os.path.isfile(filename):
            continue
        file_extension = name.split('.')[-1].lower()
        if file_extension == 'mp3':
            continue
        folder_path = os.path.join(course_id_folder, file_extension)
        os.makedirs(folder_path, exist_ok=True)
        shutil.move(filename, os.path.join(folder_path, name))


def _save_text_contents(contents_data, course_id, course_id_folder):
    """Write the content of every item of category 'text' to ``<course_id>.txt``."""
    text_contents = [item['content'] for item in contents_data['data'] if item['category'] == 'text']
    if not text_contents:
        return
    with open(os.path.join(course_id_folder, f'{course_id}.txt'), 'w', encoding='utf-8') as txt_file:
        for content in text_contents:
            txt_file.write(content + '\n')


def _process_course_video(course_id_folder):
    """Convert the course MP4 (if any) and pass the WAV to ``process_audio_file``."""
    mp4_folder = os.path.join(course_id_folder, 'mp4')
    if not os.path.exists(mp4_folder):
        return

    mp4_file = None
    exist_md_file = False
    # Walk all files and sub-folders of the mp4 folder.
    for root, _dirs, files in os.walk(mp4_folder):
        for file in files:
            lowered = file.lower()
            # An existing .md file makes the course skip video processing
            # (presumably the output of an earlier run — confirm).
            if lowered.endswith('.md'):
                exist_md_file = True
            if lowered.endswith('.mp4'):
                # Only the last MP4 found is kept.
                mp4_file = os.path.join(root, file)

    if exist_md_file or mp4_file is None:
        return

    mp4_to_mp3(mp4_file)
    wav_file = mp4_to_wav(mp4_file)
    if wav_file is None:
        return
    try:
        process_audio_file(wav_file)
    except Exception as e:  # best effort: one failing course must not stop the run
        logger.error(f'process_audio_file fail: {e}')


def download_course_contents(course_ids, course_ids_dict):
    """Download and post-process the attachments of every given course.

    Args:
        course_ids: Iterable of course IDs whose ``json/<id>.json`` file exists.
        course_ids_dict: Mapping of course ID to course title; the title is
            used as the file name of the merged course MP3.
    """
    os.makedirs('json', exist_ok=True)
    os.makedirs('course', exist_ok=True)

    for course_id in course_ids:
        course_id_folder = os.path.join('course', str(course_id))
        os.makedirs(course_id_folder, exist_ok=True)

        json_filename = os.path.join('json', f'{course_id}.json')
        with open(json_filename, 'r', encoding='utf-8') as json_file:
            contents_data = json.load(json_file)

        course_audio_filename = os.path.join(course_id_folder, f'{course_ids_dict[course_id]}.mp3')

        attachment_queue = Queue()
        for item in contents_data['data']:
            if item['attachment']:
                attachment_queue.put(item['attachment'])

        # At least one worker, otherwise ``Queue.join()`` below would never return.
        threads = [
            Thread(
                target=worker,
                args=(attachment_queue, course_id_folder, course_audio_filename, max_retry_attempts),
            )
            for _ in range(max(1, max_download_threads))
        ]
        for t in threads:
            t.start()

        attachment_queue.join()
        for t in threads:
            t.join()

        _merge_course_audio(contents_data, course_id_folder, course_audio_filename)
        _move_attachments_to_extension_folders(contents_data, course_id_folder)
        # Save the content of items of category 'text' to a TXT file.
        _save_text_contents(contents_data, course_id, course_id_folder)
        # Process MP4 files.
        _process_course_video(course_id_folder)


def get_course():
    """Run the whole pipeline: fetch course metadata, then download contents."""
    course_ids, course_ids_dict = fetch_course()
    download_course_contents(course_ids, course_ids_dict)


if __name__ == '__main__':
    get_course()