# -*- coding: utf-8 -*-
"""Fetch course contents, download their attachments and tidy the results.

Workflow implemented by :func:`get_course`:

1. Read the course ids/titles (``id >= start_course_id``) from ``courses.db``.
2. For every course, load its contents JSON (from ``json/<id>.json`` when
   present, otherwise from the HTTP API) and insert the items into the
   ``contents`` table.
3. For every course that has no ``course/<id>`` folder yet, download the
   attachments with ``aria2c``, merge the audio parts with ``ffmpeg``, sort
   the remaining files into per-extension sub-folders and dump the text
   items into ``<id>.txt``.

Settings are read from the ``DEFAULT`` section of ``config.ini`` at import
time.
"""
import configparser
import json
import os
import shutil
import sqlite3
import subprocess
from queue import Empty, Queue
from threading import Thread

import requests

from headers import headers

# Read the configuration file.
config = configparser.ConfigParser()
config.read('config.ini')
authorization_token = config['DEFAULT']['authorization_token']
max_download_threads = int(config['DEFAULT']['max_download_threads'])
max_retry_attempts = int(config['DEFAULT']['max_retry_attempts'])
start_course_id = int(config['DEFAULT']['start_course_id'])

# The imported header dict is extended in place with the bearer token.
headers['authorization'] = f'Bearer {authorization_token}'

# Seconds to wait for the contents API before giving up on a request.
REQUEST_TIMEOUT = 30


def _uses_url_filename(attachment):
    """Return True when the attachment name is unusable as a file name.

    That is the case when the name is missing/empty or ends with ``.m3u8``.
    """
    name = attachment.get('name')
    return not name or name.endswith('.m3u8')


def _attachment_filename(attachment):
    """Return the file name under which *attachment* is stored on disk.

    Falls back to everything after the last ``/`` of the URL when the
    attachment has no usable name.  Every step (download, merge, organise)
    goes through this helper so they all agree on the on-disk name.
    """
    if _uses_url_filename(attachment):
        return attachment['url'].rsplit('/', 1)[-1]
    return attachment['name']


def download_attachment(attachment, course_id_folder, max_retries):
    """Download one attachment into *course_id_folder* using ``aria2c``.

    Args:
        attachment: Mapping with at least ``url`` and (possibly empty or
            ``None``) ``name``.
        course_id_folder: Directory the file is written to.
        max_retries: Maximum number of ``aria2c`` invocations.

    Returns:
        None.  Failures are reported on stdout; no exception is raised when
        ``aria2c`` fails or cannot be started.
    """
    name = attachment.get('name')
    url = attachment['url']
    if _uses_url_filename(attachment):
        print("字符串为空")
        print(url)
    download_filename = _attachment_filename(attachment)
    filename = os.path.join(course_id_folder, download_filename)
    # An argument list (no shell) keeps URLs containing '&'/'?' and file
    # names containing spaces or quotes from being mangled or executed.
    command = ['aria2c', '-o', filename, '-x', '16', '-s', '16', url]

    for attempt in range(1, max_retries + 1):
        print(download_filename)
        print(name)
        print(' '.join(command))
        try:
            subprocess.run(command, check=True)
            return
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing aria2c binary, which the shell used
            # to report as a non-zero exit status.
            print(f"Failed to download {name}: {e}")
        if attempt == max_retries:
            print(f"Failed to download {name} after {max_retries} attempts.")
        else:
            print(f"Retrying {name}... ({attempt}/{max_retries})")


def worker(queue, course_id_folder, max_retries):
    """Drain *queue*, downloading every attachment taken from it.

    Returns as soon as the queue is empty.  ``task_done()`` is called for
    every item taken, even when the download raises, so ``queue.join()``
    cannot hang.
    """
    while True:
        try:
            # Non-blocking get: checking empty() and then calling a blocking
            # get() lets another thread steal the last item in between,
            # leaving this thread blocked forever.
            attachment = queue.get_nowait()
        except Empty:
            return
        try:
            download_attachment(attachment, course_id_folder, max_retries)
        except Exception as e:  # thread boundary: report and keep draining
            print(f"Unexpected error while downloading attachment: {e}")
        finally:
            queue.task_done()


def _read_json(path):
    """Load and return the JSON document stored at *path* (UTF-8)."""
    with open(path, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)


def _load_course_contents(course_id):
    """Return the contents payload of a course, caching it in ``json/``.

    Raises:
        requests.HTTPError: The API answered with an error status; nothing
            is cached in that case.
    """
    json_filename = os.path.join('json', f'{course_id}.json')
    if os.path.exists(json_filename):
        print(f"Course {course_id} JSON file already exists, using local file.")
        return _read_json(json_filename)

    response = requests.get(
        f'https://bandu-api.songy.info/v2/courses/{course_id}/contents',
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    # Do not cache an error payload: it would be reused on every later run.
    response.raise_for_status()
    contents_data = response.json()
    with open(json_filename, 'w', encoding='utf-8') as json_file:
        json.dump(contents_data, json_file, ensure_ascii=False, indent=4)
    return contents_data


def _store_contents(cursor, course_id, items):
    """Insert the content items of one course; existing ids are left alone."""
    cursor.executemany(
        '''
        INSERT INTO contents (id, course_id, content, category, audio_order, attachment_url, mime_type)
        VALUES (?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(id) DO NOTHING
        ''',
        [
            (
                item['id'],
                course_id,
                item['content'],
                item['category'],
                item['order'],
                item['attachment']['url'] if item['attachment'] else None,
                item['attachment']['mime_type'] if item['attachment'] else None,
            )
            for item in items
        ],
    )


def _download_attachments(items, course_id_folder):
    """Download every attachment of *items* with a pool of worker threads."""
    attachment_queue = Queue()
    for item in items:
        if item['attachment']:
            attachment_queue.put(item['attachment'])

    threads = []
    for _ in range(max_download_threads):
        t = Thread(target=worker, args=(attachment_queue, course_id_folder, max_retry_attempts))
        t.start()
        threads.append(t)

    # Wait until every queued download has been processed.
    attachment_queue.join()
    for t in threads:
        t.join()


def _concat_audio(audio_names, course_id_folder, combined_audio_filename):
    """Concatenate *audio_names* into *combined_audio_filename* via ffmpeg.

    Returns True when ffmpeg exited successfully and the output exists.
    """
    text_file = os.path.join(course_id_folder, 'input_files.txt')
    try:
        with open(text_file, 'w', encoding='utf-8') as f:
            for name in audio_names:
                # Inside a single-quoted concat entry a quote must be
                # written as '\'' (close, escaped quote, reopen).
                escaped = name.replace("'", "'\\''")
                f.write(f"file '{escaped}'\n")
        command = ['ffmpeg', '-f', 'concat', '-safe', '0', '-i', text_file,
                   '-c', 'copy', combined_audio_filename]
        try:
            result = subprocess.run(command)
        except OSError as e:
            print(f"Failed to run ffmpeg: {e}")
            return False
        return result.returncode == 0 and os.path.exists(combined_audio_filename)
    finally:
        if os.path.exists(text_file):
            os.remove(text_file)


def _merge_audio(items, course_id_folder, course_title):
    """Merge the audio attachments (sorted by ``order``) into one MP3.

    Produces ``combined_audio.mp3`` plus a copy named after the course
    title, then deletes the individual audio files.  When merging fails the
    individual files are kept.
    """
    audio_items = sorted(
        (item for item in items if item['category'] == 'audio' and item['attachment']),
        key=lambda item: item['order'],
    )
    if not audio_items:
        return

    audio_names = [_attachment_filename(item['attachment']) for item in audio_items]
    combined_audio_filename = os.path.join(course_id_folder, 'combined_audio.mp3')
    # A path separator in the title would point outside the course folder.
    safe_title = str(course_title).replace('/', '_').replace('\\', '_')
    course_audio_filename = os.path.join(course_id_folder, f'{safe_title}.mp3')

    if not os.path.exists(combined_audio_filename):
        if not _concat_audio(audio_names, course_id_folder, combined_audio_filename):
            print(f"Failed to merge audio files in {course_id_folder}, keeping the downloaded parts.")
            return
        shutil.copy2(combined_audio_filename, course_audio_filename)

    # Delete the downloaded temporary audio files.
    for name in audio_names:
        try:
            os.remove(os.path.join(course_id_folder, name))
        except OSError:
            print('delete file fail')


def _organize_attachments(items, course_id_folder):
    """Move downloaded attachments into sub-folders named by extension.

    ``mp3`` files and files without an extension stay where they are.
    """
    for item in items:
        attachment = item['attachment']
        if not attachment:
            continue
        name = _attachment_filename(attachment)
        source = os.path.join(course_id_folder, name)
        if not os.path.isfile(source):
            continue
        extension = os.path.splitext(name)[1][1:].lower()
        if not extension or extension == 'mp3':
            continue
        folder_path = os.path.join(course_id_folder, extension)
        os.makedirs(folder_path, exist_ok=True)
        shutil.move(source, os.path.join(folder_path, name))


def _save_text_contents(items, course_id_folder, course_id):
    """Write the content of every ``text`` item to ``<course_id>.txt``."""
    text_contents = [item['content'] for item in items if item['category'] == 'text']
    if not text_contents:
        return
    with open(os.path.join(course_id_folder, f'{course_id}.txt'), 'w', encoding='utf-8') as txt_file:
        for content in text_contents:
            txt_file.write(content + '\n')


def _process_course(course_id, course_title):
    """Download, merge and organise one course unless its folder exists."""
    course_id_folder = os.path.join('course', str(course_id))
    if os.path.exists(course_id_folder):
        print(f"Course {course_id} folder already exists, skipping download and merge operations.")
        return
    os.makedirs(course_id_folder)

    contents_data = _read_json(os.path.join('json', f'{course_id}.json'))
    items = contents_data['data']

    _download_attachments(items, course_id_folder)
    _merge_audio(items, course_id_folder, course_title)
    _organize_attachments(items, course_id_folder)
    _save_text_contents(items, course_id_folder, course_id)


def get_course():
    """Run the whole pipeline for every course with id >= start_course_id.

    First stores the contents of all courses in the ``contents`` table of
    ``courses.db`` (caching the raw JSON under ``json/``), then downloads
    and post-processes each course under ``course/<id>``.
    """
    conn = sqlite3.connect('courses.db')
    try:
        cursor = conn.cursor()
        cursor.execute('SELECT id, title FROM courses where id >= ?', (start_course_id,))
        course_ids_data = cursor.fetchall()
        print(course_ids_data)
        course_ids = [row[0] for row in course_ids_data]
        course_ids_dict = dict(course_ids_data)
        print(course_ids_dict)
        print(course_ids)

        os.makedirs('json', exist_ok=True)
        os.makedirs('course', exist_ok=True)

        # Fetch the data of every course first and persist it.
        for course_id in course_ids:
            print(f"Processing course ID: {course_id}")
            contents_data = _load_course_contents(course_id)
            _store_contents(cursor, course_id, contents_data['data'])
            conn.commit()
        cursor.close()
    finally:
        # Close the connection even when a request or insert fails.
        conn.close()

    # All course information is stored now; download and post-process.
    for course_id in course_ids:
        _process_course(course_id, course_ids_dict[course_id])


if __name__ == '__main__':
    get_course()