diff --git a/course.py b/course.py index 44a6bde..7e49af5 100755 --- a/course.py +++ b/course.py @@ -23,6 +23,7 @@ start_course_id = int(config['DEFAULT']['start_course_id']) headers = headers headers['authorization'] = f'Bearer {authorization_token}' + def download_attachment(attachment, course_id_folder, max_retries): if attachment['name'] in ["", None] or attachment['name'].endswith(".m3u8"): print("字符串为空") @@ -64,6 +65,7 @@ def worker(queue, course_id_folder, max_retries): download_attachment(attachment, course_id_folder, max_retries) queue.task_done() + def get_course(): # 连接到SQLite数据库 conn = sqlite3.connect('courses.db') @@ -202,4 +204,3 @@ def get_course(): if __name__ == '__main__': get_course() - diff --git a/course_list.py b/course_list.py index 993cb3d..1ec99ea 100755 --- a/course_list.py +++ b/course_list.py @@ -1,8 +1,10 @@ # -*- coding: utf-8 -*- -import requests -import json -import sqlite3 import configparser +import sqlite3 + +import requests + +import json from headers import headers # 读取配置文件 @@ -25,14 +27,14 @@ params = { 'sort': sort, } -def get_list(): +def get_list(): response = requests.get('https://bandu-api.songy.info/v2/communities/f0495084-4c6f-4f35-b4d5-2068641a53a1/courses', params=params, headers=headers) # 假设response的内容就是你提供的course_list.json course_list_json = response.text - + # print(course_list_json) json_data = json.loads(course_list_json) @@ -72,5 +74,6 @@ def get_list(): cursor.close() conn.close() + if __name__ == '__main__': get_list() diff --git a/main.py b/main.py index fe29a64..411f60f 100644 --- a/main.py +++ b/main.py @@ -3,16 +3,7 @@ from course import get_course from course_list import get_list from markdown_generator import get_content -# This is a sample Python script. - -# Press ⌃R to execute it or replace it with your code. -# Press Double ⇧ to search everywhere for classes, files, tool windows, actions, and settings. - - -# Press the green button in the gutter to run the script. if __name__ == '__main__': get_list() get_course() get_content() - -# See PyCharm help at https://www.jetbrains. com/help/pycharm/ diff --git a/markdown_generator.py b/markdown_generator.py index 801a97a..9d399db 100644 --- a/markdown_generator.py +++ b/markdown_generator.py @@ -65,6 +65,8 @@ def create_audio_transcriptions_table(db_path): # 调用函数创建表 db_path = 'courses.db' # 数据库文件路径 + + # create_audio_transcriptions_table(db_path) # 下载音频文件 @@ -108,10 +110,12 @@ def voice2txt(voice_path): def save_to_db(course_id, filename, text_value): conn = sqlite3.connect('courses.db') cursor = conn.cursor() - cursor.execute("INSERT INTO audio_transcriptions (course_id, filename, text) VALUES (?, ?, ?)", (course_id, filename, text_value)) + cursor.execute("INSERT INTO audio_transcriptions (course_id, filename, text) VALUES (?, ?, ?)", + (course_id, filename, text_value)) conn.commit() conn.close() + # 检查数据库中是否已存在转换后的文本 def check_db_for_text(course_id, filename): conn = sqlite3.connect('courses.db') @@ -157,6 +161,7 @@ def process_item(item): else: return f"[{item['content']}]({item['attachment']['url']})\n" + def process_logseq_item(item): if item['category'] == 'text': return f"- {item['content']}\n" @@ -178,13 +183,15 @@ def json_to_markdown(json_file, markdown_file, logseq=False): logging.info(f"Writing Markdown file: {markdown_file}") with open(markdown_file, 'w', encoding='utf-8') as md_file: with ThreadPoolExecutor(max_workers=max_download_threads) as executor: # Use a thread pool with 5 threads - futures = [executor.submit(process_logseq_item if logseq else process_item, item) for item in data['data']] + futures = [executor.submit(process_logseq_item if logseq else process_item, item) for item in + data['data']] for future in futures: md_file.write(future.result()) # Write the result to the Markdown file except Exception as e: logging.error(f"处理JSON文件时出错:{e}") + def get_content(): # 连接到SQLite数据库 conn = sqlite3.connect('courses.db') @@ -208,7 +215,8 @@ def get_content(): json_filename = os.path.join('json', f'{course_id}.json') # copy_json_file_name = os.path.join('data', 'json', f'{course_ids_dict[course_id]}.json').replace('?', '?') - copy_json_file_name = os.path.join('course', f'{course_id}', 'json', f'{course_ids_dict[course_id]}.json').replace('?', '?') + copy_json_file_name = os.path.join('course', f'{course_id}', 'json', + f'{course_ids_dict[course_id]}.json').replace('?', '?') # md_file_name = os.path.join('data', 'markdown', f'{course_ids_dict[course_id]}.md') md_file_name = os.path.join('course', f'{course_id}', f'{course_ids_dict[course_id]}.md') if os.path.exists(json_filename): @@ -228,6 +236,7 @@ def get_content(): else: continue + if __name__ == '__main__': # create_audio_transcriptions_table(db_path) - get_content() \ No newline at end of file + get_content() diff --git a/video_voice_process.py b/video_voice_process.py index d15d84d..c451522 100644 --- a/video_voice_process.py +++ b/video_voice_process.py @@ -1,17 +1,18 @@ # -*- coding: utf-8 -*- +import logging +import os import shutil +import threading +from concurrent.futures import ThreadPoolExecutor, as_completed import requests from pydub import AudioSegment from pydub.silence import split_on_silence -import os -from concurrent.futures import ThreadPoolExecutor, as_completed -import logging -import threading # 创建一个锁对象 file_write_lock = threading.Lock() + # 配置日志 def configure_logging(): logger = logging.getLogger() @@ -29,8 +30,10 @@ def configure_logging(): console_handler.setFormatter(console_formatter) logger.addHandler(console_handler) + configure_logging() + def process_audio_file(audio_file_path): file_name_with_extension = os.path.basename(audio_file_path) file_name_without_extension = os.path.splitext(file_name_with_extension)[0] @@ -111,6 +114,7 @@ def process_audio_file(audio_file_path): except Exception as e: logging.error(f"Failed to process {file_name_with_extension}: {str(e)}") + def main(): all_files = os.listdir('media') audio_files = [file for file in all_files if file.endswith('.wav')] @@ -121,5 +125,6 @@ def main(): audio_file_path = os.path.join('media', audio_file) executor.submit(process_audio_file, audio_file_path) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/vocal_getter.py b/vocal_getter.py index 75f5175..889a65b 100644 --- a/vocal_getter.py +++ b/vocal_getter.py @@ -2,6 +2,7 @@ from pydub import AudioSegment from pydub.silence import split_on_silence + def remove_silent_parts(input_file_path, output_file_path, min_silence_len=1000, silence_thresh=-40): """ 函数功能:删除音频文件中的无声部分,并将处理后的音频保存到指定输出路径 @@ -26,10 +27,10 @@ def remove_silent_parts(input_file_path, output_file_path, min_silence_len=1000, # 将处理后的音频保存到输出文件路径 final_audio.export(output_file_path, format="wav") -if __name__ == "__main__": +if __name__ == "__main__": # 首先使用命令行umx audio.wav来将音频拆分为 input_file_path = "vocal/this is water_umxl/vocals.wav" output_file_path = "vocal/this is water_umxl/vocals_process.wav" - remove_silent_parts(input_file_path, output_file_path) \ No newline at end of file + remove_silent_parts(input_file_path, output_file_path)