format files

2025-02-13 17:06:43 +08:00
parent 95ae505193
commit 04f82dd9cf
6 changed files with 36 additions and 26 deletions
--- a/course.py
+++ b/course.py
@@ -23,6 +23,7 @@ start_course_id = int(config['DEFAULT']['start_course_id'])
 headers = headers
 headers['authorization'] = f'Bearer {authorization_token}'

+
 def download_attachment(attachment, course_id_folder, max_retries):
    if attachment['name'] in ["", None] or attachment['name'].endswith(".m3u8"):
        print("字符串为空")
@@ -64,6 +65,7 @@ def worker(queue, course_id_folder, max_retries):
        download_attachment(attachment, course_id_folder, max_retries)
        queue.task_done()

+
 def get_course():
    # 连接到SQLite数据库
    conn = sqlite3.connect('courses.db')
@@ -202,4 +204,3 @@ def get_course():

 if __name__ == '__main__':
    get_course()
-
--- a/course_list.py
+++ b/course_list.py
@@ -1,8 +1,10 @@
 # -*- coding: utf-8 -*-
-import requests
-import json
-import sqlite3
 import configparser
+import sqlite3
+
+import requests
+
+import json
 from headers import headers

 # 读取配置文件
@@ -25,14 +27,14 @@ params = {
    'sort': sort,
 }

-def get_list():

+def get_list():
    response = requests.get('https://bandu-api.songy.info/v2/communities/f0495084-4c6f-4f35-b4d5-2068641a53a1/courses',
                            params=params, headers=headers)

    # 假设response的内容就是你提供的course_list.json
    course_list_json = response.text
-    
+
    # print(course_list_json)

    json_data = json.loads(course_list_json)
@@ -72,5 +74,6 @@ def get_list():
    cursor.close()
    conn.close()

+
 if __name__ == '__main__':
    get_list()
--- a/main.py
+++ b/main.py
@@ -3,16 +3,7 @@ from course import get_course
 from course_list import get_list
 from markdown_generator import get_content

-# This is a sample Python script.
-
-# Press ⌃R to execute it or replace it with your code.
-# Press Double ⇧ to search everywhere for classes, files, tool windows, actions, and settings.
-
-
-# Press the green button in the gutter to run the script.
 if __name__ == '__main__':
    get_list()
    get_course()
    get_content()
-
-# See PyCharm help at https://www.jetbrains.com/help/pycharm/
--- a/markdown_generator.py
+++ b/markdown_generator.py
@@ -65,6 +65,8 @@ def create_audio_transcriptions_table(db_path):

 # 调用函数创建表
 db_path = 'courses.db'  # 数据库文件路径
+
+
 # create_audio_transcriptions_table(db_path)

 # 下载音频文件
@@ -108,10 +110,12 @@ def voice2txt(voice_path):
 def save_to_db(course_id, filename, text_value):
    conn = sqlite3.connect('courses.db')
    cursor = conn.cursor()
-    cursor.execute("INSERT INTO audio_transcriptions (course_id, filename, text) VALUES (?, ?, ?)", (course_id, filename, text_value))
+    cursor.execute("INSERT INTO audio_transcriptions (course_id, filename, text) VALUES (?, ?, ?)",
+                   (course_id, filename, text_value))
    conn.commit()
    conn.close()

+
 # 检查数据库中是否已存在转换后的文本
 def check_db_for_text(course_id, filename):
    conn = sqlite3.connect('courses.db')
@@ -157,6 +161,7 @@ def process_item(item):
    else:
        return f"[{item['content']}]({item['attachment']['url']})\n"

+
 def process_logseq_item(item):
    if item['category'] == 'text':
        return f"- {item['content']}\n"
@@ -178,13 +183,15 @@ def json_to_markdown(json_file, markdown_file, logseq=False):
        logging.info(f"Writing Markdown file: {markdown_file}")
        with open(markdown_file, 'w', encoding='utf-8') as md_file:
            with ThreadPoolExecutor(max_workers=max_download_threads) as executor:  # Use a thread pool with 5 threads
-                futures = [executor.submit(process_logseq_item if logseq else process_item, item) for item in data['data']]
+                futures = [executor.submit(process_logseq_item if logseq else process_item, item) for item in
+                           data['data']]
                for future in futures:
                    md_file.write(future.result())  # Write the result to the Markdown file

    except Exception as e:
        logging.error(f"处理JSON文件时出错：{e}")

+
 def get_content():
    # 连接到SQLite数据库
    conn = sqlite3.connect('courses.db')
@@ -208,7 +215,8 @@ def get_content():

        json_filename = os.path.join('json', f'{course_id}.json')
        # copy_json_file_name = os.path.join('data', 'json', f'{course_ids_dict[course_id]}.json').replace('?', '？')
-        copy_json_file_name = os.path.join('course', f'{course_id}', 'json', f'{course_ids_dict[course_id]}.json').replace('?', '？')
+        copy_json_file_name = os.path.join('course', f'{course_id}', 'json',
+                                           f'{course_ids_dict[course_id]}.json').replace('?', '？')
        # md_file_name = os.path.join('data', 'markdown', f'{course_ids_dict[course_id]}.md')
        md_file_name = os.path.join('course', f'{course_id}', f'{course_ids_dict[course_id]}.md')
        if os.path.exists(json_filename):
@@ -228,6 +236,7 @@ def get_content():
        else:
            continue

+
 if __name__ == '__main__':
    # create_audio_transcriptions_table(db_path)
-    get_content()
+    get_content()
--- a/video_voice_process.py
+++ b/video_voice_process.py
@@ -1,17 +1,18 @@
 # -*- coding: utf-8 -*-
+import logging
+import os
 import shutil
+import threading
+from concurrent.futures import ThreadPoolExecutor, as_completed

 import requests
 from pydub import AudioSegment
 from pydub.silence import split_on_silence
-import os
-from concurrent.futures import ThreadPoolExecutor, as_completed
-import logging
-import threading

 # 创建一个锁对象
 file_write_lock = threading.Lock()

+
 # 配置日志
 def configure_logging():
    logger = logging.getLogger()
@@ -29,8 +30,10 @@ def configure_logging():
    console_handler.setFormatter(console_formatter)
    logger.addHandler(console_handler)

+
 configure_logging()

+
 def process_audio_file(audio_file_path):
    file_name_with_extension = os.path.basename(audio_file_path)
    file_name_without_extension = os.path.splitext(file_name_with_extension)[0]
@@ -111,6 +114,7 @@ def process_audio_file(audio_file_path):
    except Exception as e:
        logging.error(f"Failed to process {file_name_with_extension}: {str(e)}")

+
 def main():
    all_files = os.listdir('media')
    audio_files = [file for file in all_files if file.endswith('.wav')]
@@ -121,5 +125,6 @@ def main():
            audio_file_path = os.path.join('media', audio_file)
            executor.submit(process_audio_file, audio_file_path)

+
 if __name__ == "__main__":
-    main()
+    main()
--- a/vocal_getter.py
+++ b/vocal_getter.py
@@ -2,6 +2,7 @@
 from pydub import AudioSegment
 from pydub.silence import split_on_silence

+
 def remove_silent_parts(input_file_path, output_file_path, min_silence_len=1000, silence_thresh=-40):
    """
    函数功能：删除音频文件中的无声部分，并将处理后的音频保存到指定输出路径
@@ -26,10 +27,10 @@ def remove_silent_parts(input_file_path, output_file_path, min_silence_len=1000,
    # 将处理后的音频保存到输出文件路径
    final_audio.export(output_file_path, format="wav")

-if __name__ == "__main__":

+if __name__ == "__main__":
    # 首先使用命令行umx audio.wav来将音频拆分为
    input_file_path = "vocal/this is water_umxl/vocals.wav"
    output_file_path = "vocal/this is water_umxl/vocals_process.wav"

-    remove_silent_parts(input_file_path, output_file_path)
+    remove_silent_parts(input_file_path, output_file_path)