format files

This commit is contained in:
YuanHui
2025-02-13 17:06:43 +08:00
parent 95ae505193
commit 04f82dd9cf
6 changed files with 36 additions and 26 deletions

View File

@@ -23,6 +23,7 @@ start_course_id = int(config['DEFAULT']['start_course_id'])
headers = headers
headers['authorization'] = f'Bearer {authorization_token}'
def download_attachment(attachment, course_id_folder, max_retries):
if attachment['name'] in ["", None] or attachment['name'].endswith(".m3u8"):
print("字符串为空")
@@ -64,6 +65,7 @@ def worker(queue, course_id_folder, max_retries):
download_attachment(attachment, course_id_folder, max_retries)
queue.task_done()
def get_course():
# 连接到SQLite数据库
conn = sqlite3.connect('courses.db')
@@ -202,4 +204,3 @@ def get_course():
if __name__ == '__main__':
get_course()

View File

@@ -1,8 +1,10 @@
# -*- coding: utf-8 -*-
import requests
import json
import sqlite3
import configparser
import sqlite3
import requests
import json
from headers import headers
# 读取配置文件
@@ -25,14 +27,14 @@ params = {
'sort': sort,
}
def get_list():
def get_list():
response = requests.get('https://bandu-api.songy.info/v2/communities/f0495084-4c6f-4f35-b4d5-2068641a53a1/courses',
params=params, headers=headers)
# 假设response的内容就是你提供的course_list.json
course_list_json = response.text
# print(course_list_json)
json_data = json.loads(course_list_json)
@@ -72,5 +74,6 @@ def get_list():
cursor.close()
conn.close()
if __name__ == '__main__':
get_list()

View File

@@ -3,16 +3,7 @@ from course import get_course
from course_list import get_list
from markdown_generator import get_content
# This is a sample Python script.
# Press ⌃R to execute it or replace it with your code.
# Press Double ⇧ to search everywhere for classes, files, tool windows, actions, and settings.
# Press the green button in the gutter to run the script.
if __name__ == '__main__':
get_list()
get_course()
get_content()
# See PyCharm help at https://www.jetbrains.com/help/pycharm/

View File

@@ -65,6 +65,8 @@ def create_audio_transcriptions_table(db_path):
# 调用函数创建表
db_path = 'courses.db' # 数据库文件路径
# create_audio_transcriptions_table(db_path)
# 下载音频文件
@@ -108,10 +110,12 @@ def voice2txt(voice_path):
def save_to_db(course_id, filename, text_value):
conn = sqlite3.connect('courses.db')
cursor = conn.cursor()
cursor.execute("INSERT INTO audio_transcriptions (course_id, filename, text) VALUES (?, ?, ?)", (course_id, filename, text_value))
cursor.execute("INSERT INTO audio_transcriptions (course_id, filename, text) VALUES (?, ?, ?)",
(course_id, filename, text_value))
conn.commit()
conn.close()
# 检查数据库中是否已存在转换后的文本
def check_db_for_text(course_id, filename):
conn = sqlite3.connect('courses.db')
@@ -157,6 +161,7 @@ def process_item(item):
else:
return f"[{item['content']}]({item['attachment']['url']})\n"
def process_logseq_item(item):
if item['category'] == 'text':
return f"- {item['content']}\n"
@@ -178,13 +183,15 @@ def json_to_markdown(json_file, markdown_file, logseq=False):
logging.info(f"Writing Markdown file: {markdown_file}")
with open(markdown_file, 'w', encoding='utf-8') as md_file:
with ThreadPoolExecutor(max_workers=max_download_threads) as executor: # Use a thread pool with 5 threads
futures = [executor.submit(process_logseq_item if logseq else process_item, item) for item in data['data']]
futures = [executor.submit(process_logseq_item if logseq else process_item, item) for item in
data['data']]
for future in futures:
md_file.write(future.result()) # Write the result to the Markdown file
except Exception as e:
logging.error(f"处理JSON文件时出错{e}")
def get_content():
# 连接到SQLite数据库
conn = sqlite3.connect('courses.db')
@@ -208,7 +215,8 @@ def get_content():
json_filename = os.path.join('json', f'{course_id}.json')
# copy_json_file_name = os.path.join('data', 'json', f'{course_ids_dict[course_id]}.json').replace('?', '')
copy_json_file_name = os.path.join('course', f'{course_id}', 'json', f'{course_ids_dict[course_id]}.json').replace('?', '')
copy_json_file_name = os.path.join('course', f'{course_id}', 'json',
f'{course_ids_dict[course_id]}.json').replace('?', '')
# md_file_name = os.path.join('data', 'markdown', f'{course_ids_dict[course_id]}.md')
md_file_name = os.path.join('course', f'{course_id}', f'{course_ids_dict[course_id]}.md')
if os.path.exists(json_filename):
@@ -228,6 +236,7 @@ def get_content():
else:
continue
if __name__ == '__main__':
# create_audio_transcriptions_table(db_path)
get_content()
get_content()

View File

@@ -1,17 +1,18 @@
# -*- coding: utf-8 -*-
import logging
import os
import shutil
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests
from pydub import AudioSegment
from pydub.silence import split_on_silence
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
import logging
import threading
# 创建一个锁对象
file_write_lock = threading.Lock()
# 配置日志
def configure_logging():
logger = logging.getLogger()
@@ -29,8 +30,10 @@ def configure_logging():
console_handler.setFormatter(console_formatter)
logger.addHandler(console_handler)
configure_logging()
def process_audio_file(audio_file_path):
file_name_with_extension = os.path.basename(audio_file_path)
file_name_without_extension = os.path.splitext(file_name_with_extension)[0]
@@ -111,6 +114,7 @@ def process_audio_file(audio_file_path):
except Exception as e:
logging.error(f"Failed to process {file_name_with_extension}: {str(e)}")
def main():
all_files = os.listdir('media')
audio_files = [file for file in all_files if file.endswith('.wav')]
@@ -121,5 +125,6 @@ def main():
audio_file_path = os.path.join('media', audio_file)
executor.submit(process_audio_file, audio_file_path)
if __name__ == "__main__":
main()
main()

View File

@@ -2,6 +2,7 @@
from pydub import AudioSegment
from pydub.silence import split_on_silence
def remove_silent_parts(input_file_path, output_file_path, min_silence_len=1000, silence_thresh=-40):
"""
函数功能:删除音频文件中的无声部分,并将处理后的音频保存到指定输出路径
@@ -26,10 +27,10 @@ def remove_silent_parts(input_file_path, output_file_path, min_silence_len=1000,
# 将处理后的音频保存到输出文件路径
final_audio.export(output_file_path, format="wav")
if __name__ == "__main__":
if __name__ == "__main__":
# 首先使用命令行umx audio.wav来将音频拆分为
input_file_path = "vocal/this is water_umxl/vocals.wav"
output_file_path = "vocal/this is water_umxl/vocals_process.wav"
remove_silent_parts(input_file_path, output_file_path)
remove_silent_parts(input_file_path, output_file_path)