format files
This commit is contained in:
@@ -23,6 +23,7 @@ start_course_id = int(config['DEFAULT']['start_course_id'])
|
||||
headers = headers
|
||||
headers['authorization'] = f'Bearer {authorization_token}'
|
||||
|
||||
|
||||
def download_attachment(attachment, course_id_folder, max_retries):
|
||||
if attachment['name'] in ["", None] or attachment['name'].endswith(".m3u8"):
|
||||
print("字符串为空")
|
||||
@@ -64,6 +65,7 @@ def worker(queue, course_id_folder, max_retries):
|
||||
download_attachment(attachment, course_id_folder, max_retries)
|
||||
queue.task_done()
|
||||
|
||||
|
||||
def get_course():
|
||||
# 连接到SQLite数据库
|
||||
conn = sqlite3.connect('courses.db')
|
||||
@@ -202,4 +204,3 @@ def get_course():
|
||||
|
||||
if __name__ == '__main__':
|
||||
get_course()
|
||||
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import requests
|
||||
import json
|
||||
import sqlite3
|
||||
import configparser
|
||||
import sqlite3
|
||||
|
||||
import requests
|
||||
|
||||
import json
|
||||
from headers import headers
|
||||
|
||||
# 读取配置文件
|
||||
@@ -25,14 +27,14 @@ params = {
|
||||
'sort': sort,
|
||||
}
|
||||
|
||||
def get_list():
|
||||
|
||||
def get_list():
|
||||
response = requests.get('https://bandu-api.songy.info/v2/communities/f0495084-4c6f-4f35-b4d5-2068641a53a1/courses',
|
||||
params=params, headers=headers)
|
||||
|
||||
# 假设response的内容就是你提供的course_list.json
|
||||
course_list_json = response.text
|
||||
|
||||
|
||||
# print(course_list_json)
|
||||
|
||||
json_data = json.loads(course_list_json)
|
||||
@@ -72,5 +74,6 @@ def get_list():
|
||||
cursor.close()
|
||||
conn.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
get_list()
|
||||
|
||||
9
main.py
9
main.py
@@ -3,16 +3,7 @@ from course import get_course
|
||||
from course_list import get_list
|
||||
from markdown_generator import get_content
|
||||
|
||||
# This is a sample Python script.
|
||||
|
||||
# Press ⌃R to execute it or replace it with your code.
|
||||
# Press Double ⇧ to search everywhere for classes, files, tool windows, actions, and settings.
|
||||
|
||||
|
||||
# Press the green button in the gutter to run the script.
|
||||
if __name__ == '__main__':
|
||||
get_list()
|
||||
get_course()
|
||||
get_content()
|
||||
|
||||
# See PyCharm help at https://www.jetbrains. com/help/pycharm/
|
||||
|
||||
@@ -65,6 +65,8 @@ def create_audio_transcriptions_table(db_path):
|
||||
|
||||
# 调用函数创建表
|
||||
db_path = 'courses.db' # 数据库文件路径
|
||||
|
||||
|
||||
# create_audio_transcriptions_table(db_path)
|
||||
|
||||
# 下载音频文件
|
||||
@@ -108,10 +110,12 @@ def voice2txt(voice_path):
|
||||
def save_to_db(course_id, filename, text_value):
|
||||
conn = sqlite3.connect('courses.db')
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("INSERT INTO audio_transcriptions (course_id, filename, text) VALUES (?, ?, ?)", (course_id, filename, text_value))
|
||||
cursor.execute("INSERT INTO audio_transcriptions (course_id, filename, text) VALUES (?, ?, ?)",
|
||||
(course_id, filename, text_value))
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
|
||||
# 检查数据库中是否已存在转换后的文本
|
||||
def check_db_for_text(course_id, filename):
|
||||
conn = sqlite3.connect('courses.db')
|
||||
@@ -157,6 +161,7 @@ def process_item(item):
|
||||
else:
|
||||
return f"[{item['content']}]({item['attachment']['url']})\n"
|
||||
|
||||
|
||||
def process_logseq_item(item):
|
||||
if item['category'] == 'text':
|
||||
return f"- {item['content']}\n"
|
||||
@@ -178,13 +183,15 @@ def json_to_markdown(json_file, markdown_file, logseq=False):
|
||||
logging.info(f"Writing Markdown file: {markdown_file}")
|
||||
with open(markdown_file, 'w', encoding='utf-8') as md_file:
|
||||
with ThreadPoolExecutor(max_workers=max_download_threads) as executor: # Use a thread pool with 5 threads
|
||||
futures = [executor.submit(process_logseq_item if logseq else process_item, item) for item in data['data']]
|
||||
futures = [executor.submit(process_logseq_item if logseq else process_item, item) for item in
|
||||
data['data']]
|
||||
for future in futures:
|
||||
md_file.write(future.result()) # Write the result to the Markdown file
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"处理JSON文件时出错:{e}")
|
||||
|
||||
|
||||
def get_content():
|
||||
# 连接到SQLite数据库
|
||||
conn = sqlite3.connect('courses.db')
|
||||
@@ -208,7 +215,8 @@ def get_content():
|
||||
|
||||
json_filename = os.path.join('json', f'{course_id}.json')
|
||||
# copy_json_file_name = os.path.join('data', 'json', f'{course_ids_dict[course_id]}.json').replace('?', '?')
|
||||
copy_json_file_name = os.path.join('course', f'{course_id}', 'json', f'{course_ids_dict[course_id]}.json').replace('?', '?')
|
||||
copy_json_file_name = os.path.join('course', f'{course_id}', 'json',
|
||||
f'{course_ids_dict[course_id]}.json').replace('?', '?')
|
||||
# md_file_name = os.path.join('data', 'markdown', f'{course_ids_dict[course_id]}.md')
|
||||
md_file_name = os.path.join('course', f'{course_id}', f'{course_ids_dict[course_id]}.md')
|
||||
if os.path.exists(json_filename):
|
||||
@@ -228,6 +236,7 @@ def get_content():
|
||||
else:
|
||||
continue
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# create_audio_transcriptions_table(db_path)
|
||||
get_content()
|
||||
get_content()
|
||||
|
||||
@@ -1,17 +1,18 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import threading
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import requests
|
||||
from pydub import AudioSegment
|
||||
from pydub.silence import split_on_silence
|
||||
import os
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
import logging
|
||||
import threading
|
||||
|
||||
# 创建一个锁对象
|
||||
file_write_lock = threading.Lock()
|
||||
|
||||
|
||||
# 配置日志
|
||||
def configure_logging():
|
||||
logger = logging.getLogger()
|
||||
@@ -29,8 +30,10 @@ def configure_logging():
|
||||
console_handler.setFormatter(console_formatter)
|
||||
logger.addHandler(console_handler)
|
||||
|
||||
|
||||
configure_logging()
|
||||
|
||||
|
||||
def process_audio_file(audio_file_path):
|
||||
file_name_with_extension = os.path.basename(audio_file_path)
|
||||
file_name_without_extension = os.path.splitext(file_name_with_extension)[0]
|
||||
@@ -111,6 +114,7 @@ def process_audio_file(audio_file_path):
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to process {file_name_with_extension}: {str(e)}")
|
||||
|
||||
|
||||
def main():
|
||||
all_files = os.listdir('media')
|
||||
audio_files = [file for file in all_files if file.endswith('.wav')]
|
||||
@@ -121,5 +125,6 @@ def main():
|
||||
audio_file_path = os.path.join('media', audio_file)
|
||||
executor.submit(process_audio_file, audio_file_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main()
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
from pydub import AudioSegment
|
||||
from pydub.silence import split_on_silence
|
||||
|
||||
|
||||
def remove_silent_parts(input_file_path, output_file_path, min_silence_len=1000, silence_thresh=-40):
|
||||
"""
|
||||
函数功能:删除音频文件中的无声部分,并将处理后的音频保存到指定输出路径
|
||||
@@ -26,10 +27,10 @@ def remove_silent_parts(input_file_path, output_file_path, min_silence_len=1000,
|
||||
# 将处理后的音频保存到输出文件路径
|
||||
final_audio.export(output_file_path, format="wav")
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
if __name__ == "__main__":
|
||||
# 首先使用命令行umx audio.wav来将音频拆分为
|
||||
input_file_path = "vocal/this is water_umxl/vocals.wav"
|
||||
output_file_path = "vocal/this is water_umxl/vocals_process.wav"
|
||||
|
||||
remove_silent_parts(input_file_path, output_file_path)
|
||||
remove_silent_parts(input_file_path, output_file_path)
|
||||
|
||||
Reference in New Issue
Block a user