change file save path
This commit is contained in:
10
courses/Attachment.py
Normal file
10
courses/Attachment.py
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
class Attachment:
    """A named resource attached to a course.

    Attributes are stored exactly as passed to the constructor:
        id: the ``attachment_id`` argument.
        course_id: identifier of the course the attachment belongs to.
        name: display name of the attachment.
        content: optional payload; ``None`` when not supplied.
        url: optional location of the attachment; ``None`` when not supplied.
    """

    def __init__(self, attachment_id, course_id, name, content=None, url=None):
        """Store the given fields on the instance.

        ``content`` and ``url`` default to ``None`` so that an attachment can
        be described by its URL alone (``Attachment(..., url=...)``) without
        raising ``TypeError`` for a missing ``content`` argument.
        """
        self.id = attachment_id
        self.course_id = course_id
        self.name = name
        self.content = content
        self.url = url

    def __repr__(self):
        # ``content`` is intentionally left out of the representation, as in
        # the original implementation.
        return f"Attachment(id={self.id}, course_id={self.course_id}, name={self.name}, url={self.url})"
|
||||||
27
courses/Course.py
Normal file
27
courses/Course.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
from courses.Attachment import Attachment
|
||||||
|
|
||||||
|
|
||||||
|
class Course:
    """A course identified by an id and a title, holding a list of attachments."""

    def __init__(self, course_id, title, attachments=None):
        """Create a course.

        Args:
            course_id: Identifier stored as ``self.id``.
            title: Title of the course.
            attachments: Optional iterable of initial attachments. A falsy
                value (``None`` or an empty container) yields an empty list.
        """
        self.id = course_id
        self.title = title
        # Copy the incoming sequence: keeping the caller's list by reference
        # would let add_attachment() mutate an object the caller still owns.
        self.attachments = list(attachments) if attachments else []

    def add_attachment(self, attachment):
        """Append ``attachment`` to this course's attachment list."""
        self.attachments.append(attachment)

    def __repr__(self):
        return f"Course(id={self.id}, title={self.title}, attachments={self.attachments})"
|
||||||
|
|
||||||
|
|
||||||
|
# Usage example.
# Guarded so that importing this module does not build demo objects or print.
if __name__ == '__main__':
    # Create attachment instances. No payload is attached in this example, so
    # ``content`` is passed explicitly as None.
    attachment1 = Attachment(attachment_id=1, course_id=101, name='Lesson 1', content=None,
                             url='http://example.com/lesson1.mp3')
    attachment2 = Attachment(attachment_id=2, course_id=101, name='Lesson 2', content=None,
                             url='http://example.com/lesson2.mp3')

    # Create a course instance and add the attachments to it.
    course = Course(course_id=101, title='Introduction to Python')
    course.add_attachment(attachment1)
    course.add_attachment(attachment2)

    print(course)
|
||||||
20678
courses/all/course.json
Normal file
20678
courses/all/course.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,6 @@
|
|||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
@@ -7,19 +9,16 @@ from pydub import AudioSegment
|
|||||||
|
|
||||||
def get_course_id(date_file_path):
    """Return the ``id`` of every course listed in a course JSON file.

    Args:
        date_file_path: Path of a UTF-8 encoded JSON file whose top-level
            object contains ``data -> items``, a list of objects that each
            carry an ``id`` key.

    Returns:
        A list with the ``id`` value of each item, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If ``data``, ``items`` or an item's ``id`` is missing.
    """
    # Parse straight from the file handle instead of reading the text first.
    with open(date_file_path, encoding="UTF-8") as file:
        data = json.load(file)

    # Collect the id of each entry of the items list.
    return [item['id'] for item in data['data']['items']]
|
||||||
|
|
||||||
|
|
||||||
# 通过request来爬取课程信息json数据
|
# 通过request来爬取课程信息json数据
|
||||||
@@ -68,24 +67,32 @@ def get_audio(audio_data):
|
|||||||
# 准备下载和合并MP3文件
|
# 准备下载和合并MP3文件
|
||||||
output_title = data["data"]["title"].replace(".", "_").replace("/", "_") # 替换文件名中不允许的字符
|
output_title = data["data"]["title"].replace(".", "_").replace("/", "_") # 替换文件名中不允许的字符
|
||||||
output_filename = Path(f"{output_title}.mp3")
|
output_filename = Path(f"{output_title}.mp3")
|
||||||
|
save_file_path = os.path.join(output_title, output_filename)
|
||||||
temp_mp3_files = [Path(f"{idx}.mp3") for idx in range(len(mp3_urls))]
|
temp_mp3_files = [Path(f"{idx}.mp3") for idx in range(len(mp3_urls))]
|
||||||
|
|
||||||
|
# 创建对应title的文件夹
|
||||||
|
if not os.path.exists(output_title):
|
||||||
|
os.makedirs(output_title)
|
||||||
|
logging.info("Folder created")
|
||||||
|
else:
|
||||||
|
logging.info("Folder already exists")
|
||||||
|
|
||||||
# 下载每个MP3文件
|
# 下载每个MP3文件
|
||||||
for url, filename in zip(mp3_urls, temp_mp3_files):
|
for url, filename in zip(mp3_urls, temp_mp3_files):
|
||||||
download_mp3(url, filename)
|
download_mp3(url, filename)
|
||||||
|
|
||||||
# 合并MP3文件
|
# 合并MP3文件
|
||||||
merge_mp3_files(temp_mp3_files, output_filename)
|
merge_mp3_files(temp_mp3_files, save_file_path)
|
||||||
|
|
||||||
# 清理临时文件
|
# 清理临时文件
|
||||||
for file in temp_mp3_files:
|
for file in temp_mp3_files:
|
||||||
file.unlink()
|
file.unlink()
|
||||||
|
|
||||||
print(f"All MP3 files have been downloaded, merged into {output_filename}, and temporary files have been removed.")
|
logging.info(f"All MP3 files have been downloaded, merged into {output_filename}, and temporary files have been removed.")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # get_audio reports its progress through logging.info; the root logger
    # defaults to WARNING, so configure it here or those messages are dropped.
    # basicConfig does nothing if handlers were already installed elsewhere.
    logging.basicConfig(level=logging.INFO)

    ids = get_course_id('all/course.json')
    if not ids:
        # Fail with a readable message instead of an IndexError on ids[0].
        raise SystemExit("No course ids found in all/course.json")

    # NOTE(review): the fallback literal looks like a JWT credential committed
    # to source control. Prefer supplying it via the COURSE_API_TOKEN
    # environment variable and remove (and revoke) the literal once no caller
    # depends on it.
    token = os.environ.get(
        "COURSE_API_TOKEN",
        "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiIxMDAwMDgzNDciLCJleHAiOjE3MTkxODk0ODQsImp0aSI6IjU3ZTJhMzdmLTMyZGEtNGQ2My1hZjQxLTY5NTRlNmU1OTg2MiIsImlhdCI6MTcxNjUxMTA4NCwiaXNzIjoiYXBwdXNlciIsInVpZCI6ImJlMmViOGIyLTFhOTItNGVmMC05ZDAwLTA1YTlkN2E2OWRiMiIsInNjaGVtZSI6Imp3dGhzIiwic2lkIjoiMWI4ZjE1ZTItYjQ5ZC00MmRmLWEwNDUtZmQxYTUwNzI5ZjkxIn0.IO7C2gtsi8lMdrOgWGNuxK-t2zzmDPvmI4BqISHeZEI",
    )

    # Fetch the data for the first course only and download/merge its audio.
    json_data = request_date(ids[0], token)
    get_audio(json_data)
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
import json
|
|
||||||
|
|
||||||
# Read the raw course listing from disk.
with open('course_data.json', encoding="UTF-8") as file:
    json_data = file.read()

# Parse the JSON text.
data = json.loads(json_data)

# Gather the id of every entry of the items list.
ids = [item['id'] for item in data['data']['items']]

# Print the result.
print(ids)
|
|
||||||
@@ -1,3 +1,4 @@
|
|||||||
requests~=2.31.0
|
requests>=2.31.0
|
||||||
ipython~=8.24.0
|
ipython~=8.24.0
|
||||||
Scrapy~=2.11.2
|
Scrapy~=2.11.2
|
||||||
|
pydub~=0.25.1
|
||||||
20690
songyi/courses.json
20690
songyi/courses.json
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user