class Course:
    """A course and the attachments that belong to it.

    Attributes:
        id: Identifier passed to the constructor as ``course_id``.
        title: Course title.
        description: Free-text description; ``""`` when not supplied.
        duration: Course duration; ``0`` when not supplied.  The unit is not
            established by this class -- TODO confirm against the data source.
        attachments: List of attachment objects, filled at construction time
            or through :meth:`add_attachment`.
    """

    # Declarations only.  A value written in the annotation slot (for example
    # ``attachments: []``) creates no attribute, so every attribute is
    # assigned per instance in __init__ instead of relying on class defaults.
    id: int
    title: str
    description: str
    duration: int
    attachments: list

    def __init__(self, course_id, title, description="", attachments=None,
                 duration=0):
        """Create a course.

        Args:
            course_id: Identifier stored as ``self.id``.
            title: Course title.
            description: Optional description, defaults to ``""`` so a call
                that passes only an id and a title still works.
            attachments: Optional list used as the attachment list.  When it
                is ``None`` a new empty list is created for this instance.
            duration: Optional duration, defaults to ``0``.
        """
        self.id = course_id
        self.title = title
        self.description = description
        self.duration = duration
        # Test for None rather than truthiness so that an empty list supplied
        # by the caller is kept instead of being silently replaced.
        self.attachments = [] if attachments is None else attachments

    def add_attachment(self, attachment):
        """Append ``attachment`` to this course's attachment list."""
        self.attachments.append(attachment)

    def __repr__(self):
        return (
            f"Course(id={self.id}, title={self.title}, "
            f"description={self.description}, attachments={self.attachments})"
        )
"""Create the SQLite tables used by the course tooling.

Running this file as a script creates the COURSE, COURSE_ATTACHMENT and
JSON_DATA tables in ``course_database.db`` if they do not exist yet.
"""
import sqlite3

DEFAULT_DB_PATH = 'course_database.db'

# One CREATE statement per table.  Every statement uses IF NOT EXISTS, so
# running the script against an existing database leaves existing tables (and
# their current column layout) untouched.
_SCHEMA_STATEMENTS = (
    '''CREATE TABLE IF NOT EXISTS COURSE
(
    ID          INTEGER PRIMARY KEY NOT NULL,
    TITLE       VARCHAR(200) NOT NULL,
    DESCRIPTION VARCHAR(500) DEFAULT '' NOT NULL,
    DURATION    INTEGER DEFAULT 0 NOT NULL,
    CREATED_AT  TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
    UPDATED_AT  TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
);''',
    # NOTE(review): ATTACHMENT_ID is declared INTEGER, but the sample payload
    # at the bottom of this file carries a UUID string in "attachment_id" --
    # confirm the intended column type.
    '''CREATE TABLE IF NOT EXISTS COURSE_ATTACHMENT
(
    ID            INTEGER PRIMARY KEY NOT NULL,
    COURSE_ID     INTEGER DEFAULT 0 NOT NULL,
    CONTENT       VARCHAR(500) DEFAULT '' NOT NULL,
    CATEGORY      VARCHAR(20) DEFAULT '' NOT NULL,
    "ORDER"       INTEGER DEFAULT 0 NOT NULL,
    ATTACHMENT_ID INTEGER DEFAULT 0 NOT NULL,
    DURATION      INTEGER DEFAULT 0 NOT NULL,
    URL           TEXT DEFAULT '' NOT NULL,
    MIME_TYPE     VARCHAR(20) DEFAULT '' NOT NULL,
    CREATED_AT    TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
    UPDATED_AT    TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
);''',
    # The payload column is named JSON (not JSON_DATA) and REMARK is present
    # so that this table matches tables.sql and the statements issued by
    # parse_course.py ("SELECT JSON from JSON_DATA ...",
    # "INSERT ... INTO JSON_DATA (ID,JSON,TYPE) ...").
    '''CREATE TABLE IF NOT EXISTS JSON_DATA
(
    ID         INTEGER PRIMARY KEY NOT NULL,
    JSON       TEXT DEFAULT '' NOT NULL,
    "TYPE"     VARCHAR(20) DEFAULT '' NOT NULL,
    REMARK     VARCHAR(200) DEFAULT '' NOT NULL,
    CREATED_AT TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
    UPDATED_AT TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
);''',
)


def create_tables(db_path=DEFAULT_DB_PATH):
    """Create the three tables in the SQLite database at ``db_path``.

    Args:
        db_path: Path of the SQLite database file.  Defaults to
            ``course_database.db``, resolved against the current working
            directory.

    Raises:
        sqlite3.Error: If the database cannot be opened or a statement fails.
    """
    conn = sqlite3.connect(db_path)
    try:
        print("数据库打开成功")
        cursor = conn.cursor()
        for statement in _SCHEMA_STATEMENTS:
            cursor.execute(statement)
        print("数据表创建成功")
        conn.commit()
    finally:
        # Release the file handle even when a statement raised.
        conn.close()


if __name__ == "__main__":
    create_tables()

# Sample "course content" record that the COURSE_ATTACHMENT columns were
# modelled on:
#
# {
#     "id": 8157,
#     "course_id": 488,
#     "content": "a03fd311-76c5-4bdf-a01f-6011ea60cd3d.mp3",
#     "category": "audio",
#     "attachment_id": "a03fd311-76c5-4bdf-a01f-6011ea60cd3d",
#     "order": 2,
#     "duration": 48290,
#     "created_at": "2024-05-23T07:23:23.082Z",
#     "updated_at": "2024-05-23T07:24:26.551Z",
#     "attachment": {
#         "id": 43229,
#         "attachment_id": "a03fd311-76c5-4bdf-a01f-6011ea60cd3d",
#         "name": "a03fd311-76c5-4bdf-a01f-6011ea60cd3d.mp3",
#         "thumb": "",
#         "raw": "https://xuexi-courses-storage.firesbox.com/7000102069/replay/a03fd311-76c5-4bdf-a01f-6011ea60cd3d.mp3",
#         "size": 386349,
#         "duration": 48290,
#         "mime_type": "audio/mp3",
#         "location": "aws_s3",
#         "created_at": "2024-05-23T07:23:23.059Z",
#         "updated_at": "2024-05-23T07:24:26.546Z",
#         "url": "https://xuexi-courses-storage.firesbox.com/7000102069/replay/a03fd311-76c5-4bdf-a01f-6011ea60cd3d.mp3",
#         "raw_url": "https://xuexi-courses-storage.firesbox.com/7000102069/replay/a03fd311-76c5-4bdf-a01f-6011ea60cd3d.mp3",
#         "thumb_url": "https://xuexi-courses-storage.firesbox.com/7000102069/replay/a03fd311-76c5-4bdf-a01f-6011ea60cd3d.mp3"
#     }
# }
def query_all_course(db_path='course_database.db'):
    """Return the JSON text of the first JSON_DATA row whose TYPE is ``ALL``.

    Args:
        db_path: Path of the SQLite database file.

    Returns:
        The value of the JSON column of the first matching row.

    Raises:
        IndexError: If no row has TYPE ``ALL``.
        sqlite3.Error: If the query fails.
    """
    conn = sqlite3.connect(db_path)
    try:
        print("数据库打开成功")
        # Bind the value: a double-quoted "ALL" is an identifier in SQL and
        # only works as a string through SQLite's legacy fallback.
        rows = conn.execute(
            'SELECT JSON from JSON_DATA jd WHERE "TYPE" = ?', ("ALL",)
        ).fetchall()
    finally:
        conn.close()
    if not rows:
        raise IndexError('no JSON_DATA row with TYPE "ALL"')
    return rows[0][0]


def query_course_by_id(course_id, db_path='course_database.db'):
    """Return the JSON text stored in JSON_DATA for ``course_id``.

    Args:
        course_id: Value matched against the ID column.
        db_path: Path of the SQLite database file.

    Returns:
        The value of the JSON column of the matching row.

    Raises:
        IndexError: If no row has that ID.
        sqlite3.Error: If the query fails.
    """
    conn = sqlite3.connect(db_path)
    try:
        print("数据库打开成功")
        # Bound parameter instead of string concatenation, so the id can
        # never be interpreted as SQL.
        rows = conn.execute(
            'SELECT JSON from JSON_DATA jd WHERE ID = ?', (course_id,)
        ).fetchall()
    finally:
        conn.close()
    if not rows:
        raise IndexError(f"no JSON_DATA row with ID {course_id!r}")
    return rows[0][0]


def _as_json_text(payload):
    """Return ``payload`` as JSON text suitable for the JSON column.

    Text is stored unchanged and bytes are decoded as UTF-8; anything else is
    serialised with ``json.dumps``.  The return type of ``request_date`` is
    not visible from this part of the file, so all three shapes are accepted.
    """
    if isinstance(payload, str):
        return payload
    if isinstance(payload, bytes):
        return payload.decode("utf-8")
    return json.dumps(payload, ensure_ascii=False)


def save_course_json(ids, token=None, db_path='course_database.db'):
    """Download every course in ``ids`` and store its JSON in JSON_DATA.

    Each course is fetched with ``request_date(course_id, token)`` and
    inserted with TYPE ``COURSE``; rows whose ID already exists are left
    untouched (``INSERT OR IGNORE``).  Every row is committed immediately so
    an interrupted run keeps what it already downloaded, and the loop pauses
    for a randomised interval after each request.

    Args:
        ids: Iterable of course ids.
        token: Bearer token passed to ``request_date``.  When ``None`` it is
            read from the ``COURSE_API_TOKEN`` environment variable.
        db_path: Path of the SQLite database file.

    Raises:
        RuntimeError: If no token was passed and ``COURSE_API_TOKEN`` is unset.
        sqlite3.Error: If an insert fails.
    """
    # SECURITY: this function used to embed a JWT in the source.  Credentials
    # must not be committed; supply the token at runtime instead.  The token
    # that was committed should be treated as leaked and revoked.
    if token is None:
        token = os.environ.get("COURSE_API_TOKEN")
    if not token:
        raise RuntimeError(
            "no API token: pass token=... or set COURSE_API_TOKEN"
        )

    conn = sqlite3.connect(db_path)
    try:
        print("数据库打开成功")
        cursor = conn.cursor()
        for course_id in ids:
            # Fetch the course being stored (the loop previously requested
            # ids[0] on every iteration).
            json_data = request_date(course_id, token)

            # Store parseable JSON text rather than a JSON-quoted str().
            json_string = _as_json_text(json_data)

            cursor.execute(
                "INSERT OR IGNORE INTO JSON_DATA (ID,JSON,TYPE) VALUES (?,?,?)",
                (course_id, json_string, "COURSE"),
            )
            conn.commit()
            print("insert one " + str(course_id) + json_string)

            # Random pause between requests, centred on 5 seconds.
            secs = random.normalvariate(5, 0.4)
            if secs <= 0:
                # Guard against a non-positive draw.
                # NOTE(review): the original comment said "reset to the mean"
                # (which would be 5) while the code uses 1 -- confirm intent.
                secs = 1
            sleep(secs)
    finally:
        conn.close()


if __name__ == '__main__':
    ids = get_course_id('all/course.json')
    json_data = query_course_by_id(488)
    print(json_data)
    get_all_attachments(json_data)
    # The token is taken from the COURSE_API_TOKEN environment variable.
    save_course_json(ids)
-- Schema for the course database (SQLite).
-- Every statement uses IF NOT EXISTS, so the script can be re-run safely.

-- One row per course.
CREATE TABLE IF NOT EXISTS COURSE
(
    ID          INTEGER PRIMARY KEY NOT NULL,
    TITLE       VARCHAR(200) NOT NULL,
    DESCRIPTION VARCHAR(500) DEFAULT '' NOT NULL,
    DURATION    INTEGER DEFAULT 0 NOT NULL,
    CREATED_AT  TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
    UPDATED_AT  TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
);

-- One row per content item attached to a course.
-- "ORDER" is quoted because ORDER is an SQL keyword.
-- NOTE(review): ATTACHMENT_ID is INTEGER, but the sample payload recorded in
-- create_table.py carries a UUID string in "attachment_id" -- confirm the
-- intended type.
CREATE TABLE IF NOT EXISTS COURSE_ATTACHMENT
(
    ID            INTEGER PRIMARY KEY NOT NULL,
    COURSE_ID     INTEGER DEFAULT 0 NOT NULL,
    CONTENT       VARCHAR(500) DEFAULT '' NOT NULL,
    CATEGORY      VARCHAR(20) DEFAULT '' NOT NULL,
    "ORDER"       INTEGER DEFAULT 0 NOT NULL,
    ATTACHMENT_ID INTEGER DEFAULT 0 NOT NULL,
    DURATION      INTEGER DEFAULT 0 NOT NULL,
    URL           TEXT DEFAULT '' NOT NULL,
    MIME_TYPE     VARCHAR(20) DEFAULT '' NOT NULL,
    CREATED_AT    TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
    UPDATED_AT    TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
);

-- JSON documents keyed by ID; parse_course.py reads and writes the
-- ID, JSON and "TYPE" columns (TYPE values used there: 'ALL', 'COURSE').
CREATE TABLE IF NOT EXISTS JSON_DATA
(
    ID         INTEGER PRIMARY KEY NOT NULL,
    JSON       TEXT DEFAULT '' NOT NULL,
    "TYPE"     VARCHAR(20) DEFAULT '' NOT NULL,
    REMARK     VARCHAR(200) DEFAULT '' NOT NULL,
    CREATED_AT TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
    UPDATED_AT TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
);