Save all the course JSON data to the database

This commit is contained in:
lostecho
2024-05-26 19:35:57 +08:00
parent 6e4c28fb76
commit b7d1f71575
6 changed files with 175 additions and 6 deletions

View File

@@ -1,4 +1,10 @@
class Attachment:
id: None
course_id: None
name: ""
content: ""
url: ""
def __init__(self, attachment_id, course_id, name, content, url):
self.id = attachment_id
self.course_id = course_id

View File

@@ -2,16 +2,23 @@ from courses.Attachment import Attachment
class Course:
    """A course record together with the attachments collected for it.

    Attributes:
        id: Identifier passed to the constructor as ``course_id``.
        title: Course title.
        description: Course description.
        duration: Course duration. The constructor does not set it, so the
            class-level default of 0 is what instances report until a caller
            assigns a value.
        attachments: Attachment objects added so far.
    """

    title: str
    description: str
    # A real default (not a bare annotation) so ``course.duration`` never
    # raises AttributeError.
    duration: int = 0
    attachments: list

    def __init__(self, course_id, title, description, attachments=None):
        """Create a course.

        Args:
            course_id: Identifier stored as ``self.id``.
            title: Course title.
            description: Course description.
            attachments: Optional initial attachments. ``None`` or an empty
                sequence yields a fresh empty list for this instance.
        """
        self.id = course_id
        self.title = title
        self.description = description
        self.attachments = attachments if attachments else []

    def add_attachment(self, attachment):
        """Append ``attachment`` to this course's attachment list."""
        self.attachments.append(attachment)

    def __repr__(self):
        """Return a debug representation listing every stored field."""
        return f"Course(id={self.id}, title={self.title}, description={self.description}, attachments={self.attachments})"
# Usage example

BIN
courses/course_database.db Normal file

Binary file not shown.

73
courses/create_table.py Normal file
View File

@@ -0,0 +1,73 @@
import sqlite3

# One-off setup script: creates the COURSE, COURSE_ATTACHMENT and JSON_DATA
# tables in course_database.db (relative to the current working directory).
# Every statement uses IF NOT EXISTS, so running the script again leaves
# existing tables untouched.
conn = sqlite3.connect('course_database.db')
print("数据库打开成功")
try:
    c = conn.cursor()
    c.execute('''CREATE TABLE IF NOT EXISTS COURSE
    (
        ID          INTEGER PRIMARY KEY NOT NULL,
        TITLE       VARCHAR(200) NOT NULL,
        DESCRIPTION VARCHAR(500) DEFAULT '' NOT NULL,
        DURATION    INTEGER DEFAULT 0 NOT NULL,
        CREATED_AT  TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
        UPDATED_AT  TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
    );''')
    # ATTACHMENT_ID is textual: the API payload carries a UUID string there
    # (e.g. "a03fd311-76c5-4bdf-a01f-6011ea60cd3d"), not a number.
    c.execute('''CREATE TABLE IF NOT EXISTS COURSE_ATTACHMENT
    (
        ID            INTEGER PRIMARY KEY NOT NULL,
        COURSE_ID     INTEGER DEFAULT 0 NOT NULL,
        CONTENT       VARCHAR(500) DEFAULT '' NOT NULL,
        CATEGORY      VARCHAR(20) DEFAULT '' NOT NULL,
        "ORDER"       INTEGER DEFAULT 0 NOT NULL,
        ATTACHMENT_ID VARCHAR(64) DEFAULT '' NOT NULL,
        DURATION      INTEGER DEFAULT 0 NOT NULL,
        URL           TEXT DEFAULT '' NOT NULL,
        MIME_TYPE     VARCHAR(20) DEFAULT '' NOT NULL,
        CREATED_AT    TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
        UPDATED_AT    TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
    );''')
    # The payload column must be called JSON: the reader/writer code selects
    # and inserts "JSON", and tables.sql declares it that way (with REMARK).
    c.execute('''CREATE TABLE IF NOT EXISTS JSON_DATA
    (
        ID         INTEGER PRIMARY KEY NOT NULL,
        JSON       TEXT DEFAULT '' NOT NULL,
        "TYPE"     VARCHAR(20) DEFAULT '' NOT NULL,
        REMARK     VARCHAR(200) DEFAULT '' NOT NULL,
        CREATED_AT TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
        UPDATED_AT TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
    );''')
    conn.commit()
    print("数据表创建成功")
finally:
    # Release the database handle even when a statement fails.
    conn.close()
'''
{
"id": 8157,
"course_id": 488,
"content": "a03fd311-76c5-4bdf-a01f-6011ea60cd3d.mp3",
"category": "audio",
"attachment_id": "a03fd311-76c5-4bdf-a01f-6011ea60cd3d",
"order": 2,
"duration": 48290,
"created_at": "2024-05-23T07:23:23.082Z",
"updated_at": "2024-05-23T07:24:26.551Z",
"attachment": {
"id": 43229,
"attachment_id": "a03fd311-76c5-4bdf-a01f-6011ea60cd3d",
"name": "a03fd311-76c5-4bdf-a01f-6011ea60cd3d.mp3",
"thumb": "",
"raw": "https://xuexi-courses-storage.firesbox.com/7000102069/replay/a03fd311-76c5-4bdf-a01f-6011ea60cd3d.mp3",
"size": 386349,
"duration": 48290,
"mime_type": "audio/mp3",
"location": "aws_s3",
"created_at": "2024-05-23T07:23:23.059Z",
"updated_at": "2024-05-23T07:24:26.546Z",
"url": "https://xuexi-courses-storage.firesbox.com/7000102069/replay/a03fd311-76c5-4bdf-a01f-6011ea60cd3d.mp3",
"raw_url": "https://xuexi-courses-storage.firesbox.com/7000102069/replay/a03fd311-76c5-4bdf-a01f-6011ea60cd3d.mp3",
"thumb_url": "https://xuexi-courses-storage.firesbox.com/7000102069/replay/a03fd311-76c5-4bdf-a01f-6011ea60cd3d.mp3"
}
}
'''

View File

@@ -1,7 +1,10 @@
import json
import logging
import os
import sqlite3
from pathlib import Path
import random
from time import sleep
import requests
from pydub import AudioSegment
@@ -90,7 +93,8 @@ def get_audio(audio_data):
for file in temp_mp3_files:
file.unlink()
logging.info(f"All MP3 files have been downloaded, merged into {output_filename}, and temporary files have been removed.")
logging.info(
f"All MP3 files have been downloaded, merged into {output_filename}, and temporary files have been removed.")
# 获取全部附件
@@ -99,16 +103,60 @@ def get_all_attachments(attachment_json_data):
attachments = [item for item in data["data"]["course_contents"]]
attachmentlist = []
for attachment in attachments:
attachment = Attachment(attachments[0]["id"], attachments[0]["course_id"], attachments[0]["content"], attachments[0]["content"], attachments[0]["attachment"].get("url"))
attachment = Attachment(attachments[0]["id"], attachments[0]["course_id"], attachments[0]["content"],
attachments[0]["content"], attachments[0]["attachment"].get("url"))
print(attachment)
attachmentlist.append(attachment)
print(attachments)
return attachmentlist
def query_all_course():
    """Return the stored JSON text of the first JSON_DATA row of type "ALL".

    Opens ``course_database.db`` relative to the current working directory,
    reads the ``JSON`` column and closes the connection before returning.

    Returns:
        The ``JSON`` column value of the first row whose ``TYPE`` is "ALL".

    Raises:
        IndexError: If the table holds no row of type "ALL".
    """
    conn = sqlite3.connect('course_database.db')
    print("数据库打开成功")
    try:
        # Bind the type as a parameter: a double-quoted "ALL" inside the SQL
        # is an identifier and only works through SQLite's legacy fallback.
        row = conn.execute(
            'SELECT JSON from JSON_DATA jd WHERE "TYPE" = ?', ("ALL",)
        ).fetchone()
    finally:
        conn.close()
    if row is None:
        raise IndexError('no JSON_DATA row with TYPE "ALL"')
    return row[0]
def query_course_by_id(course_id):
    """Return the stored JSON text of the JSON_DATA row with the given ID.

    Opens ``course_database.db`` relative to the current working directory,
    reads the ``JSON`` column and closes the connection before returning.

    Args:
        course_id: Value matched against the ``ID`` column.

    Returns:
        The ``JSON`` column value of the matching row.

    Raises:
        IndexError: If no row has that ID.
    """
    conn = sqlite3.connect('course_database.db')
    print("数据库打开成功")
    try:
        # Bound parameter instead of string concatenation, so the argument
        # can never be interpreted as SQL.
        row = conn.execute(
            'SELECT JSON from JSON_DATA jd WHERE ID = ?', (course_id,)
        ).fetchone()
    finally:
        conn.close()
    if row is None:
        raise IndexError(f"no JSON_DATA row with ID {course_id!r}")
    return row[0]
def save_course_json(ids):
    """Fetch the JSON of every course in ``ids`` and store it in JSON_DATA.

    For each id the course is requested through ``request_date``, serialized
    and inserted with ``TYPE`` "COURSE". ``INSERT OR IGNORE`` means an id
    that is already stored is left unchanged. Each row is committed on its
    own, and the function sleeps a randomized ~5 seconds between requests.

    Args:
        ids: Iterable of course ids; each one is used both for the request
            and as the row's ``ID``.
    """
    # NOTE(review): bearer token hard-coded in source; it should come from
    # configuration or the environment rather than the repository.
    token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiIxMDAwMDgzNDciLCJleHAiOjE3MTkxODk0ODQsImp0aSI6IjU3ZTJhMzdmLTMyZGEtNGQ2My1hZjQxLTY5NTRlNmU1OTg2MiIsImlhdCI6MTcxNjUxMTA4NCwiaXNzIjoiYXBwdXNlciIsInVpZCI6ImJlMmViOGIyLTFhOTItNGVmMC05ZDAwLTA1YTlkN2E2OWRiMiIsInNjaGVtZSI6Imp3dGhzIiwic2lkIjoiMWI4ZjE1ZTItYjQ5ZC00MmRmLWEwNDUtZmQxYTUwNzI5ZjkxIn0.IO7C2gtsi8lMdrOgWGNuxK-t2zzmDPvmI4BqISHeZEI"
    conn = sqlite3.connect('course_database.db')
    print("数据库打开成功")
    try:
        c = conn.cursor()
        for course_id in ids:
            # Request the course being stored; requesting ids[0] every time
            # would save the first course's payload under every id.
            json_data = request_date(course_id, token)
            # Convert the payload to a string for storage.
            # NOTE(review): str() followed by json.dumps() stores a JSON
            # string literal wrapping the text form of the payload; kept
            # as-is because readers of this column may depend on it - confirm.
            json_string = json.dumps(str(json_data))
            # Insert the JSON string into the SQLite table.
            c.execute(
                "INSERT OR IGNORE INTO JSON_DATA (ID,JSON,TYPE) VALUES (?,?,?)",
                (course_id, json_string, "COURSE"),
            )
            conn.commit()
            print("insert one "+str(course_id) + json_string)
            # Randomized pause between requests.
            secs = random.normalvariate(5, 0.4)
            if secs <= 0:
                secs = 1  # a non-positive draw falls back to one second
            sleep(secs)
    finally:
        # Close the database even if a request or insert fails.
        conn.close()
# Script entry point: a manual run that reads one stored course back from the
# database, lists its attachments, then fetches and stores every course.
if __name__ == '__main__':
# presumably the course ids parsed from the local index file - verify against get_course_id
ids = get_course_id('all/course.json')
token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiIxMDAwMDgzNDciLCJleHAiOjE3MTkxODk0ODQsImp0aSI6IjU3ZTJhMzdmLTMyZGEtNGQ2My1hZjQxLTY5NTRlNmU1OTg2MiIsImlhdCI6MTcxNjUxMTA4NCwiaXNzIjoiYXBwdXNlciIsInVpZCI6ImJlMmViOGIyLTFhOTItNGVmMC05ZDAwLTA1YTlkN2E2OWRiMiIsInNjaGVtZSI6Imp3dGhzIiwic2lkIjoiMWI4ZjE1ZTItYjQ5ZC00MmRmLWEwNDUtZmQxYTUwNzI5ZjkxIn0.IO7C2gtsi8lMdrOgWGNuxK-t2zzmDPvmI4BqISHeZEI"
# NOTE(review): this result is reassigned below before anything reads it.
json_data = request_date(ids[0], token)
#get_audio(json_data)
# json_data = request_date(ids[0], token)
# Replace json_data with the JSON text stored for course 488.
json_data = query_course_by_id(488)
# get_audio(json_data)
print(json_data)
get_all_attachments(json_data)
# Fetch and store the JSON for every id in ids.
save_course_json(ids)

35
courses/tables.sql Normal file
View File

@@ -0,0 +1,35 @@
-- COURSE: one row per course. IF NOT EXISTS makes the statement safe to re-run.
CREATE TABLE IF NOT EXISTS COURSE
(
ID INTEGER PRIMARY KEY NOT NULL,
TITLE VARCHAR(200) NOT NULL,
DESCRIPTION VARCHAR(500) DEFAULT '' NOT NULL,
DURATION INTEGER DEFAULT 0 NOT NULL,
-- Both timestamps default to the insert time; nothing in this file
-- refreshes UPDATED_AT on later updates.
CREATED_AT TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
UPDATED_AT TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
);
-- COURSE_ATTACHMENT: one row per content item attached to a course.
-- "ORDER" is quoted because ORDER is an SQL keyword.
CREATE TABLE IF NOT EXISTS COURSE_ATTACHMENT
(
    ID            INTEGER PRIMARY KEY NOT NULL,
    COURSE_ID     INTEGER DEFAULT 0 NOT NULL,
    CONTENT       VARCHAR(500) DEFAULT '' NOT NULL,
    CATEGORY      VARCHAR(20) DEFAULT '' NOT NULL,
    "ORDER"       INTEGER DEFAULT 0 NOT NULL,
    -- Textual: the course payload's attachment_id is a UUID string such as
    -- "a03fd311-76c5-4bdf-a01f-6011ea60cd3d", not a number.
    ATTACHMENT_ID VARCHAR(64) DEFAULT '' NOT NULL,
    DURATION      INTEGER DEFAULT 0 NOT NULL,
    URL           TEXT DEFAULT '' NOT NULL,
    MIME_TYPE     VARCHAR(20) DEFAULT '' NOT NULL,
    CREATED_AT    TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
    UPDATED_AT    TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
);
-- JSON_DATA: raw JSON payloads, one row per ID, tagged by "TYPE".
-- "TYPE" is written quoted here, matching how the table's readers quote it.
CREATE TABLE IF NOT EXISTS JSON_DATA
(
    ID         INTEGER PRIMARY KEY NOT NULL,
    JSON       TEXT DEFAULT '' NOT NULL,
    "TYPE"     VARCHAR(20) DEFAULT '' NOT NULL,
    REMARK     VARCHAR(200) DEFAULT '' NOT NULL,
    CREATED_AT TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
    UPDATED_AT TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
);