Save all the course JSON data to the database
This commit is contained in:
@@ -1,4 +1,10 @@
|
|||||||
class Attachment:
|
class Attachment:
|
||||||
|
id: None
|
||||||
|
course_id: None
|
||||||
|
name: ""
|
||||||
|
content: ""
|
||||||
|
url: ""
|
||||||
|
|
||||||
def __init__(self, attachment_id, course_id, name, content, url):
|
def __init__(self, attachment_id, course_id, name, content, url):
|
||||||
self.id = attachment_id
|
self.id = attachment_id
|
||||||
self.course_id = course_id
|
self.course_id = course_id
|
||||||
|
|||||||
@@ -2,16 +2,23 @@ from courses.Attachment import Attachment
|
|||||||
|
|
||||||
|
|
||||||
class Course:
    """A course together with the attachments that belong to it.

    Attributes:
        id: Course identifier, as passed to the constructor.
        title: Course title.
        description: Course description.
        duration: Not set by the constructor; defaults to 0.
        attachments: Attachments added to this course.
    """

    id: int
    title: str
    description: str
    # A real class-level default: the constructor never assigns ``duration``,
    # so without it reading ``course.duration`` raises AttributeError.
    duration: int = 0
    attachments: list

    def __init__(self, course_id, title, description, attachments=None):
        """Store the course fields.

        Args:
            course_id: Identifier stored as ``self.id``.
            title: Course title.
            description: Course description.
            attachments: Optional initial attachments. A falsy value (``None``
                or an empty list) is replaced by a fresh list, so instances
                never share one list object.
        """
        self.id = course_id
        self.title = title
        self.description = description
        self.attachments = attachments if attachments else []

    def add_attachment(self, attachment):
        """Append ``attachment`` to this course's attachment list."""
        self.attachments.append(attachment)

    def __repr__(self):
        return f"Course(id={self.id}, title={self.title}, description={self.description}, attachments={self.attachments})"
|
||||||
|
|
||||||
|
|
||||||
# 使用示例
|
# 使用示例
|
||||||
|
|||||||
BIN
courses/course_database.db
Normal file
BIN
courses/course_database.db
Normal file
Binary file not shown.
73
courses/create_table.py
Normal file
73
courses/create_table.py
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
import sqlite3

# DDL for the three tables used by the course scraper.
#
# The JSON_DATA payload column is named JSON (not JSON_DATA) and the table
# carries a REMARK column, matching courses/tables.sql and the
# "SELECT JSON ..." / "INSERT ... (ID,JSON,TYPE)" statements that use it.
# Because every statement is CREATE TABLE IF NOT EXISTS, a database file
# created with the older column name is NOT migrated by running this script.
#
# NOTE(review): COURSE_ATTACHMENT.ATTACHMENT_ID is declared INTEGER DEFAULT 0,
# but the sample payload at the bottom of this file carries a UUID string in
# "attachment_id" -- confirm the intended column type.
_SCHEMA_STATEMENTS = (
    '''CREATE TABLE IF NOT EXISTS COURSE
    (
        ID          INTEGER PRIMARY KEY NOT NULL,
        TITLE       VARCHAR(200) NOT NULL,
        DESCRIPTION VARCHAR(500) DEFAULT '' NOT NULL,
        DURATION    INTEGER DEFAULT 0 NOT NULL,
        CREATED_AT  TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
        UPDATED_AT  TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
    );''',
    '''CREATE TABLE IF NOT EXISTS COURSE_ATTACHMENT
    (
        ID            INTEGER PRIMARY KEY NOT NULL,
        COURSE_ID     INTEGER DEFAULT 0 NOT NULL,
        CONTENT       VARCHAR(500) DEFAULT '' NOT NULL,
        CATEGORY      VARCHAR(20) DEFAULT '' NOT NULL,
        "ORDER"       INTEGER DEFAULT 0 NOT NULL,
        ATTACHMENT_ID INTEGER DEFAULT 0 NOT NULL,
        DURATION      INTEGER DEFAULT 0 NOT NULL,
        URL           TEXT DEFAULT '' NOT NULL,
        MIME_TYPE     VARCHAR(20) DEFAULT '' NOT NULL,
        CREATED_AT    TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
        UPDATED_AT    TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
    );''',
    '''CREATE TABLE IF NOT EXISTS JSON_DATA
    (
        ID         INTEGER PRIMARY KEY NOT NULL,
        JSON       TEXT DEFAULT '' NOT NULL,
        "TYPE"     VARCHAR(20) DEFAULT '' NOT NULL,
        REMARK     VARCHAR(200) DEFAULT '' NOT NULL,
        CREATED_AT TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
        UPDATED_AT TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
    );''',
)

conn = sqlite3.connect('course_database.db')
print("数据库打开成功")
try:
    c = conn.cursor()
    for statement in _SCHEMA_STATEMENTS:
        c.execute(statement)
    print("数据表创建成功")
    conn.commit()
finally:
    # Release the file handle even when one of the statements fails.
    conn.close()
|
||||||
|
|
||||||
|
'''
|
||||||
|
{
|
||||||
|
"id": 8157,
|
||||||
|
"course_id": 488,
|
||||||
|
"content": "a03fd311-76c5-4bdf-a01f-6011ea60cd3d.mp3",
|
||||||
|
"category": "audio",
|
||||||
|
"attachment_id": "a03fd311-76c5-4bdf-a01f-6011ea60cd3d",
|
||||||
|
"order": 2,
|
||||||
|
"duration": 48290,
|
||||||
|
"created_at": "2024-05-23T07:23:23.082Z",
|
||||||
|
"updated_at": "2024-05-23T07:24:26.551Z",
|
||||||
|
"attachment": {
|
||||||
|
"id": 43229,
|
||||||
|
"attachment_id": "a03fd311-76c5-4bdf-a01f-6011ea60cd3d",
|
||||||
|
"name": "a03fd311-76c5-4bdf-a01f-6011ea60cd3d.mp3",
|
||||||
|
"thumb": "",
|
||||||
|
"raw": "https://xuexi-courses-storage.firesbox.com/7000102069/replay/a03fd311-76c5-4bdf-a01f-6011ea60cd3d.mp3",
|
||||||
|
"size": 386349,
|
||||||
|
"duration": 48290,
|
||||||
|
"mime_type": "audio/mp3",
|
||||||
|
"location": "aws_s3",
|
||||||
|
"created_at": "2024-05-23T07:23:23.059Z",
|
||||||
|
"updated_at": "2024-05-23T07:24:26.546Z",
|
||||||
|
"url": "https://xuexi-courses-storage.firesbox.com/7000102069/replay/a03fd311-76c5-4bdf-a01f-6011ea60cd3d.mp3",
|
||||||
|
"raw_url": "https://xuexi-courses-storage.firesbox.com/7000102069/replay/a03fd311-76c5-4bdf-a01f-6011ea60cd3d.mp3",
|
||||||
|
"thumb_url": "https://xuexi-courses-storage.firesbox.com/7000102069/replay/a03fd311-76c5-4bdf-a01f-6011ea60cd3d.mp3"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'''
|
||||||
@@ -1,7 +1,10 @@
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import sqlite3
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
import random
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from pydub import AudioSegment
|
from pydub import AudioSegment
|
||||||
@@ -90,7 +93,8 @@ def get_audio(audio_data):
|
|||||||
for file in temp_mp3_files:
|
for file in temp_mp3_files:
|
||||||
file.unlink()
|
file.unlink()
|
||||||
|
|
||||||
logging.info(f"All MP3 files have been downloaded, merged into {output_filename}, and temporary files have been removed.")
|
logging.info(
|
||||||
|
f"All MP3 files have been downloaded, merged into {output_filename}, and temporary files have been removed.")
|
||||||
|
|
||||||
|
|
||||||
# 获取全部附件
|
# 获取全部附件
|
||||||
@@ -99,16 +103,60 @@ def get_all_attachments(attachment_json_data):
|
|||||||
attachments = [item for item in data["data"]["course_contents"]]
|
attachments = [item for item in data["data"]["course_contents"]]
|
||||||
attachmentlist = []
|
attachmentlist = []
|
||||||
for attachment in attachments:
|
for attachment in attachments:
|
||||||
attachment = Attachment(attachments[0]["id"], attachments[0]["course_id"], attachments[0]["content"], attachments[0]["content"], attachments[0]["attachment"].get("url"))
|
attachment = Attachment(attachments[0]["id"], attachments[0]["course_id"], attachments[0]["content"],
|
||||||
|
attachments[0]["content"], attachments[0]["attachment"].get("url"))
|
||||||
print(attachment)
|
print(attachment)
|
||||||
attachmentlist.append(attachment)
|
attachmentlist.append(attachment)
|
||||||
print(attachments)
|
print(attachments)
|
||||||
return attachmentlist
|
return attachmentlist
|
||||||
|
|
||||||
|
|
||||||
|
def query_all_course(db_path='course_database.db'):
    """Return the stored JSON text of the first JSON_DATA row of type 'ALL'.

    Args:
        db_path: Path of the SQLite database file. Defaults to the file the
            rest of this script uses.

    Returns:
        The value of the JSON column of the first matching row.

    Raises:
        IndexError: If no row with TYPE 'ALL' exists.
    """
    conn = sqlite3.connect(db_path)
    print("数据库打开成功")
    try:
        # Bind the value instead of writing "ALL" in double quotes: in SQL a
        # double-quoted token is an identifier, and SQLite only treats it as a
        # string through a legacy fallback that can be disabled.
        row = conn.execute(
            'SELECT JSON FROM JSON_DATA WHERE "TYPE" = ?', ("ALL",)
        ).fetchone()
    finally:
        conn.close()
    if row is None:
        raise IndexError("no JSON_DATA row with TYPE 'ALL'")
    return row[0]
|
||||||
|
|
||||||
|
|
||||||
|
def query_course_by_id(course_id, db_path='course_database.db'):
    """Return the stored JSON text of the JSON_DATA row with the given ID.

    Args:
        course_id: Value compared against the ID column.
        db_path: Path of the SQLite database file. Defaults to the file the
            rest of this script uses.

    Returns:
        The value of the JSON column of the matching row.

    Raises:
        IndexError: If no row with that ID exists.
    """
    conn = sqlite3.connect(db_path)
    print("数据库打开成功")
    try:
        # Bound parameter instead of string concatenation, so the argument can
        # never be interpreted as SQL.
        row = conn.execute(
            'SELECT JSON FROM JSON_DATA WHERE ID = ?', (course_id,)
        ).fetchone()
    finally:
        conn.close()
    if row is None:
        raise IndexError(f"no JSON_DATA row with ID {course_id!r}")
    return row[0]
|
||||||
|
|
||||||
|
|
||||||
|
def save_course_json(ids, token=None, db_path='course_database.db'):
    """Fetch every course in ``ids`` and store its JSON in JSON_DATA.

    Each course is fetched with ``request_date``, inserted as a row of type
    "COURSE" keyed by the course id (existing ids are left untouched by
    INSERT OR IGNORE), and committed immediately. A randomised pause follows
    every request.

    Args:
        ids: Iterable of course ids to fetch.
        token: Token passed to ``request_date``. When omitted, the value that
            was previously hard-coded in this function is used.
        db_path: Path of the SQLite database file.
    """
    if token is None:
        # NOTE(review): credential committed to source control -- move it to
        # configuration or the environment and rotate it.
        token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiIxMDAwMDgzNDciLCJleHAiOjE3MTkxODk0ODQsImp0aSI6IjU3ZTJhMzdmLTMyZGEtNGQ2My1hZjQxLTY5NTRlNmU1OTg2MiIsImlhdCI6MTcxNjUxMTA4NCwiaXNzIjoiYXBwdXNlciIsInVpZCI6ImJlMmViOGIyLTFhOTItNGVmMC05ZDAwLTA1YTlkN2E2OWRiMiIsInNjaGVtZSI6Imp3dGhzIiwic2lkIjoiMWI4ZjE1ZTItYjQ5ZC00MmRmLWEwNDUtZmQxYTUwNzI5ZjkxIn0.IO7C2gtsi8lMdrOgWGNuxK-t2zzmDPvmI4BqISHeZEI"

    conn = sqlite3.connect(db_path)
    print("数据库打开成功")
    try:
        c = conn.cursor()
        for course_id in ids:
            # Fetch the course being stored (previously always ids[0]).
            json_data = request_date(course_id, token)

            # Store real JSON text. Wrapping the value in str() first produced
            # a JSON-quoted Python repr that cannot be parsed back.
            # NOTE(review): assumes request_date returns either JSON text or a
            # JSON-serialisable object -- confirm against its definition.
            if isinstance(json_data, str):
                json_string = json_data
            else:
                json_string = json.dumps(json_data)

            c.execute("INSERT OR IGNORE INTO JSON_DATA (ID,JSON,TYPE) VALUES (?,?,?)",
                      (course_id, json_string, "COURSE"))
            conn.commit()
            print("insert one " + str(course_id) + json_string)

            # Pause of roughly five seconds between requests; a non-positive
            # draw falls back to one second.
            secs = random.normalvariate(5, 0.4)
            if secs <= 0:
                secs = 1
            sleep(secs)
    finally:
        # Close the connection even if a request or an insert fails.
        conn.close()
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Manual run: print the stored JSON for course 488, list its attachments,
    # then fetch and store the JSON of every course id found in the listing.
    ids = get_course_id('all/course.json')
    # NOTE(review): credential committed to source control and no longer used
    # by the calls below -- move it to configuration and rotate it.
    token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiIxMDAwMDgzNDciLCJleHAiOjE3MTkxODk0ODQsImp0aSI6IjU3ZTJhMzdmLTMyZGEtNGQ2My1hZjQxLTY5NTRlNmU1OTg2MiIsImlhdCI6MTcxNjUxMTA4NCwiaXNzIjoiYXBwdXNlciIsInVpZCI6ImJlMmViOGIyLTFhOTItNGVmMC05ZDAwLTA1YTlkN2E2OWRiMiIsInNjaGVtZSI6Imp3dGhzIiwic2lkIjoiMWI4ZjE1ZTItYjQ5ZC00MmRmLWEwNDUtZmQxYTUwNzI5ZjkxIn0.IO7C2gtsi8lMdrOgWGNuxK-t2zzmDPvmI4BqISHeZEI"
    json_data = query_course_by_id(488)
    print(json_data)
    get_all_attachments(json_data)
    save_course_json(ids)
|
||||||
|
|||||||
35
courses/tables.sql
Normal file
35
courses/tables.sql
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
-- Schema for the course scraper database (SQLite).

-- One row per course.
CREATE TABLE IF NOT EXISTS COURSE
(
    ID          INTEGER PRIMARY KEY NOT NULL,
    TITLE       VARCHAR(200) NOT NULL,
    DESCRIPTION VARCHAR(500) DEFAULT '' NOT NULL,
    DURATION    INTEGER DEFAULT 0 NOT NULL,
    CREATED_AT  TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
    UPDATED_AT  TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
);


-- One row per course content item; COURSE_ID refers to COURSE.ID
-- (no foreign-key constraint is declared).
-- NOTE(review): ATTACHMENT_ID is INTEGER DEFAULT 0, but the sample payload in
-- create_table.py carries a UUID string in "attachment_id" -- confirm the type.
CREATE TABLE IF NOT EXISTS COURSE_ATTACHMENT
(
    ID            INTEGER PRIMARY KEY NOT NULL,
    COURSE_ID     INTEGER DEFAULT 0 NOT NULL,
    CONTENT       VARCHAR(500) DEFAULT '' NOT NULL,
    CATEGORY      VARCHAR(20) DEFAULT '' NOT NULL,
    "ORDER"       INTEGER DEFAULT 0 NOT NULL,
    ATTACHMENT_ID INTEGER DEFAULT 0 NOT NULL,
    DURATION      INTEGER DEFAULT 0 NOT NULL,
    URL           TEXT DEFAULT '' NOT NULL,
    MIME_TYPE     VARCHAR(20) DEFAULT '' NOT NULL,
    CREATED_AT    TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
    UPDATED_AT    TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
);

-- Raw JSON payloads; "TYPE" distinguishes the kind of payload
-- (the scripts use 'ALL' and 'COURSE').
CREATE TABLE IF NOT EXISTS JSON_DATA
(
    ID         INTEGER PRIMARY KEY NOT NULL,
    JSON       TEXT DEFAULT '' NOT NULL,
    "TYPE"     VARCHAR(20) DEFAULT '' NOT NULL,
    REMARK     VARCHAR(200) DEFAULT '' NOT NULL,
    CREATED_AT TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
    UPDATED_AT TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
);
|
||||||
Reference in New Issue
Block a user