diff --git a/courses/course_database.db b/courses/course_database.db index 86153e9..90f74a6 100644 Binary files a/courses/course_database.db and b/courses/course_database.db differ diff --git a/courses/parse_course.py b/courses/parse_course.py index 849a8d6..1b9097b 100644 --- a/courses/parse_course.py +++ b/courses/parse_course.py @@ -2,6 +2,7 @@ import json import logging import os import sqlite3 +from datetime import datetime from pathlib import Path import random from time import sleep @@ -31,7 +32,7 @@ def request_date(course_id, request_token): url = 'https://bandu-api.songy.info/v2/courses/' + str(course_id) + '?expand=contents' headers = {"Authorization": "Bearer " + request_token} course_json = requests.get(url, headers=headers) - return course_json.content + return course_json.json() # 下载MP3文件并按顺序合并 @@ -127,25 +128,24 @@ def query_course_by_id(course_id): # return re.sub(r'[\r\n]', '', str(all_course_json[0])) return all_course_json[0][0] - +# 保存课程json数据文件到数据库 def save_course_json(ids): conn = sqlite3.connect('course_database.db') print("数据库打开成功") c = conn.cursor() for id in ids: + if id > 7: + continue token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiIxMDAwMDgzNDciLCJleHAiOjE3MTkxODk0ODQsImp0aSI6IjU3ZTJhMzdmLTMyZGEtNGQ2My1hZjQxLTY5NTRlNmU1OTg2MiIsImlhdCI6MTcxNjUxMTA4NCwiaXNzIjoiYXBwdXNlciIsInVpZCI6ImJlMmViOGIyLTFhOTItNGVmMC05ZDAwLTA1YTlkN2E2OWRiMiIsInNjaGVtZSI6Imp3dGhzIiwic2lkIjoiMWI4ZjE1ZTItYjQ5ZC00MmRmLWEwNDUtZmQxYTUwNzI5ZjkxIn0.IO7C2gtsi8lMdrOgWGNuxK-t2zzmDPvmI4BqISHeZEI" - json_data = request_date(ids[0], token) - - # 将JSON数据转换为字符串 - json_string = json.dumps(str(json_data)) + json_data = request_date(id, token) + title = json_data["data"]["title"].replace(".", "_").replace("/", "_") + created_at = datetime.fromisoformat(json_data["data"]["created_at"].replace('Z', '+00:00')) + updated_at = datetime.fromisoformat(json_data["data"]["updated_at"].replace('Z', '+00:00')) # 插入JSON字符串到SQLite表中 - c.execute("INSERT OR IGNORE INTO JSON_DATA (ID,JSON,TYPE) VALUES (?,?,?)", (id,json_string,"COURSE")) - #c.execute( - # 'INSERT OR IGNORE INTO JSON_DATA (ID,JSON,TYPE) values (' + str(id) + ',' + str(json_data) + ',' + "COURSE" + ')') + c.execute("INSERT OR IGNORE INTO JSON_DATA (ID,JSON,TYPE,REMARK,CREATED_AT,UPDATED_AT) VALUES (?,?,?,?,?,?)", (id,json.dumps(json_data),"COURSE",title,created_at,updated_at)) conn.commit() - print("insert one "+str(id) + json_string) - secs = random.normalvariate(5, 0.4) + secs = random.normalvariate(1, 0.4) if secs <= 0: secs = 1 # 太小则重置为平均值 sleep(secs) @@ -155,8 +155,8 @@ if __name__ == '__main__': ids = get_course_id('all/course.json') token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiIxMDAwMDgzNDciLCJleHAiOjE3MTkxODk0ODQsImp0aSI6IjU3ZTJhMzdmLTMyZGEtNGQ2My1hZjQxLTY5NTRlNmU1OTg2MiIsImlhdCI6MTcxNjUxMTA4NCwiaXNzIjoiYXBwdXNlciIsInVpZCI6ImJlMmViOGIyLTFhOTItNGVmMC05ZDAwLTA1YTlkN2E2OWRiMiIsInNjaGVtZSI6Imp3dGhzIiwic2lkIjoiMWI4ZjE1ZTItYjQ5ZC00MmRmLWEwNDUtZmQxYTUwNzI5ZjkxIn0.IO7C2gtsi8lMdrOgWGNuxK-t2zzmDPvmI4BqISHeZEI" # json_data = request_date(ids[0], token) - json_data = query_course_by_id(488) + # json_data = query_course_by_id(488) # get_audio(json_data) - print(json_data) - get_all_attachments(json_data) + # print(json_data) + # get_all_attachments(json_data) save_course_json(ids)