From 05b8744a9dc7dab75c8061d01f9edb9fbea181c2 Mon Sep 17 00:00:00 2001
From: YuanHui <31339626+alsesa@users.noreply.github.com>
Date: Fri, 7 Mar 2025 15:10:44 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=BB=A3=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 course.py              |  75 ++++++++++++++++-------------------------
 courses.db             | Bin 2580480 -> 2588672 bytes
 video_voice_process.py |  17 ++++++----
 3 files changed, 40 insertions(+), 52 deletions(-)

diff --git a/course.py b/course.py
index c629077..a6701d7 100755
--- a/course.py
+++ b/course.py
@@ -11,7 +11,17 @@ from threading import Thread
 import requests
 
 from headers import headers
+import logging
 from video_voice_process import process_audio_file
+from logging.handlers import RotatingFileHandler
+
+# 配置日志
+logging.basicConfig(level=logging.INFO,
+                    format='%(asctime)s - %(levelname)s - %(message)s',
+                    handlers=[
+                        logging.StreamHandler(),  # 控制台日志
+                        RotatingFileHandler('app.log', maxBytes=1024*1024*5, backupCount=3)  # 日志文件
+                    ])
 
 # 读取配置文件
 config = configparser.ConfigParser()
@@ -26,13 +36,9 @@ headers['authorization'] = f'Bearer {authorization_token}'
 
 def download_attachment(attachment, course_id_folder, course_audio_filename, max_retries):
     if attachment['name'] in ["", None] or attachment['name'].endswith(".m3u8"):
-        print("字符串为空")
-        # 找到最后一个斜杠的位置
+        logging.info("字符串为空")
         last_slash_index = attachment['url'].rfind('/')
-
-        # 截取最后一个斜杠之后的所有字符
         download_filename = attachment['url'][last_slash_index + 1:]
-        print(attachment['url'])
     else:
         download_filename = attachment['name']
 
@@ -40,35 +46,33 @@ def download_attachment(attachment, course_id_folder, course_audio_filename, max
     while attempt < max_retries:
         try:
             url = attachment['url']
-            print(download_filename)
-            print(attachment['name'])
 
             file_extension = attachment['name'].split('.')[-1].lower()
             if file_extension != 'mp3':
                 course_id_folder = os.path.join(course_id_folder, file_extension)
             else:
                 if os.path.exists(course_audio_filename):
-                    print(f"File {course_audio_filename} already exists, skipping download.")
+                    logging.info(f"File {course_audio_filename} already exists, skipping download.")
                     return
 
             filename = os.path.join(course_id_folder, download_filename)
 
             if os.path.exists(filename):
-                print(f"File {filename} already exists, skipping download.")
+                logging.info(f"File {filename} already exists, skipping download.")
                 return
 
             command = f"aria2c -o {filename} -x 16 -s 16 {url}"
-            print(command)
             subprocess.run(command, shell=True, check=True)
+            logging.info(f"Download Command: {command}")
 
             return
         except subprocess.CalledProcessError as e:
-            print(f"Failed to download {attachment['name']}: {e}")
+            logging.error(f"Failed to download {attachment['name']}: {e}")
             attempt += 1
             if attempt == max_retries:
-                print(f"Failed to download {attachment['name']} after {max_retries} attempts.")
+                logging.error(f"Failed to download {attachment['name']} after {max_retries} attempts.")
             else:
-                print(f"Retrying {attachment['name']}... ({attempt}/{max_retries})")
+                logging.warning(f"Retrying {attachment['name']}... ({attempt}/{max_retries})")
 
 
 def worker(queue, course_id_folder, course_audio_filename, max_retries):
@@ -80,14 +84,10 @@ def worker(queue, course_id_folder, course_audio_filename, max_retries):
 
 def convert_mp4(mp4_file):
     try:
-        # 获取 MP4 文件所在的目录
         mp4_dir = os.path.dirname(mp4_file)
-        # 获取 MP4 文件的文件名（不包含扩展名）
         mp4_filename = os.path.splitext(os.path.basename(mp4_file))[0]
-        # 生成对应的 WAV 文件路径
         wav_file = os.path.join(mp4_dir, f"{mp4_filename}.wav")
 
-        # 构建 FFmpeg 命令
         command = [
             'ffmpeg',
             '-y',
@@ -99,56 +99,46 @@ def convert_mp4(mp4_file):
             wav_file
         ]
 
-        # 执行 FFmpeg 命令
         subprocess.run(command, check=True)
-        print(f"成功将 {mp4_file} 转换为 {wav_file}")
+        logging.info(f"成功将 {mp4_file} 转换为 {wav_file}")
         return wav_file
     except subprocess.CalledProcessError as e:
-        print(f"转换失败: {e}")
+        logging.error(f"转换失败: {e}")
         return None
     except FileNotFoundError:
-        print("未找到 FFmpeg，请确保已安装并配置好 FFmpeg 环境。")
+        logging.error("未找到 FFmpeg，请确保已安装并配置好 FFmpeg 环境。")
         return None
 
+
 def get_course():
-    # 连接到SQLite数据库
     conn = sqlite3.connect('courses.db')
     cursor = conn.cursor()
-    max_course_id = cursor.execute('SELECT id FROM courses ORDER BY id DESC LIMIT 1')  # 获取数据库中最大的课程ID
+    max_course_id = cursor.execute('SELECT id FROM courses ORDER BY id DESC LIMIT 1')
     if max_course_id:
         max_course_id = max_course_id.fetchone()[0]
-        print(f"The maximum course ID is {max_course_id}")
+        logging.info(f"The maximum course ID is {max_course_id}")
     else:
-        print("No courses found in the database.")
+        logging.info("No courses found in the database.")
         max_course_id = 11
     start_course_id = max_course_id - 5
 
-    # 查询courses表中的所有课程ID
     cursor.execute('SELECT id, title FROM courses where id >= ?', (start_course_id,))
-    # cursor.execute('SELECT id, title FROM courses where id >= ')
     course_ids_data = cursor.fetchall()
-    print(course_ids_data)
     course_ids = [row[0] for row in course_ids_data]
     course_ids_dict = dict(course_ids_data)
-    print(course_ids_dict)
-    print(course_ids)
 
-    # 创建json文件夹
     if not os.path.exists('json'):
         os.makedirs('json')
 
-    # 创建course文件夹
     if not os.path.exists('course'):
         os.makedirs('course')
 
-    # 先请求全部的链接获取数据，并将获取到的课程信息保存到数据库中
     for course_id in course_ids:
-        # course_id = course_id_tuple[0]
-        print(f"Processing course ID: {course_id}")
+        logging.info(f"Processing course ID: {course_id}")
 
         json_filename = os.path.join('json', f'{course_id}.json')
         if os.path.exists(json_filename):
-            print(f"Course {course_id} JSON file already exists, using local file.")
+            logging.info(f"Course {course_id} JSON file already exists, using local file.")
             with open(json_filename, 'r', encoding='utf-8') as json_file:
                 contents_data = json.load(json_file)
         else:
@@ -171,7 +161,6 @@ def get_course():
     cursor.close()
     conn.close()
 
-    # 现在所有的课程信息都已经保存到数据库中，开始下载附件和进行后续操作
     for course_id in course_ids:
         course_id_folder = os.path.join('course', str(course_id))
 
@@ -185,27 +174,23 @@ def get_course():
 
         attachment_queue = Queue()
 
-        # 下载所有附件
         for attachment in [item['attachment'] for item in contents_data['data'] if item['attachment']]:
             attachment_queue.put(attachment)
 
-        # 创建并启动多个下载线程
         threads = []
         for _ in range(max_download_threads):
-            t = Thread(target=worker, args=(attachment_queue, course_id_folder, course_audio_filename, max_retry_attempts))
+            t = Thread(target=worker,
+                       args=(attachment_queue, course_id_folder, course_audio_filename, max_retry_attempts))
             t.start()
             threads.append(t)
 
-        # 等待所有下载任务完成
         attachment_queue.join()
 
         for t in threads:
             t.join()
 
-        # 检查是否存在音频文件
         audio_files = [item for item in contents_data['data'] if item['category'] == 'audio']
         if audio_files:
-            # 合并所有音频文件
             audio_files.sort(key=lambda x: x['order'])
 
             combined_audio_filename = os.path.join(course_id_folder, 'combined_audio.mp3')
@@ -221,15 +206,13 @@ def get_course():
                 shutil.move(combined_audio_filename, course_audio_filename)
                 os.remove(text_file)
 
-                # 删除下载的临时音频文件
                 for item in audio_files:
                     audio_file_path = os.path.join(course_id_folder, item['attachment']['name'])
                     try:
                         os.remove(audio_file_path)
                     except:
-                        print('delete file fail')
+                        logging.error('delete file fail')
 
-        # 整理文件
         for item in contents_data['data']:
             attachment = item['attachment']
             if attachment:
diff --git a/courses.db b/courses.db
index f1296b11da1f58079b827893859aaf4f100c73ec..5f744cce5033b64db28c860b099142b8fdd47fc8 100755
GIT binary patch
delta 4824
zcma)AYm8l08NKuBgPBfgr#drWr=1z*V$j|0_u0Ee5g#ZZ2qK6e*!`>}0yVATA9W@c
zL1Rg=;vx6|iAvN6Rv{zhQHnn_F&ZI6N&MB^P8~EnswPHb&~*zj=iZ5#^W&S`xwGc1
z`#tvh*1oe(eXD17_S-!N&K%g+X!L&O&dulb>^rpMyEm`;Xa89%+VA{qX!lEf{r%_k
zpLOunJ<TD!bxZ3?{M^$z_X~9IZFFxBq8HJJ=tm47Rv=a)Rv}g+PD2bLh7fBI!-%zr
z(-G?s>k%V}4Tv`&-iR1Qj3LGmXCTg;r+eS@++TVo%Yhvm&+i|;VEEwtMLRa0GuYev
z+`${e{qtj=*tn&4<wduy8)(dp9{kP^Hhf--6W_eIHGAjr$L?5o@SYQ2zH8z3pPab;
zzzcUBdHM0LEIu%|_|V*mZ#=y4y(j*3&%VXmez-7y=-OM?_c!)j*_u57^x`A?1YLaa
zr+5O}VmBZE`ToW4&n<j${|mE^FFgA2;uDV@KYaN3qj=(x<A=ZazjsdD{m^S~F0MOr
z_iMl4LB|h2@bcsL-gfe)jfe@vCd6jMBw`9NjW`Q2Gk@}?=E$MG@y3yI<>s?zhWmH*
z{icuh9_;x?&$W#scU|6dE1w%{ox8fxd);WyoZZxWCo>|@L^*NE2_khC#0ejW_s+N?
zSS?h^?pQZc-gm)tGuA-6EjR8K?o)Bs&0FyTY}+erMP@lk!AKTK5-EfwF1tuVGD$^R
zSgUh6_w<wrb&%am+bT>vOYU?OMEj_TbSe<zctH%4P6QRac4g0=4I||%vf12O1MNH6
z##{Dvw(Y2*GRmox3*s{~L@Mwa=`zs;qcVm<B^S2bu)ExU;d&kGU~fr&_pTe=&b0N0
z-8<*TTc(372x$sWC{><FmNQyL8R5C0OfchZX60pnoH9|F<&j%Dsm5AGrHWKak&RHq
zP!kA9K_!hLoMx6{kV@0?lTQy|I!0%jp$1xZR&lhYPqD2&HH$^oTxrgT@*xr#v>=ui
z4%x*JO<<ha^3dG$wyn#;LI=%!|IFF{-nMN`qtO^|JlR+qhJQKymNj<_{dMT7!NY?S
zr_HTiSbgEDAFn!n<=z#~uQ+$$zJdP!kN5q$Z|kXpb4}w(9GrdagL7r2G?``^z2wBE
zkWd;f@nNkaJbGq~NGXVtO`JJWe(cypGuA*C9iJIJ|4^mpsbH2{46)DL66sUI2}NnB
z$%PCqv4X{tJu+od9psXpUr|lWave0MB!!GFc*}{`*^(k##-m|YL{&cY{DcX)2C_4$
zvDODGOhP28n24lE4r&l$CRTe!q9}3{#>RF|U+*0$XZ2(=)<Aci(XrO$6*fkBkqyp?
zaneEQcqEn=kuar#HJ(BGX!+Uk858RuJM$WET~=vUz}XPlOX7_zpmGVE!DJ#6l$BdS
zozvx}XQxf7gIv<A4^)z(oCoL$sJMX4MXg^ep>STwtdMXl%A-$AnotMXnbcV8(h5^#
z+!R_d5`spvV)DdCsR(C5_{g&grabnqo{{oD7f-+0EsGW1T$fb1G*dB$gxYz~NG>4+
z(kQ}QvP#>a3@@K4*N&72{)VMg1FgE*-d|x;8RIP>Hf>pfo}zKiwo59D5KJ_Z2U@;-
z8ERMu+0Arug^3kh6$X1YN8VmakrHdP2c6)KrOc%*+3+Zq(sNBL+vU-wo9&_sTQaP0
zK?G8?HB}}w%TkCplh_*tmV@KAd~y4viFJ^jOyjK!UuSBsKgzNVi57&~h<-U7G8?5Q
zQKlj~fpv(D^vn!4QXS-yfq!2mDUL^K6EsmSLUt*p-kGFHRNAR*F@~w!eaVcCHGoT|
z;=L83OjS|1#EI~UV&2=@D^<ICQsHnL95suRg`DajJF7R=x}d@ojKIz|kzl9eWErTg
z&&V*usNsm97g3aRe`p#^<MPN(rt#K$Dt%&}dT(tcTo@cKEuj|5SSY=c(0oj~E{{Gt
zWkU^MSkk6<S8_@|i-^W!OSOd$ZAW&{K~+%hDc6dZM_-)S7M92HC8GYW3Xh;UYoj4U
z5i3eki;UVlDaMcsCy;2#t{)pIKX=t+Gt@w{&iakD-dSP80%RV|CyAN1ePtFl55@@Y
zu7D~s7{Bt^NFVIb$%$sDf$p+HW3BTmY{F}6I4g)1PQg^dS$bmyp<YUfgG{w7fA@<C
zds7Wymn$D{z2kMF_VE?ieLlg9Cu32><P3p#%$P#wqT?bLMFID^^OPYSDv$non!*t*
z`ybuL^1MoJg||8~NQ6%gijfV&EANo1%#LN6G`7hZ{Uhb(`^TE02AX&G?d=sd>V&2l
zdf}<#n0trIK;nX=#Hcn%GwPCEdfG^N<f*A<tby*@UgNE|Rm#aD=TUG)98>~o2)6;_
z3<DKu7BEdVXI}3A+LW2Dfh<p+H#xFl)!<BHZ5;m9@LSg$82bCr)q{@>&djgdylKrK
znOonw4&+yip7PLNwXNB?h1-Ahe{P!Pu`&1?!CBT(aiXHa7)xA>aN83^sVtbw2Er3j
zPWkCwF|}`T5@z(Hl>|dp#<w?#RUXpVo)?R0$5k$Q>r4!AQ{`_TnS{Tofn1^^*H${m
z1bmu=$m1%5&Ka=4b9<txX1K<o&d~dpD0WyKWcQ3*Q(=-R+oam&I%*{4C=?5;2CG`Y
zf8Z}ViSkQ_a5b%i>}L8%g~`*1J0X+`S4LbGHOAhhC<*wJS1~b8%j;jAu%QMpbrW4(
zA)?ND0n1J-uDnDRxOe*k70eFFxTK1h%9Fnz#ua5d>_!c=?PmLMg)K^$LJePI6)wV(
z8IG~XHNuJ%B#datPLw+YY*Za&H`7%WrVwooF}SvP;5&=RQ9y~WFz_`7Y%gXr`|{W;
Pkl|rD*<>})oh#<Q>NKKG

delta 285
zcmWN_zfQtH00-b+@5&!30tX6ODivB-i0SNNd<o-tiOM8sLgQ+QLnjl=4L3mu@db!6
zw>bC;B))<!CMqs2#`qh)<%{DfiJvEA>{uhiFt6^s12S5O`-{vE3sackgMIly*=_Rq
zB>#qOm49{Yn=nekh=2(c7+8>kG-MzPYmfsQ@~{pCC_)K1l%WC}unASD!4_<Tqr<5F
z@=hAc67Hem!sq%}xb>V#uVw#au50J+o+<miQ)U?CNPeAl<+4{zRs$DypaCB2LK9lx
eL!eiKc6mX$#Z=ns1d7l6??*x2sAfUucmE%@%48t`

diff --git a/video_voice_process.py b/video_voice_process.py
index b3be43a..5116250 100644
--- a/video_voice_process.py
+++ b/video_voice_process.py
@@ -11,8 +11,8 @@ from gradio_client import Client, handle_file
 from pydub import AudioSegment
 from pydub.silence import split_on_silence
 
-use_remote_api = False
-process_workers = 5 if use_remote_api else 2
+use_remote_api = True
+process_workers = 5 if use_remote_api else 1
 
 config = configparser.ConfigParser()
 config.read('config.ini')
@@ -101,7 +101,7 @@ def process_audio_file(audio_file_path):
 def send_request(chunk, index, file_name_without_extension):
     audio_part_path = os.path.join('media', f"{file_name_without_extension}_chunk_{index}.wav")
     chunk.export(audio_part_path, format="wav")
-    logging.info(f'Exported chunk file {audio_part_path} for {file_name_without_extension}')
+    # logging.info(f'Exported chunk file {audio_part_path} for {file_name_without_extension}')
     try:
         if use_remote_api:
             multipart_form_data = {
@@ -140,13 +140,18 @@ def send_request(chunk, index, file_name_without_extension):
 
 
 def main():
-    all_files = os.listdir('media')
-    audio_files = [file for file in all_files if file.endswith('.wav')]
+    # all_files = os.listdir('media')
+    # audio_files = [file for file in all_files if file.endswith('.wav')]
+    audio_files = []
+    for root, dirs, files in os.walk('media'):
+        for file in files:
+            if file.endswith('.wav'):
+                audio_files.append(os.path.join(root, file))
     print(audio_files)
 
     with ThreadPoolExecutor(max_workers=process_workers) as executor:
         for audio_file in audio_files:
-            audio_file_path = os.path.join('media', audio_file)
+            audio_file_path = os.path.join(audio_file)
             executor.submit(process_audio_file, audio_file_path)