From 5b5570ccc8036b21af3f0a9f20e1cc4fe131ad60 Mon Sep 17 00:00:00 2001 From: YuanHui <31339626+alsesa@users.noreply.github.com> Date: Wed, 5 Mar 2025 12:48:47 +0800 Subject: [PATCH] update voice process use gradio --- config.ini | 5 +- course.py | 26 ++++++---- courses.db | Bin 2543616 -> 2580480 bytes monitor | 2 +- requirements.txt | 5 +- video_voice_process.py | 105 ++++++++++++++++++++++++----------------- 6 files changed, 86 insertions(+), 57 deletions(-) diff --git a/config.ini b/config.ini index 74b02a4..96097e2 100755 --- a/config.ini +++ b/config.ini @@ -5,4 +5,7 @@ sort= max_download_threads = 5 max_retry_attempts = 3 -authorization_token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiIxMDAwMDgzNDciLCJleHAiOjE3NDI2MDc3NDAsImp0aSI6IjEwMjM4YTJmLTBiN2QtNDIwNi1iNDU2LTQ1MTRiMjFjZGM4MyIsImlhdCI6MTc0MDAxNTc0MCwiaXNzIjoiYXBwdXNlciIsInVpZCI6ImJlMmViOGIyLTFhOTItNGVmMC05ZDAwLTA1YTlkN2E2OWRiMiIsInNjaGVtZSI6Imp3dGhzIiwic2lkIjoiZDliZjIzNjQtNjVhYi00ZWNkLThhZjctY2MzMDcxODU0M2M5In0.05GqlG4rhwwlbuQUfEHlHTB-vAz2lOh5JCVlQ6j4V8s \ No newline at end of file +authorization_token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiIxMDAwMDgzNDciLCJleHAiOjE3NDI2MDc3NDAsImp0aSI6IjEwMjM4YTJmLTBiN2QtNDIwNi1iNDU2LTQ1MTRiMjFjZGM4MyIsImlhdCI6MTc0MDAxNTc0MCwiaXNzIjoiYXBwdXNlciIsInVpZCI6ImJlMmViOGIyLTFhOTItNGVmMC05ZDAwLTA1YTlkN2E2OWRiMiIsInNjaGVtZSI6Imp3dGhzIiwic2lkIjoiZDliZjIzNjQtNjVhYi00ZWNkLThhZjctY2MzMDcxODU0M2M5In0.05GqlG4rhwwlbuQUfEHlHTB-vAz2lOh5JCVlQ6j4V8s + +voice2txt_url=https://api.siliconflow.cn/v1/audio/transcriptions +voice_token=sk-vksrlpckcpttnpjgftpgwytmiipjmvhyzmnffhbhjpahbfiq \ No newline at end of file diff --git a/course.py b/course.py index e3a4d10..c629077 100755 --- a/course.py +++ b/course.py @@ -154,8 +154,8 @@ def get_course(): else: response = requests.get(f'https://bandu-api.songy.info/v2/courses/{course_id}/contents', headers=headers) contents_data = response.json() - with open(json_filename, 'w', encoding='utf-8') as json_file: - json.dump(contents_data, json_file, ensure_ascii=False, indent=4) + with open(json_filename, 'w', encoding='utf-8') as save_json_file: + json.dump(contents_data, save_json_file, ensure_ascii=False, indent=4) for item in contents_data['data']: cursor.execute(''' @@ -242,7 +242,7 @@ def get_course(): folder_path = os.path.join(course_id_folder, folder_name) if not os.path.exists(folder_path): os.makedirs(folder_path) - move_file = os.path.join(folder_path, attachment['name']); + move_file = os.path.join(folder_path, attachment['name']) shutil.move(filename, move_file) # 保存category为text的content到TXT文件 @@ -253,21 +253,27 @@ def get_course(): txt_file.write(content + '\n') # 处理mp4文件 mp4_folder = os.path.join(course_id_folder, 'mp4') + mp4_file = None + exist_md_file = False if os.path.exists(mp4_folder): # 遍历指定文件夹内的所有文件和子文件夹 for root, dirs, files in os.walk(mp4_folder): for file in files: + # 检查是否已经存在 + if file.lower().endswith('.md'): + exist_md_file = True # 检查文件扩展名是否为.mp4 if file.lower().endswith('.mp4'): # 构建完整的 MP4 文件路径 mp4_file = os.path.join(root, file) - # 调用 mp4_to_wav 函数进行转换 - wav_file = convert_mp4(mp4_file) - if wav_file is not None: - try: - process_audio_file(wav_file) - except: - print('process_audio_file fail') + if (not exist_md_file) and mp4_file is not None: + # 调用 mp4_to_wav 函数进行转换 + wav_file = convert_mp4(mp4_file) + if wav_file is not None: + try: + process_audio_file(wav_file) + except: + print('process_audio_file fail') if __name__ == '__main__': diff --git a/courses.db b/courses.db index 3000ddf625e7839b8bd977b38284ca65129a279d..f1296b11da1f58079b827893859aaf4f100c73ec 100755 GIT binary patch delta 32652 zcma*Q34EPbdEjlycIYnj2XZT{O0 z{`+Ws;l{SZ#r5}X_~{MN4gTYQ|3dVQG=AgG6a4(OJNNT5`^s19Klozf2Vbn>Q_W{R zpACFA^0|-CCO(__+|OqVpRIhh@!8I22cMmM-oWPpK5yi+i_e?*JjmzGd>-Pno6p01 z9^v!o_~C)X7TJ%hAPaSa`G3Q(OW%VdF;Vw zK9PUJhIL>3%J)G5;(4{CfUV{5+ih2tU7;e~O=9%_k;*ynWBr z2j1M2{PoPQe(j5aU?dng^wqR;sXTkXA{*uQ)7dwcf8-ca>74gNh1{=E(UeYgBi zRQWeCaevj%t+NL^_a2-)^xPAtH{0WT_UxYg;qy;CyKyr9iT(e+s&T{NCnh%>e)6CG zsM6d2)c)_pFR@YQNU;T7-yn1i-=Bn3rjPE$Pqh`nRRX?cucGbnI&K*zhh*y29>bV_zfy4Y~ zcmDl>x7&aG_XqrT;2#eBiv8+89Qcm?`F}d_*~vG5Xn$(cWc+_0_+FL$h7aw3U{lDf z^0CQhe}4ZTR@oo^`Te`?|0(Y^{KJ7S+vERu;EX-`PX}5Wp5A}!=>wYw?caIrz;`VJ z@*qG1GJrm7K>Z=WehRrnU)!;F`?t4W+}^qUTic)8{&U-RZ})Fs zx2?GC``f0rHEnxp+u*jt+n(L_wyodW_J*y0xAlix$G4u``ux_Ktxs>=yEVRb^OhfM zd2LJQmTzylxaC{Aw1+lSPk#LM1EKq^j-{=N!6UL&f5WqPR9Ac2bqoJ`Rl6?wf2?W` znsHuV*&c|S_Sf!ew*tXfAYmT7de;MKkDB%$uWDC7|NE-;i0S|Tt!h_5|7uk`3$s}9 zU#@zfD=4jM_nQfdE8G3LfWoSF1=L;Du9$FWx9&om6|HZ0y-LgMl)vgQpvsLYuqUir! z^*~qlr>oj^1wUBTu3PqBR<-NG{$yo)To>^FSgRiBivRPfb_Mi5t!h_5|KqB51?Z1g zw#Rh*@0Z)HCH9NP71BRi^@2kBhb!CTii!WQvOT7I|L?WEy)u4Gx8M)%ZnuKbn6B*i z?|wkLuI%5fYS)Rsx2j!N{BKvbM|A*nVkoB zv~2(J_D^lQwJo*v_qIO0Wn#vG4R8QdSN-F@eN19z3c-H}QMGqFS}>PsXueqT77P54r|P{fx`CjE(U zE}2M2>>vH(8+O_6`rRiErq|FrbrIk}tJ?%msPn0L1OJhSDJp?D^eOZXz;V9FPcW&^%NG{yq`{#-mp zVkCJq zltd<$@};uzbON*{BDtvj>eKu9Z4Eu|Ue)e9)t;GRnNSo05BO4nT+kN|CQ`mQdnOzZ zjQZp0WHfBo{`Iz9wl%-|U}_D$SJ6h5XSQ@Alt_faQC}vKh7z+8)&(Ygxl|||ibWE! zsQvO^JpS&?z54|+D_6D7GfyrNj3?rekT1vTeBo3g3^!*pFn2Z^%Z2=bP$Xx+{{KH2 z&#a~AikV)qJTnEep>QS=4f*`>BvcWNB@uOzfG-=2fYn?kkc{W-um8e~Ch<_l7xw33zEl)uNyTExWHKDe#?p4}GjRTEgS!t}-tPZQZ=+Ev zyy$!7w}9}Y3Ss!r-r~#^XXuC=9((6G4=RxE;A9!R2LSJ1d%#N}7s8oL*cZ*F1HSN^ z{J|t-j$n$$BAGxeoeV?~OotG2E(9{&Z+fN+#Ut@xIv((4f{_Sf zFycp4#1VsmkUyA>X9DqHn zhAulp7lbCZgN1K;;EEt8v*Cmv%_gEH8cFz)sU6XXFB45CfH&q3=0N(EUH0L(qkDd0 z_q%(q$Y?kZys@Gx}9rv&hhJbG2FgpIHYi;75tRyFFH*PTYhS< zGdWT0xL#j|8V%!0C&g zMFz5n%t$Wa3k7rRbq4&z&4wrZxu`#tj^=`X`vbrCc-&e`&%0>&nO9zr1@6=Z;*P06 zUg0j;_r7trXL`$)06ci}2;de*&lWlxiWiQ!7ti8SaVAHhh~h%LPr%;{_}}n=A4+Bd zXt1C!kVeOZV{xQ%mT&P`A`l5hP;x2z?Aa>z=w}YV829eIiXy)5nJpN~!FB1VFB^r` z!=V&*Ner^^XZ@*U+#iVhlXhSZnPe=jHS{)CafKpS+hNaa*qK>B64{r_L4@IKlC^~p z*MVd#6isB{%ha-^m03gYRZRai?`)BDHj@p+e3^I-<(-M34Z_KUFBA`Fb2-jX(NNO< zz;~XETd_6tyo(Ut^UANm@L8n;1gieqi`&qxX0ZMYI`=T7C_;nR0&>Iz40;W(t5V~-VP`w?!=&}p}Od~s&R z=@={4&o53}Mk~$^!>aTau7K+2>Ueo^wgwFGG8n4xkJD8nQ;ts+7y1^b$4hlDEzb06 zPgIt?p16qXI!ihI72O($iul$Mi!}qn#N}htLObR5O*vLipL_M3!l)AiQ2vei_Y3A*)_X9o2g%Q8lg}S=n`^+d;B{bTai*QOS$tV; z;1>1@lbWN>)OnU5kQ^Cs>jtnx@E$weEr3+AN3Fuh32hQY*kc0d*tFZ@*HSqDU}jiOe$~leb2^U#Y;kl*Rx|Qa;p`|NFVVxb;#h-XOo(LmYH{JD z?4Th&-sNf8x|=(V`y0x^_wF3+ktq$Hd5dK@Lp`PbbMS}KtG=?2fz$x-4#(UIgj;jL zt_dA@^E*LglYqLoaLKvdR8b>LQMh_l;1p66^Abw1zx|7Q9<<8Z#1Mlh7k0*I7U4p+ zV)P6kv8$qJ+Gh(VPQy`*){w}*Pe||RGhWg&zKTPo^F2;pIFi@gnNGqFAX4~!iCUav z%T#U|{I!AC3L02}w+A|2CZ9hJM3v5L5DMODx}rb5k7S%Y_UTA@x$}MOm6N|wI-z?d za|yM~v^&Qym!57s>XiaQD@znwYV0ZfGqGqPEy#g;ErMlWv!ElU&drVKrPjeq4i z_}IaYl$D4} z>1sq(s4$;SiaL4^nC0^vH2Yh66c1NtebA`_+ukXnc!-M4Z-(~a#d^)?8@%rvY}zl@-k zvC}?)2v<=6D+@Imc_Lr#*4T&_x5ny67w}AeIkG_1)2)#f!Kp#w>etwh!lg-V-L2MwL?+6*iqSqX&z$CoU-E!BKv2oTk*4a_-3yN@tE1f&(_FWYXGC&EAC&k-r%5a~x4G3Fy@`acv=nBKBLZ1^K9v7yW6*d_$%yf29}*E~fEA3}V+w`X z2>BZv>MC-faOqaz%77*6bBXPFQ$z@wb$SA%Ls}Kf1{!huYKjY8!p5UFF~Umrgxl1E zeWvXgGrT1<)>V(rE}qp;cB^}%n-z$3xPX+G0cE*0r?J&ozO%RC=HgVVEdOY&oKY9% z3*BN{6$I-;gTp2#S?U(7!m|oOBfQmS`zt?7JK#?5F)K8Qner(+ML@p zPIDVs1@7cE4&aDtL5mTiSg|smTH>h5CTzm8WylEF5D=Ktat5Sz|mlSlNsD1;VC8jI? z=phd}>I(S_aLM9ybJ>_R-(}n-D@er5&;?rRK9_%wP-=dwSDYI+&gZi)cuXK@udAgv z)(o#PhijkX_`-LipR5iMsK#C^4Bd8)&Xh1lZs#Ff;cxhJIuDAKC@WII=|l^aJD`>6 zK*j|_R}N8pX96QYTZ#*fShj`1TDB9#g+WJWr{jvUy4YSzX&FYDW3F%*nH8hOnL6tB z^)iraUt(jhR3)nff-wUrC!!ghp#dF>p^%b-D|0wdXhcpx%?)kHJ)Vl&(5vAr^NYe; zIp_*KLsbRah8 zVfdkaw%Bs5Od{>GLKzocs=!_0fwC~`X!0_{o!vEc5^&p8b(o|5R_Ff0=CO_ceZ!ur z!#AI)`WIDJ{vG*`7-<~wY(va!19WRtkQ{0uS^>FrSTH-u@kOlkWoM zx5Trt&@Uo?Dqm_iCQm($PQzrW{}fAF@e02RShxG(8Mn0;S`{HVGh;dC%r7q(9I`(A zfE*1m1iU*8T?l%y!Z?s)v0OQc8?=mJ6#V&G_8fvL`(KP^fmKvn>wrv82VhmbBSILA zTBt-bpLdng&qxNol-2QZloJZ8EX{T; za-wUxB}9oSjhBAE_Y&CvaeY4qqjg#p?8$ybfJHme$L5WFMRbM%YRsC zw&4X2&1x0^5W`Nm1dvDYD6C@t(a7EhE!t%<3ny8F@Gi%#Hsoc!lgF2ES*3Ve=2%C9T89O68aK*I;mC~1$jWyjY%QgOI#7N=9HP8maWL2F< znV@n~WQfAiKJV5qD>#qp1yHxY3$A3?mA1Jdl?vPjaaQ@L2la@w|4G_VbcA?0M2ZQO z7{fx>M>`zLyJfmIlxntnKsD)#w=f^zd8|Wi2NbIk=t62TO8$NMpEXkF3*H?p+kh&^ zo5o>%u|y#~D7j-yY!s+ZKs&8lh4V*F{Zms$?O?}TR7zAUL`fCcotBFhU4>AM4q!Nu zVyM;OGhj#7Ys_bDS29)shp{CaZu)!MD}qm6H{eR2<~C&zdMe4NVJF(@J?yhNVE~j6 zl@UoWDV9I25rG<0Xy-m0)cKVtJjqX)fm~VKV{qrRyC)$t3;lJk_?0t_y2}Nv zXb9s{E<2VDFNKHnWOpd9>ak=eQyN z6FUP?{cq+!Bv@+vv=>Xp2V%Sl&B$BeOO(xfaR97en)npOqh zRGrf|!~Eh%XtBynVq$a{ATyQu%5LL!q8Os(=q;wrr5PB{nUYwBf+)um1P{ktGw$N- zq&qmLqw`{MQUp7l)gXe31wPZ0w;(J_MG&1TfU{`!aJe7Ur|9Xlv0v|c#1O_AP#Hv6 z6izX5D3P0byj%Je^yni>UX3M;nI$j+N#3M0*xfAFJjCtn16CJdyKe|bO? z%`RRL$>WSIsbMjO*ec^ZCn79ZxyaFbl+DvHxAZoLZn`lps&7RQyS(ut5zN-&weM@i z+{1P%hpey*Ox)Y;ZYs1lqLQKHpH_Goy}T*yv!$A5Z$EY8Xm92ax>yQ=*|lE=g9a*w z;fcV|aHBBNqvTTg73)U)?k0k-aIz@oR87gc#0O}on5SxE%d}=FR>4_TtV3ElwKzfi zMoaD1@&k1`Annx3aJ%yuBS2P{$$vm-G4DNCMop?FPt`RkHlM-1iStZ}exR-a#|OJ2 z=in}6gg^}Tl~IgegP6(K0QWn7#x*VYbk3~RQ4{oEhye{>ljr4!{81X9`9q_c^FLV^sv z?6JWeT+{5f8aVzr0`t5mN64+~TGs72xOH($>*k%CzHr~?Ha@!Hf$ELdt5WM)><{dH z^sClq@}CmK*Kb-*?vKWe5Ga$eiAxnuoD}kyzEL_!DT&A?yj;*=q3N32au-RvB5XO*i5=mNShLO~krN{}jdXR#yNaDiU<`e{ zYPC*hlf)y-zt%Z(HO`Ffb~U(N68{855hb<6z8V=V7`|j4bTPH@yo} zS4C6Tp&(x4K8vy$yrs>5GC$y*5dV%tm3^pwQuz&->~c7ou7 z&bzZ`5u!A5F5oTPd4=0U;?8SoSe&_1v>)1bV21<+d0Cb_dP{>b48-)f9z;YpjTc)x z3LP!#g{sUg-lp0y(97%eryUbZT;BB^&;{|hLSwy5SwEl_u)9!$Sdnh0<^+OnJRq`n zb=H0PPZ}otmmb7w>OY<lcM!QxXJ&8NnpGKm7l3#)ug??Mv?lIz7Iw=wqtmJAv9^h9x{Rvwx81eajl z9LtTwlQ@MEoYUUww&2rfcWXP{dhxn}Cnw*m_|=i6vH5uEa-H+ixW!RG0mP@vKZEo0 z5)olH#|DZQYtRJ6=Ihd@<^ndSoN|yPSW6IGC-&o6`Uvu>^1omRDd|C;)`ZBSN6xc) zKv|p>CkD_95KWy0YZeNtA&N~1(`mB7ZSiPcdI+tY7vIN)E~2pl78x%DQt)bByaB7y zb8$)zlMrHYY(yjlv6og7Dn?}7*-Op^kl)1I62zQp#SC+91GhM!ux5zl@owpg1x<~( zW8H{+PPjCctr6tM%m~q9_aM$fk%gMDDhri8HaJ%zP=w2h;ewtc zvNgBGAyE!Qv>N_5qD49k%W2QhApSFRh*CeH=}^XhcYZNG(@MW1-^zbnpl<%8#~#-L zoyA#^0%Zf$ysVax{nYc1J!m1i#6fhjy>O~5`Y>qOP*|WaGwC*uvmQ-zLh3M)cQ5#0i#G@vhOM=ZLa`+{d&AHVGg42c1tKYrWq#hZA ziMVdXV}rZT9)DPJoe%vXmyS(0H~eZ^~8hwlL1`=n+L_9%2s42dYHPGVwoG0u)AJicVVU=8WQ+`@)iMZXr)^oI%JA>_Wjjm|K=cTb z*W^nq`GjOS8dqgb)VrJl+pw_AIt6a|} zi5?YqknjU}izYMAkYx)Hs{=Y>=RjQ*#WC@_Ld4-#ybE}Q4G?-?ekRy6L=54;?hBJpPVOT(`_z8F_Vu*vhs9q60 zCMl=SfgZMwMzCSnxt@r99tPecH9WkipRHD zH-@nx^lu`=XvxAn>X!Ht-XD=+GWwh?K4&w6Zd+k?&YULXq*`ihBXnru?5wF)Yze4Z zfWT@ak;>3{WuZ(U!8?<>RsXsC&j}eddgIG6Q-4E$fr(UElc-jvqoE6Gd1@{c_|Sly zdHR&N>;>K>pP1}+m*nSE5Gw|khOS0lLiNh~a`d~UJ8XY4Nwxy^o$7|-g=3OOMWBQt zenYFPkXE(j~|F`Vn~C`&9OaEQi)!fYTTp zPZy^wX%%=mU)qDHDqV_*+Z8@lj%vN*1Os(Vaz1`|-3&F2RM)?Ad;7N6wtZ&nt*wu2 z`JMY;x&OnPFK^zl=^OX`$$jtMcxGeuhTQt!U;obP&Z@tudX`u2`mg(oRo@8JK62-Y za;~@3tx>EP&!j@8!ix%}q(9R_X7^D*w}zgFR@JTTTh1%* zh=pT0e>xcTQA}nkh{DW8eOEk_&4dzhilVaiZ#{ezm3L7})l$?#_}Q#jC`Nr;I_S%VBh<)+C=LzT zf&Wb*(EC3|*~c1suPXg}i)Xe#F6pQIA>zvhD1{0q*{oDD5%Wc(0g5UkISM`OYk&SY zB}n(~n7$i@yYD>enTWcER4hrwUnEIYLpYZ8Q_Yf&`y$b7G#SlB!|9Oyf3F>VGLc?GM-#jB)Ue!n z*fUWwo(&~p6qHcR$V7=~8uIhAZ|=2k(U=cHk3l+-2|i#DRlU1KqRtDgt@XGaD<5MAKALrBXSVIhciyW6>DAO~GL> z%O{Yt4-f8+XV%hlB^KZ0nJE_uq;n}`6IIWwi-LWMw_-V8EEl6%%O46+vur0{emtID zOV5=v?eff&kYc_}DC0{;z+2cKkNJ|pXwVnR#WD#hYa`i&{ej0~yPw!i*g)@#LPW(~bp6sA3L=MClAL@kHo6yry; z2*3>VNZmRWSkV~YGI11`+&Pd-+CTL_9;e$HdajoAJ3T9l2XYCjJy}mWkc2#t`Kc(C z>_V4;v_G86MeGkeyDyPhL&p^;eB{m!&!UphSv-d#Ne41i+d=KVWHy@dMH4BCUemD* z#c}q&zj!j9TT9QCi+beFcF$aLUqd)YC2u?(0ByNgf_nT|5Y|p-gZ^kVMopvrfByRM zIHt(Gd#;>o_nmDX1SSwn)Ui^%8^pK?BO#Ge=Dvm$bP@@2MMK6Ecv5M8@1869YpZ9b zaKI1XnSd_}^N7q~RY^2Unwug*{#Y~_v;8|Br7-Wq*fsa=99bpmw|FMYWs)djh>j{? zM13%cX&6G@`%}SiEEb3-L+Q9(`<*A^Pu#nw{rbBe-SyTtZrreB#CC4`n{Cf-o!h#1 z%aQww_kV2j=;k+Wdhxy=-1pNP2RGUe4DSEyo6l9b>)(q~4SPTeam7K(pF^}H#e$5Y z!c*}$yP5&ZiAanZ^<`ZNExmgmcG243kb6X!G31#s$9*MP2;WC(xR{buMJKofgEIc4 zKMBRA1BtNRbPeNfEj?H6ZO}7QAjZ7|p>WFQ4^wIxmTM#uVYxwqTM7J>qQBNLZ|{CrW-YI*I;p+Wv%F*^$#q*u&vaPiOoU5s z;>j2oHF9BuFCO4*7~~!pcnZrB zi=p}26WnXEhMsqC+U`4V_skUJ{FTgbUywf~9Oln(c#5TxKCV`X24kUkR9N}bJ9gOv zHCV}O=)LLy`ZmvO(EwTn@z32el*@cbv7ekz&*3fa)QSG2-qL4q44RVC> zWl{;wgy{q)sDNAolFq_p;XnZ6zjoI{yX+ldh+_@CS0V9$ceYS45XQ#x#pJvZ=7{S{ zCPPU|;N!VOHV_TP5<$DFo~weem+#qgC2{Qc%#=!n(Vd~VFThIBt8#?OAQ^Mv6#wVW zo^&>3&%gH|ewj7&TshN|o|#Zj{#+`R@I^UXL-3rclYV|pgrnTN$(kbmkUjQ;$Ky}j zyQls7*tT77*|u(a-Ht!l{=3@_Y(2B}{adzde(t`74L@A}v8u^+(_Hv<%Bua%{I7`H zvHsJZkd|J=r@01&LCU}fB{+&ZaVgfX$@CR(ChjoJ8^Raq_L!RX&L)!$N1^sH?I!*< zUf0;;d2-phC9%l5a#C`V@PixIrgp_GD8U;u87|U~eo6~NC1^=j1s+AJiOoYzXJTZH z(Bq?JOk9JESBCA_(ZcoHa)S!xQ05g9g(SmjfD#}njI`=^=6_k%pZ{&|`VHktDu-lR zTs}q)n%jP^FmX;VT-Eq5Z%dX0f%BPT6eHm&CXIzG6Q_xC)^fbDk`RI4*}c_Uj`Ndd zFe@o;#S0Bq=^QS}$>qc#a;*WKp~O@Ins7dO9&OUkg#t;Hj!@qd%b+g9F76JBypNMR8*Z&K1$f3tA* zD!&p)E)Co$of~pnOg0>j`>O;Opxc)i;#`676biAgw3 z{nAsXzkSBNbxE=+$vN&Pg$q2GLS{krc}b099rbU_|B~!V!+P(o7*p}1hlmlCWA$c7 zWq^f7>NG0BZYMvhrAx);F+BWqD4ookqPao0evIKY=tDrfXi+QGf0l4F0d}Hf-t-pg z^p`2u6c?9AnP~|6B?QoSRa0b$SV+9Oa_PkMdC(|G zXY`!Yp65$xsFN?HGp+7`6?o)L+(vUh$z((;T+ctZT5noi^LFSj!E*{)Tq;P35Z||sX#Sz zs2C7h>=~B|ZVCA|DBlYl3pLCo*q&WjobQ!{jV^1Rj2eAG^J;kPjX^4yV4p<9;^;+6 zzRHrPXIc|aG_uVggqtrvh*! zB927WEO%u}lMY2Bu(%+QIvp*PIWncD+VHx7Me2~%atS>_wku&u=h(&K7^zrND5r~% zp<#CT05V7Do4gH56ePW!Z9jLkaB0CkCn+vkZ!2J#+#KB`=8!7^$le~k1slrr6g76| zMNG|bnN+8fx1;xfWXRZU(3U)iEoHFiZqcm(Jy~hBETJ!VjNtM)r+(>>V;e?3<6eD7sx*=yL03N-rX^- z>`DSsnI26-7sIOj5+s?rGhvzGc|#l&gU?@)d*WIvBr&pbz@c% z+uEg;sQ6M{!In%r03?UW|3$%C?L$jg!gwep)3OFT14fZ6ZV(wH{L0{Fd4--1 z?x+#KG$qBP)>LwNma?(xYS-sKXC}xlO(5f&6R2Wc!2%yt_Pb z=oQgQ9*S13AQFKOl2nK(?uC1MC$uh6q|Sx%7FRZbZeU3Ap~;zOxL(u?Y2b6WoS?c>>PzLz^n^lLmJ~)p zLJVYD0e9)LWb)8i1xHzV8fuY**4sjh)VI4ATMDOUsl_ZVm}_h(XcFZ>>YcG&%M{uv z`6go7LIiUs3l~fR5K`o}T@gz}@5x>U%5sI>(vCV~$K2Ch4)z3jgh=FCvX_U^eTX{U zxXM2AZrz6bXJmt{`Y(BR$OJbEJy$Ry3Ydxrr_OeniHv8;z?&AZs^mT}Q<%ecl!mV{ z8S4{kpqn&OvT{kXJzZ!YA`Mx2C-5ayeVY{GAASy^`mId^>W!c*+QCqfJ%t2xnYPM+dC-H0-n#8r^JTnAdg-uH6^d9QLbh})UvR`<$fqg$E5@8PX07+N(RWbs&B5_ z`Q{xT**3OyVoTrspV<84O~H-d+3=z2?^b=2a{FJbvg$sX|FS{u*O$pG7SNO*%$`*T zZ{$BSmoE%4naJg`l9pkPYyuQ`u4Kz=vM548PRpVN5FMB~Waj|8yl8P)H*bOn&6p8t zuJ0h-b=h1r36%^@{vQj}v!d()DM=fG8@j@P|yoP+8Qq`Y0xr(_vwTJFHrf;j;b?NU}#5=ITpsUaVv_=9p9{i(NO6 zn4aZg2Nf*R^VEz;R$KrOv6{g&_RZk*puen(LT$DPG7>k1CFd(W>j*JlAHd;4KXtq4 z`-(ZSzNlcRAk||>@uHX=b<<+Uo1sm@yUG26p^q+=uPtSpBCN!~xzdV`v9P%n{d!}Q z0D(MTq8nLIJ-5=xwKo;JVc9siFQ?EVMSJAOcU96DUiADj5WM!tss2_iGjUhy~11gQq=iXF}2`ys!~;h%an!~ z^jFz|ujIdEb|B{A0sV_1LBC(<9?Q?UB;wN9HX_wRG|Q1u4E~3$_Tl1%^PmiC*zND6 zZVi1USZf@SLyI}3lUrXpHVfaeD12*$VXUJ%r>%uCv>b$Faraa!rz6I3ZM(>5Ge*s$ zy-Yqd)~kq;rOGdNQmD`!%NLGVw@7*~)^Zc0*p;$Z`lgwl(vs2%5h~@<*NHQfFo5w3|**+ zEG%^PD&X3$&+mUw=p2?%8jdOSz=#@WNTDCmMB@s;wo6Upo6}7A-?9 z&YmdDHV83X61OS@FWo#q0s~bboHf|BdgodVo27v2S%D#xgY{+cjLmmbi>NH8zY2PW z4B3v0b5a{A*y7ygo^4vZ-6~u_u>%)pakmp3c#MkyMIA%$KjQMOv{EOBg(nL#Qb?x3vNCs-n5#vPF+ttO@)@CrBQJRA!B~pMKB6+O zhbo}5w?Ui@NEWn+dUBx5<)b)}2)#%exS8-v!c=MKiX9OsY zLLf@}WHf^ZbBt%KB`1dLn_iZITf}gr_%7;iRFp7&2`FE;oi*y6Wy+KERH>xfCO9#k z5W1<~1E7_j%cY4twQ!UehL(vbjyHPD9qd9232$H*56`>j`^W|tguW@CHVF09jgpfg zQ4?{p;u*r~GUrA~>mKhY;L9_vOQl#ziM}!*B|xTfA1+V$Eo0s>xk>@BPXUoiXo~*P z5y)?I$bj-)?dnr8Gouc9S)ks^hX7U2>dFajL2F!4=~6T4EO$ zW-s8P!^|ur&hUsh%}@kFiKdQIok~lG>N49-Tx3yTruLiplwhXbQ@|GU8=sF+u8UJc zBIArt0{;YO5(Wllb6p4b*kUzL)q@ZOzIdhb_SJfp@@Pa+ zOc#TAqm$Y%V(ZGQ*RkyTC11Oy)1B(1<^}dKLyn45y=4$<&=S7vYYzLD_Ccd4B7s@z z(Q>UBb9%!bJOXwt%HmP|oAOB`cRgfL{=-w^v-kL>MA70N;L5>1xz!0dV`0O{=$x1| zDc%6d^p!#zXL16Xj8QU$-_mEmfnVwbIIU#6M1-xv6aWxs{-YMM>2 z;c_mi%c%W${yEu)x?lIoJ9A_~RcW<^+u28`Y+0vtqi>C!ugu;eS4Wi@-+X5RUB1Do z2n22#DP4w|m%mFbrQ0>dxOCzuEW)N8*(2AzIn9@gx48{hsuyIe0gjeewf2C{qV|Rc z)aa$aPr`>>np*bU)z#+mUo=eRaio_2J{g)^W|2sX7!Q^Zs|f{))HyXTl5n=Y)O)Ke z?$k5v0Y$c>uSTD0I8Lvfk&RGTxk(LYhL|rLL}3;|jC-`XbWF%eIG38Gp@~v2rJ@tB z{xTGHw{5^*lvHI{S*yq|gp|3Z0MVm02~H=sZi!b#4y9Yx|LwZ%f3~f4>zlUJ-v29` z-?!=ZeWy1D*Z(aC(mz}8qek{c!Nw7=ju09!{`Soc#e6&AT?jmk9>>K7Dxpa`hp z2BNAgsroKzDsCZUuKeW~InI~kcBZr7Uw_xL6Pr`FH~$Sod;Iw>lf}iKDvJe;dXt-A z<>>6)Ct@qciUMRc{wg97`KpWM)U3Q0iVEi2?9%9sHJ9? zxo}w2MXeYVa%5@~k0Q&JU$q`rFQ+jTPdxhV{MQ9ZwV(GQNnPaTW(YkbtMik2F&hQ) zl@5cKI1=2bP+w)i$TKKN7wpX8QyQ+21&zg9H)NaMl0R&=sp6%Vzny6;YQ4Cr5_wW% zxne1^C@NpeW{~AMybCE*a6EafSktYu(k{h#eOxyx)q3T|r+k zM}<;e?2wYuN)|$y1v*fC?U1K))S8jI9!wI32;Xb7nt&m19-Wn&n)yot{o-E6<3y08 zY_b{xpjJ`{W{Fd{HyCI*aG^)}E*RRA|C+(jW1cWNv#m{5lQ1c0_7H{S8#0Wsc6z7WVMw*CT&QOhMG=VVrKZb@ui!=mM=XeEdvv#Ll?CSWhA@;^E06_&YDlgQ;k>#4zqKw_COQrMpXKO8SsB^`hiEOYd&Vod~ z_`4=w6`1(DCd(oyuoU^O_}_FjDv6h^Y*jUKL%d{zAP%Iu6|1>@*zLvfDfO$e`cd(x z6fNa^QJFm|9Zh+woM7cKVdXx2qLOZ)LQjswXC;^_hqAdD4!5S8sf#s3 zSaX|LA)uI}icqx*B@iiU}1v x>fcf7lB44C0zWGWHgw_?QlSuu_nIsd+L^{HYLzp@;!@67B0h~OD5V12{{;w*pF;ou delta 851 zcmYk&TSydP6bJB`an>DoW^BEr>t&mD{l->`Wn6CyYPwUDsca&IQc^3K7x1!ynbAWV z;%Wu5vgonGSh``*gO%-9Z|c)N_+Ze3J_J5Q2!R;%;QG|rHKBnY%y-Uk=KG(($^sjR zJYz!z2AyHJ!5LdUzBq%9_uY|U;SaB;V)f=n$-k2V-+vT%$Mum(eS}3EN{7wemXpO;{~-0Shu+FPS1RJsrN!S(5nUg)~Npk z9}=$0yG$bKtfWNgnx>qV>8`B2%YgGOB(o4ZCv$o36vy4`y4~GBe7(%&a+f&={pIW% z8dH>NGy5f|DM8H^)QX~-oz;v&jCsg5G7t&bN~qIke=6|lZpH$&L-u_7v(MgU;2DOE z(UBJNfrY4|WCs2AsK1!qFq+1g(G)WS&XCOu&Ia=Qe!5NGlUL+Tc}!lEL$Xh9lr@== zEz*wkS=x}Mq=@8~8l(=%A#O`%abJvy^I||;68pqf(QVojZK7aWF>RU_O(C3~d=B$W z9ObE~sz~JFoOr7xj{C1(<%E|_BnQ4Wkwwv!{7A(Kn=sH!RCwD=a$&QXRFRce0>>xrLM(%aaEyIPgG=&XRUxBL6n5@2i)^o64$3TwwR@zo|0G7e*si91#JKT diff --git a/monitor b/monitor index bd25bd3..b1d647b 160000 --- a/monitor +++ b/monitor @@ -1 +1 @@ -Subproject commit bd25bd3ec0b655b8994dfd568abbfa52b22d1baa +Subproject commit b1d647b3b89691b8373d6fb6806711614281fd02 diff --git a/requirements.txt b/requirements.txt index cfd7b1c..33d5171 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ -requests -pydub \ No newline at end of file +requests~=2.32.3 +pydub~=0.25.1 +gradio_client~=1.7.2 \ No newline at end of file diff --git a/video_voice_process.py b/video_voice_process.py index 4d8b98e..b3be43a 100644 --- a/video_voice_process.py +++ b/video_voice_process.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import configparser import logging import os import shutil @@ -6,11 +7,20 @@ import threading from concurrent.futures import ThreadPoolExecutor, as_completed import requests +from gradio_client import Client, handle_file from pydub import AudioSegment from pydub.silence import split_on_silence -# 创建一个锁对象 -file_write_lock = threading.Lock() +use_remote_api = False +process_workers = 5 if use_remote_api else 2 + +config = configparser.ConfigParser() +config.read('config.ini') +token = config['DEFAULT']['voice_token'] +url = config['DEFAULT']['voice2txt_url'] +headers = { + "Authorization": f'Bearer {token}' +} # 配置日志 @@ -44,6 +54,8 @@ def process_audio_file(audio_file_path): wav_filename = os.path.splitext(os.path.basename(audio_file_path))[0] # 生成对应的 WAV 文件路径 md_file = os.path.join(wav_dir, f"{wav_filename}.md") + # 创建一个锁对象 + file_write_lock = threading.Lock() try: audio_file = AudioSegment.from_wav(audio_file_path) @@ -57,45 +69,11 @@ def process_audio_file(audio_file_path): else: new_audio_chunks.append(chunk) - url = "https://api.siliconflow.cn/v1/audio/transcriptions" - headers = { - "Authorization": "Bearer sk-lakndqcjlmtukekcliwkkryaxquifduhvzgcnlhofzvofllv" - } - sentences = [] # 用于存储所有句子的列表 - def send_request(chunk, index): - try: - audio_part_path = os.path.join('media', f"{file_name_without_extension}_chunk_{index}.wav") - chunk.export(audio_part_path, format="wav") - logging.info(f'Exported chunk file {audio_part_path} for {file_name_with_extension}') - - multipart_form_data = { - 'file': (os.path.basename(audio_part_path), open(audio_part_path, 'rb')), - 'model': (None, 'FunAudioLLM/SenseVoiceSmall') - } - - response = requests.post(url, files=multipart_form_data, headers=headers) - result = response.json() - text = result["text"] - print(text) - return index, text # 返回索引和文本 - except Exception as e: - logging.error(f'Error processing {file_name_with_extension}, chunk {index}: {str(e)}') - # 将出错的音频片段复制到error文件夹 - error_dir = os.path.join(os.getcwd(), 'media', 'error') - if not os.path.exists(error_dir): - os.makedirs(error_dir) - error_path = os.path.join(error_dir, f"{file_name_without_extension}_chunk_{index}.wav") - shutil.copy(audio_part_path, error_path) - logging.error(f'Copied request failed chunk file {file_name_with_extension} to {error_path}') - return index, "" # 返回空文本 - finally: - if os.path.exists(audio_part_path): - os.remove(audio_part_path) - - with ThreadPoolExecutor(max_workers=5) as executor: - futures = {executor.submit(send_request, chunk, i): i for i, chunk in enumerate(new_audio_chunks)} + with ThreadPoolExecutor(max_workers=process_workers) as executor: + futures = {executor.submit(send_request, chunk, i, file_name_without_extension): i for i, chunk in + enumerate(new_audio_chunks)} for future in as_completed(futures): index = futures[future] @@ -110,22 +88,63 @@ def process_audio_file(audio_file_path): for sentence in sorted(sentences, key=lambda x: x[0]): # 根据索引排序 markdown_content += f"{sentence[1]}\n\n" - with file_write_lock: # 确保文件写入操作的线程安全 + # with file_write_lock: # 确保文件写入操作的线程安全 # md_file_path = os.path.join('media', file_name_without_extension + '.md') - with open(md_file, "w", encoding="utf-8") as f: - f.write(markdown_content) + with open(md_file, "w", encoding="utf-8") as f: + f.write(markdown_content) logging.info(f"Finished processing {file_name_with_extension}") except Exception as e: logging.error(f"Failed to process {file_name_with_extension}: {str(e)}") +def send_request(chunk, index, file_name_without_extension): + audio_part_path = os.path.join('media', f"{file_name_without_extension}_chunk_{index}.wav") + chunk.export(audio_part_path, format="wav") + logging.info(f'Exported chunk file {audio_part_path} for {file_name_without_extension}') + try: + if use_remote_api: + multipart_form_data = { + 'file': (os.path.basename(audio_part_path), open(audio_part_path, 'rb')), + 'model': (None, 'FunAudioLLM/SenseVoiceSmall') + } + + response = requests.post(url, files=multipart_form_data, headers=headers) + result = response.json() + text = result["text"] + print(text) + + else: + client = Client("http://192.168.31.3:7860/") + text = client.predict( + input_wav=handle_file(audio_part_path), + language="zh", + api_name="/model_inference" + ) + print(text) + + return index, text # 返回索引和文本 + except Exception as e: + logging.error(f'Error processing {file_name_without_extension}, chunk {index}: {str(e)}') + # 将出错的音频片段复制到error文件夹 + error_dir = os.path.join(os.getcwd(), 'media', 'error') + if not os.path.exists(error_dir): + os.makedirs(error_dir) + error_path = os.path.join(error_dir, f"{file_name_without_extension}_chunk_{index}.wav") + shutil.copy(audio_part_path, error_path) + logging.error(f'Copied request failed chunk file {file_name_without_extension} to {error_path}') + return index, "" # 返回空文本 + finally: + if os.path.exists(audio_part_path): + os.remove(audio_part_path) + + def main(): all_files = os.listdir('media') audio_files = [file for file in all_files if file.endswith('.wav')] print(audio_files) - with ThreadPoolExecutor(max_workers=5) as executor: + with ThreadPoolExecutor(max_workers=process_workers) as executor: for audio_file in audio_files: audio_file_path = os.path.join('media', audio_file) executor.submit(process_audio_file, audio_file_path)