From 3d363ff1fd79dab5bc8b57f050cdff8214687f76 Mon Sep 17 00:00:00 2001 From: YuanHui <31339626+alsesa@users.noreply.github.com> Date: Mon, 10 Mar 2025 15:32:05 +0800 Subject: [PATCH] change audio to text use local service --- courses.db | Bin 2588672 -> 2596864 bytes markdown_generator.py | 51 +++++++++++++---------------------------- video_voice_process.py | 2 +- 3 files changed, 17 insertions(+), 36 deletions(-) diff --git a/courses.db b/courses.db index 5f744cce5033b64db28c860b099142b8fdd47fc8..4708ceaecefc6324b10e72786db409a2d1abdb97 100755 GIT binary patch delta 7569 zcmbtZdu&u!dbf=Ywy`0^28>gPv0>6x_mKNI=iGCezK~L7``GQKt6g=~Zo)l}OCh|H zvUzrk2XHV12$Yx`h!dcRNuUei(SR|qZqtVDEJDXa&!4Au*K z;OeUFKlRVwZCo9+~feBGx((CUhr*{@_jwHipZol?k`N}KFzR6_x<&L0BJbC9EGd z09yrH4a;2(w>TpGb z^#c-0t*nsDi*)QBT+kJtyt^W&>p(keHgEI8ubT~9AI1x?ZOX9uM#)e{0g0sX2vwF5 z8M+>ES?DQ0$!w#sfpmq60RLkb0_el zI6rV-SG?^rxtys3?N_sH%&<9B;J{2sBG0vidWI6IsUkjS+G+vctP_9KvP$fz2RVaj z=T8?lfNA&7FK$Fl7@5i+DPG%q=f?2ow>}eY+OYkFJ2&DoI8`8Lsq|$+}Kr~JXV}|BRLe`xpAVez5VvN*Gi{`lWz@|#)nE{C|>FJ zB>CgT@dL<#!r)H)h}Xp%$ES88>xS{oJJNUKlPCd|97+ow9%a|f(yq(p^Jglo70Tq* z*Gt2PN~3!#F2hN@uJ~DbUGWZ&Se%%^*KW7ZZ*ND;{ch%70*m7}oDyLqqJY}_84<1t zh^JM^d~TE{tl!AFdF(9%Qv!Cc}jLU>;`{mBAv9tcZwo zj7Dve8YEHV+xYNr_IAbJr^wnm(7c*WXV^SuL&q_wL%1HWxe*Zx!69&cQX1;vkmI!< z4u~D~AZIW=TTmd%f3=ddX&zkvW_jX|lXrHfJ!t%F@!G}W_)%m`d;2Hh{Yh+B0=o}c zk&M2FE;jYGJ^PVyw_iJ6-g6llQ=WXQyn7tKMsgux(c3C1nP$Kogk+fU(RGxyOlaZ@ zKO~e>7iSw#++q~^p%XGjV|P;}1rbr` z5?4syRoe5ZRPoBcSrH&$Ya?fzXntAYNic;=rxk>5754uSF)54d8!tq$vFJEx zZBz?GWDr>5D?egRAf$EE{`e`&)zvRO^bgHPFU)J^%Ni~j?6XQ*)uk?f{b0vZJ!Jrs~J($QY*{up-V>o>eQwIT<#|gkyxq2?`Nv zLxgY^CWM>_B1c4l7f=?vOQ2RdC;M}%4s=!H=q)y7ZcX`0c_QS5Y1AL;MjjD3Fya{} z43V?K#NTdO^|PinF3_{@tTE%4a5dDPBM5WC2pQ@L5_+CQpE8cmHM&&X_-J=`{FA31 z%IVr*FQ`7mhAbJUCh`y#k@g@kb(p4vLQ}P&<|;r>=COOey&^D;sErP**a|x`Y_2kv zdyYw6Lz)L+fS#xn4pDm!4Sl5p8sGYJw5fWK)l7eoVbVA;q;^DHtI!aQ=MfcYM%+Mv zCA~@-fAMGfC5USRt2A?O;T!je(rjj-(Gk}!aV+D=Foibn8<#{hVj>mIR`Yn}KMsig z+Q@k2(f%%{@cS7*Ai5p2y#Sp&NW+ez-AV_3Sm}7uDr0b9ZVtk>Hc(U_*h?8APf5Wd z9Fs8`eckxzLYDc2S=aR<;iB83RQ%NnVd_CvGrgE$LMb=KI2M|VLGgK*0Az%`^_gqX z?!8b1{=RfQh5H7WmGnsk| z&);K8qrh_+lfIOM1ui;EM8RYfE)3;RqNfQ%dHlWKUWwDI139Zc6rRg?1wq7lU?uc| zN~wQ1-1pdOFx7#a$+WESd(%v9`7|sdP93!Ni1;Wu zkgu3-6=n>y`VbRmBrUgoyyHJ0w(3F74vS|#^=c%tu9n~b3L%FtVp(dGBwFJ2$RuBh+h!O^La$N$ca&hMt&y2vrD5PHDH;*I-ub1D?mtH@JVI!U2$B*6~ z*nrmo=C5eq*;Z`(T>NsXmAiC7&jy8u_*3a ze1CWR#>M{JmfB#yY6j{p_S_>&)6#_4@o26j43MS#_@1o$(A3QQ-OltU55sQ7N#&aTKbL zHP8;w&Wzc(aaUTs>p;%T-QHqX#w&^D#v>m>+@iD7h$BLDh;=9$aG)HE<%1i$AM^`b z4|29w_h-E_nhRzf5*gni)He|QiXlRSQ2QPajZpF9EiGN~=XYZXplhS6L~BoRQHCwR zBP$zPVicxNnnoo?e`r4sI$T-bmqN!Y3lMwtAgj;tzRVdq4n>m{9?@1H>UDrsj>6b$ zAIYGLcmIIU^&n?nR!^}t%j5`OI82~M7)+jAaqK^^*Gf00EHIWv z_z#VZC>cL_1SMY9Mn~0b^D=BgNK6_?E*3#MA-6&j`rh<3suY(J64QzA{`-J%>p{+7 F`fo)0+zS8z delta 347 zcmY+l zYH2G%Zm6L#n%i0$f`+zesj0`Y7ks+;@AKns2 z$(^$Q#f2a5M(Nb@I+jO(1rB%+K!g}bFa~juK>-ysNWeHGVFD&Whbc%w8Zs~qSui5Y z%e{S)VkkJ~YKr-Hn{8EKPUh9#ayB}0%-=Z6zR$KtKd3d& diff --git a/markdown_generator.py b/markdown_generator.py index 2cff81f..d146f27 100644 --- a/markdown_generator.py +++ b/markdown_generator.py @@ -7,6 +7,7 @@ from concurrent.futures import ThreadPoolExecutor from os import makedirs import requests +from gradio_client import Client, handle_file import json @@ -14,15 +15,6 @@ import json config = configparser.ConfigParser() config.read('config.ini') max_download_threads = int(config['DEFAULT']['max_download_threads']) -# start_course_id = int(config['DEFAULT']['start_course_id']) - -# 转译url -trans_url = 'https://api.siliconflow.cn/v1/audio/transcriptions' - -headers = { - "Authorization": "Bearer sk-lakndqcjlmtukekcliwkkryaxquifduhvzgcnlhofzvofllv", - # "Content-Type": "multipart/form-data" -} # 设置日志配置 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') @@ -55,9 +47,9 @@ def create_audio_transcriptions_table(db_path): try: # 执行创建表的SQL语句 cursor.execute(CREATE_TABLE_SQL) - print("表audio_transcriptions创建成功。") + logging.info("表audio_transcriptions创建成功。") except sqlite3.Error as e: - print(f"创建表时出错: {e}") + logging.error(f"创建表时出错: {e}") finally: # 关闭数据库连接 conn.close() @@ -67,8 +59,6 @@ def create_audio_transcriptions_table(db_path): db_path = 'courses.db' # 数据库文件路径 -# create_audio_transcriptions_table(db_path) - # 下载音频文件 def download_file(url, local_path): try: @@ -87,23 +77,15 @@ def download_file(url, local_path): # 调用api将语音转换为文本 def voice2txt(voice_path): - url = trans_url - multipart_form_data = { - 'file': ('audio.mp3', open(voice_path, 'rb')), - 'model': (None, 'FunAudioLLM/SenseVoiceSmall') - } - response = requests.request("POST", url, files=multipart_form_data, headers=headers) - # 检查请求是否成功 - if response.status_code == 200: - # 解析JSON响应 - data = response.json() - # 提取text的值 - text_value = data.get('text', None) # 使用get方法可以避免KeyError,如果'text'键不存在则返回None - logging.info(f"Text value: {text_value}") - return text_value - else: - print('请求失败,状态码:', response.status_code) + client = Client("http://192.168.31.3:7860/") + text = client.predict( + input_wav=handle_file(voice_path), + language="zh", + api_name="/model_inference" + ) + logging.info(text) + return text # 保存文本到数据库 @@ -199,9 +181,9 @@ def get_content(): max_course_id = cursor.execute('SELECT id FROM courses ORDER BY id DESC LIMIT 1') # 获取数据库中最大的课程ID if max_course_id: max_course_id = max_course_id.fetchone()[0] - print(f"The maximum course ID is {max_course_id}") + logging.info(f"The maximum course ID is {max_course_id}") else: - print("No courses found in the database.") + logging.info("No courses found in the database.") max_course_id = 11 start_course_id = max_course_id - 5 @@ -219,7 +201,7 @@ def get_content(): # 先请求全部的链接获取数据,并将获取到的课程信息保存到数据库中 for course_id in course_ids: - print(f"Processing course ID: {course_id}") + logging.info(f"Processing course ID: {course_id}") json_filename = os.path.join('json', f'{course_id}.json') # copy_json_file_name = os.path.join('data', 'json', f'{course_ids_dict[course_id]}.json').replace('?', '?') @@ -228,17 +210,16 @@ def get_content(): # md_file_name = os.path.join('data', 'markdown', f'{course_ids_dict[course_id]}.md') md_file_name = os.path.join('course', f'{course_id}', f'{course_ids_dict[course_id]}.md') if os.path.exists(json_filename): - print(f"Course {course_id} JSON file already exists, using local file.") + logging.info(f"Course {course_id} JSON file already exists, using local file.") makedirs(f'course/{course_id}/json', exist_ok=True) shutil.copy2(json_filename, copy_json_file_name) json_to_markdown(copy_json_file_name, md_file_name) else: continue - # logseq_md_file_name = os.path.join('data', 'markdown_logseq', f'{course_ids_dict[course_id]}.md') logseq_md_file_name = os.path.join('course', f'{course_id}', f'{course_ids_dict[course_id]}_logseq.md') if os.path.exists(json_filename): - print(f"Course {course_id} JSON file already exists, using local file.") + logging.info(f"Course {course_id} JSON file already exists, using local file.") shutil.copy2(json_filename, copy_json_file_name) json_to_markdown(copy_json_file_name, logseq_md_file_name, logseq=True) else: diff --git a/video_voice_process.py b/video_voice_process.py index 5116250..9d0df48 100644 --- a/video_voice_process.py +++ b/video_voice_process.py @@ -11,7 +11,7 @@ from gradio_client import Client, handle_file from pydub import AudioSegment from pydub.silence import split_on_silence -use_remote_api = True +use_remote_api = False process_workers = 5 if use_remote_api else 1 config = configparser.ConfigParser()