From a4c35ef4d89ebda2a07fd9c618322bf4917ab0dd Mon Sep 17 00:00:00 2001 From: lostecho <752549025@qq.com> Date: Thu, 30 May 2024 00:04:46 +0800 Subject: [PATCH] update json parse files --- courses/course_database.db | Bin 19423232 -> 19423232 bytes courses/json_parser_songyi.ipynb | 224 +++++++++++++++++++++++++++++++ courses/parse_course.py | 12 +- requirements.txt | 3 +- 4 files changed, 234 insertions(+), 5 deletions(-) create mode 100644 courses/json_parser_songyi.ipynb diff --git a/courses/course_database.db b/courses/course_database.db index 90f74a6aa599375b1a5e6195bb477a12655c6d71..08a2dd3871df4cb9063f9c03211a088812798503 100644 GIT binary patch delta 29969 zcmbt-34B%6wKq2b!jwP=Bmol0y*CU3H=KD0AVirVz)hlz3g_N)4qzD)1T6ZsAr5gu zB_gh8s|{(LtwX96pCh!dU)$H$degyHU!M|RaeA+>M%(IFr|-Yd4dG6@&-eR&KmO+z zZdUeLXRou@UTf{O);)BxYxp7H$fUv>2MrqAK4{SJOM!27%+Lcjc5l6R$}qR!<57bK z6&!hTRMpS(@;@*5_{l$ns)n$S9=&GAph1JiT(kX}!85KOJR<<)0r|jSU~p_H~?I6V(^TwfBMr= zg%EaLKpQmjp@4RxxGFF+5Nx|+<27F!KIrNP)=k(@Uw>6--{qm&Dp40T(^N!FH5FOtbvjv~O*{I;yrHAaCc&sx)mlNWF(`2B`bneJh-5a2a;+%Ts&b7&!F7{@ zOASfZzyKE|fHn*+O2IrL_>cjv@Oqf(wkrai|-uJ$z( z-Q9zuKIWFX=Wbc%ajE#=vSwGa9QwvLW>;Mm@`P!S_(8N}TBb;Izme4Svrz`Eot-x) zhj21J68CK_rLFpl(R%Dc%KZY97(oJSry_DQYy#C4PU+J^{XKAlCU*3Ycye7&$LFpS20&njk|jy%U{4Yf=URE(68 zpD4AWMuBxDl}zghua%ZH`nZFeDqA1))VF?A!!pSB5o|bv(si$qeCr1$Le^AScVh8~ zs@Wc^twxN>Fe|y(3`uUk%-aHJ~n*^~|l4=zy z4irzOOHVfs9qxVLiL2%BmsEC&uT2S*HTW3cOq~WkJw^XiyM|6KDJTqcpV2XC(%ZXs zdK=pS%Q`lQt}CWg4NE?Ps!{VCr_LKO#2=BmZ_1B`e6ZQ~_fS(SpUyF}RIel^-|CdD zHB-EfB8w{2SVU3`E27A3reH~J6^mB()Df^(O-e&?E{jvVWsjZ%fQ{Kvss^ZGjw^FYW z$T~KBWr%-g>U*)gA9m$DV0R7i?@HZwM;d}$I_i}dEn`D2=aPET%6tph7M$@G(DQR6 z6@xWRa%uFa3DfApiPK9+xVxs9gxTjUp(*cfjeNEA6rF-yD@y73&V~|t|DK`MM_=AO zgoJPAjT@?onxrTvf}d~L708ohYWre*`TDmGx4r$`qwjPbK6BH9XKp|I&h-z!bwkHn zw;efi%k_VG{_vT{zVr62PrQ9c=UY!b$A0_NchB6^_4c=}J#*XPcdx(Y-9zo~eD_i2 zw_v-ENP1QOt^gH_?eO)YlBp*9NK|SCvj#STAPBPrp;i#EiFHI|!%-wVCdKLG==oEQ zJ}ynk^-d-ws#5Rq&6B-XGWf{o(gxkO1YNPMh$I+T9=c^kVlhKKH>M(|S@nE+s+$k1 z{^WPo6a~3M)2ZN^X77DVt{BYum`|rBHBSE;QYku%?J5{=3THfuj-9{MqpadkDikPK zy38YO<&nxlw%QT0e6~!rW4W4Q+m;w{1qlX}sm3B^96w2#ZmYHq^G=ezacNuAu=Bz` z$SK-gQb{k~wR9L=x?uCTv{gEwc!nkS6}5)?npcaw9t2o`_;eDvWhm!LreXm>Q481leNZrLyPiXIA^6rk>q{D=2b(*)Uo) zB>>m%bo1b|N_arL1Gb?^-8XNU6hQc6%;E`AAIzi*m(LCrKRIp_QpkPkNN{=9XWH)bn$%%nWhTyv;A zo}A&zn!0K`k$7BzeHe#lW@@q#(GNg+cBusbgji~1t$`|+5j5dYapd}de4Vj+jkm98V>hUgl$D@Js~ zK-khk0M$g?GJ?ngtQ8@yssh#RA0~wPbO^A{E2yMrH*5rcI}37bIY7UUtAYGJu%#2rh5ax^8~_IAP2 zih@~TAH>x2`jX1ds)wfrxN`11^4Qb>bDb%1B~+syR#HXngPw%|H&*)~v^x(zGBuDR zI#PVBJh^NKL|wNCn_%MkJBq4!Ge^Y>n;ODC?oPcmvl?4{5PG7>(0&SU14O9QP+`MH zbX~;;C}Jy&IU4L(7n0Uo$#AteL`9M8}5Cnt8Z1( zwh3`*HBTik5>%cveRywy=XMH{zJv}N5=2;!#7)3c#g<`sNY|wheq_r zdnIOs+&OwxkUN3Y^YfVvVIPbfJ@rVVe}+?iHVXcDdx?~}LCLn@%95!^994?r_%vjKaFj&EO-4A=*`qfzm@QyW4)I22D`jCehF zJTv*yrOHq=xnUJSbWGKRw;j_&2vmv*k(jP(5!KQx)pZdMwwMy2ZJ*}Ps`f)nz5g|P z5>2^w<%F+}2C6t}{K>{rI=wPb;0gHDEd`}fKX*tKFqfX9kt^1E9&OmilhP+o@5^C! z^li&e^A;o=n0D-+x4;{>iZ?DT{35?G%;i+HsJN-TvDF7*F15^!u*B@RjiVVHhw9iu z6g%d?^OnSjVcUWrS*l~kn9WMpy$RFO2QfwaiYn-Z*A9NEp!DrN`1MKdcYBy-C(&8V9R5xPjW-MYm4py;k8WweLAB$WNE*h#l6Is?wO=134O8gVg z)wQ-E8UGxxm|Jdly+oXm1EFxahJrsRUsc7ME;>Ga+fo)RfQsfzpd-@M>iaoTtuCZM z^4vxrflDt4aO{n-v%~xVVx0OO8&^pa?p*PuI=eV|j)gdIpXx1MU9toj9GTCNnG`2jf)*2z?P0kJ-eer3B4LeXT;V)d)nblp zyGQG;nN#h9PNNo6oJ`Hf^M}&o%`?VjTB6dEut)(&FpIsRC@r?A@O{22K z+qZ7!FjQSAw&T%?*#|2BgTL-zfMxM*~WgcxSm(~+SMka@vwHsxLSTJNu zma+cj7)!`gMfe4R3x^QtTDoMrhMSBzKk)kEnbnF9YU+I`Jel5AmU=tVzyt|$c22a4 z^ryaYCQCh)sr!qer44>=kS1s@Lx|c$H~C}yiL2G5eQ_>b7Oox>^FaE!%viRrXj&Qe z!6|iZ9~KSq3F*`yclJw}`#4Eyq=%i=tQ{v)%9bVJB;Apa3nL)4SdQBf%ZbYo&Cz3s z+K7Usd)6t@M`LD%_{7xl`|~Sl#}7=;`{T%1J{r_3*vUf2nbOimKdhum*atnGdI~vt zJ_rT+@sp+};*lvL&XTZvEPCF`u#fQwIqz#ExSjWF=A6Aw^G@sF9E74dS=`9ZUfWoZ ztxs6)eX3rRMe1-b=s)D=j!Jzc7khzl&fO`xPi$)S!)1;ggR4$Z{Gn?iQnJmsS%qwD zMUdxg-ml4~#AF(`_Bq@3>^&HwEk~m3Q=he8J7jz`kd(hzQn>H9ZcD0)l+y^#&6uYo z>hOYLTpOZ|oC7_sxYTWo5<))cikHu?q}Ly)&-r$z@+eE4z0XYvz(GRpy!cvvSp$~| zIV~}W&x}kxV@fxM{SbEVyrP6wJXF7t#okgEWp~wL&uAX_OqQr_&)9lDbJ6`#u)z;q z&+Uu)Gr)Lakdv8O>9$DL@pq;Q(_GgPu_Z-NtpxE(j)rpuK}HsVs3@j}!v!mu&`R46 z4m+>f4>^hT^DF51qtgqhe)i^EHfOr5>CoZzNl`~%&o6E8bBk0>#;2#Y@i7>kH6vh^ z@Sm1ms+_SWl|w9|T$1iMJ`PF*g9f>SQrE5H8$x~OAwzaq@LYfdIVw3_h$VFGf z@^vgr7ED*sV$7A8MkB`!#*TU~^*l1X90w%4zoMQ?msQfj`FlL)mm}rgC)p99LgI%j z%NqQ!lqzLDJr&<9;t-tAbd3Jz>q~MH7J0M zPo=0qX$%~(8Uhz^bw1Qm-oM^MmCo-!=`?l|P-`kwaAPMIjOj$H^S%-I{i!SnjVwQqDP$Xf*;yBBV z8IG-z*bEnsPv`2{T0zdrQZ$oJ-$1UuVnEZ&iW@=t2eg8N@_EC={q-1@x8pdg=^te*VIwD5*y-Ugmk$~x*?DXUR)H1I>F^F^-;qru1EPU>sIr>8&}ifz2G zAfIuE?z(YVauYvSe@ibdtDQ)Pl&!-HP&Zyn+@^3=AFCUsjz}tc=3G8z(at3!#s(qfuw$gOOATbLlDY$$_L#D8-@k4TpWn9IN3RU|xUV?syoMl`j=C>e zdyy~pP6YnEVP0T}Xv(H~LcDZyEMQBxKk_zI`=acg9opuJhpjl)jsYn}r`{?lA5gPii-z92 zF07-qo7NYFeB3QHE&B_gHl#*N9GzdSr;0l0z{DRk5f|% ziFanqL~jJ-dg#dXwc-M+dLwPuh*{Nsh^ft;Kbhv-Q#7>m{6B;Pp4FALp{A@t1FEa; z@VMlb+H+-~w9(I9QkS6*dRVU)Zw~RFwDZWT;r^`ul(#TL(%y#ZrH1qGOl|k&H-vo9 z)qPZck>XDl>02qal(;1Ulj_83ronW1^L~iUIokorR2}?m8Ya4vD2`xl-Ve2kekGb9 zqf-g1rACAtK@t3{3nrt9fkumI7)LiWhdO>;KbIaym!llY>8xGktq1CQChRpo%8rs} zZ0&ALv`j-p?U0LLjN+g~8qskv;-DXi=(sTynUK|lDk44|qK(TAOr|cUbYc$vQd*E0 z)O;Q*BEqijRUi1^AuUMGr>EX~S~o4{)Abh#<0zs(??llNtZF0s*n7xQ5U#3N8TN5oYMHSXE&y-psV+LttJ`nolJ-AIYE)H{vxDdv()a5V(9#F% zW`=!GQ{dxT81KJ{B&X&}4LgIL7(F)m3a~o)sE?ubR6WC`r{JGQY_8^#sjf~fzl|h! z`;_e@TezUxCa(%|ovqI3TT=rUj6lfpK{sw2NS}GUuRiHOrl~RUsiNc@_4@@nHUDmI zT{2i;Dz>Y@nUm2O2y0T5BL>z#&fqN#juy@f#nxlVU_lj^^oc7EF84WV;Qn|?DKdU? zxO~(cm{(c>+W`tpaTF*pLvYdBO_K$ksVgx?N1k3ku2k1pGbTh0pty8h5kP0#E*Xb| z6^4NSX~+jUs}5B$2F0evOwi(&taY5i#y8wJXp#!@>|a-n!T)hTT>D3%@H;~r>CnRs z{*)owGD%!N2S-MxW*RY6rsA+cv5?wgq5WIL5aO~OR}~e#DBdp?==8yzgC}zc1An_< zGIg5MIUbHqZFjFu=DiPwLX@cYz3K3CczkGSO_}`g{a4p3tSk!gho;*mR%fkF0989E^j>A`eAW{AG+e3=Xv#%May_YYS}68l9a>6`Tu`wt5%(R<9P3_ZrRwt z(PKjGq>$<6!TOc&PXD zX>q?i#L zwP;4%F|C;BFj;;E-OcB z5pVc32iXVjEQ@T@G9Tl5M?xtMvIr%{)LzOSif0mc&{FSAG|k89crdRr{h_FPMbXM2 zmp|HiQG6-W2!%l|ixkinBZ~pFtz6oq$TH$`TrU~NSzqms!OQc`y?=jw{>*AW)YS4f zRMlPletqsUuCzJRsBNlO*bx1}Dx7=!;3ZX?^XaL3iYtAqSzxZ>x-P?^Q+nVT$`UEznh?1N4*hj&PAFD@ya znX{>Ie{=JyX2;oE)x6ia{PO*K`x`-Ty`{*XwZ&Qm=kQvvxKg^4t8P4QIHrjr3shq( zCVFdHimF87j$^4JD#b+VIk8m;^XVMw``x^XJ9|sLbegX+;x+0WBK04;eOz1G;A8Bx zgH=8~)xA)*hSolHQ(=hvd=3Ss9ZPm?O4BYa&-8nw{ReLv&XD>(NApx4w-0U_+u(<; zqfBmU<&Wp&G7h;enl2dVam?cV?4(VxHB{TXFd|s?y%x7)vS54h{_(B(v%@}Ulbtc9 z>?kUrhqe{vI&m9z_QdVZ`!=JI0s^othw{s!KE_VfDSW=Ed(I)RNoAogzCYPslzHBk zW%tv9cVRzpIi={YiyOjx4sBxTjVpW*dW{Ctw+u0d2Szgw2WPaRI>rm!vnUn9=q#Y` z#lr9-i->t&gaDZ%l7%)4h;qS)VIh812fZoA@i|ThZ~%~ujp;EJv4rFy0peur@Bmej zXJcc@0|P|Va7PW*SNN;xx%!{8F6bFdAU?iu9KG>}Ed_GQpCev}+i7T>#IEd{8xWRb zA-c@J_Rb-MakXBH*DOVn=81SG5sRVdva1I(3@)wnGx8=+t$E}4&1*=wdURo!>#@e1&IR8t^-jEo zoW~M<4$kpYa?D&Nm5>g3uOfDcbXJ#>0dKD~6!83!m=P>ox+Q z=oqhZX~i53&h=ODoV1 z<^1vnADkGeYLZWXwC~8KYCf4@&@bj6NQN5D<=%Q_q-k%TcC0UgS_u9`Q1@Mh(GZ`I zx&pJBd@-2J%^8dq$Ni}}+a+_$n}_cH(~CE=*5%iI|4+}}{N|y1SZDJ4UO02_wW&Ec z1K($4C<8e-DZfAW)kZQ)(n&TbXW!`*$a`>5-h(HKPX@k|r?ovh_nI9e23`HZkO^&H z%lLRYsqjy1S>3Gm`d>hW>K{)V3tyX9Q5r+CD0xp=T`}Q()$XVbGEBlg87+ zYxC+tmToH&3y(T3&foBbPZ4}ggc?!VQJjq)Hm*UzyYnW~+UfZvM^FB3UOrmIQ}2J~ zwdA1!rtX2-5?Z?g!9~{GIkoG{KLsj_*f;Z>Yw=FqzZ_Pz7!dY4eA2C7s?n@=IEr^*81aYE0_7rvynj zth;#eH1s*+kDb;;o==a;iXoe-`E-InW~{%*>Ojklm+u;%))Yy=G26O&^v2=L0JW{J zL;rZ}Z$TbuoQi)s5-H1EIw4mRoveUPrc9CEXd8}(*q)0nNs8&PlxZ|Pi`g+%l3mmQ zc>8PTMPEz}%ntF1se8l33i{PEV<&%QUvUv?wC$pGNy&=%_MIqy=rlWr&v(+!@e-DO zj7=?n8rzcIfpDPNWK!G+?X}&cCDWCI+!2$Ojt?$9ONVjkXL4EBt3_s3QbMLs`3)?o z8-10l`E-U$Z9iXCLH7qXe)Uon`-1#1zkyQ4jEWqabP_Qd~cPkf_2@y+%`YkT5Qd!ns9adms*n)bxC?TPE!6YcGZ z>)R7Iv?p$CPu$d=xVb%XxIJ-8d*asi#BJ?~BkhTf_QdV&iKFd_JMK&zD;V+shCW53 z636tw^Ry^&Y)#;{6L%zzO&VH!FMe}Ba38>aK63rZSB?R90tEaMa2N0`;M>66z&*fs zfKK2z&;@)K_#SXCa363#@Br{2@DT7Y@CfjI;8EZ);Bnvypd0uB@FegQ@I&Bf-~{jt z@Xx@rz;nR!zze{Oz)QeM;1uv9;K#tf06zi#H}F&7XTZ;ae+7O4{2TB~;8(!QKmzCi zehvHv_$}}|;1%Haz#o8Df!Bb42mT2B2k<)Z25=ha1>OYy6ZjKw26zj28+ZqJ7kCf& zFW|p{KLh^*`~`R)=mS0gJ_P;>{0;a!@DcDa@PB}R0G|M#0-ph&17F;H@|D1C0K4R| zOFp{{W|txCGL&5k*ku^I3}=@S>@t#FMzKpFyA-j@Xm%OHF2(FJmR(BNWgNSVXO~iT zDPxy%cA3B~6WL`FyHv1CCA&;!mnrO0#V$d139(Cn-D!WVzyma!F>4Ake{$J+L BiU|M! delta 13319 zcmcgydwkTzwa;!gFCc*cNqFTQk|1Q+-(z1WN)$vRgrJB9!y_S}7KCMYW7F(zHVJgq zE3HN(r=`^pwb<4tT1BlDB0g@ft!}i^)(R4NOSuU!1BrkINCJ|96d)By1BL;^fe}DDkO7PYMgf_?XkZL5 z7RUm!fgE5QFdmoyOavwYxj-I}4-^1}KoL+3lmL@~QlJcA{-yxu08@c!Km}k0rUN!W z0aQQ(bifWc04LxA+`tUrT;M!lCU8D*0Wb@w1TF+F0%ikqfQx~-z$L(?KoxKqFb|jy zR09iu8ek!CIdBEA2)Ghh3|s|V4b%clfNOv{U@5Q+SPrZJ>VXDeCC~`00#*ap0@nf8 z1K$DG0BeC8fOWunzyoXmynqkz15Ln`pgraPHm?=#k6x&Xp&ji z==L?OrL^ZGPYRtL5iCt~S>4p5`SVN;GMNH76h7rCNv|uKR#&vH{)SZ;uZ|u*<(Wc< zR)%V+;wud4csZDn<+MBO=+oHbt@mHQejU%N>C`zbP|IUmOga@FX`7s^%+Ps)==Q*T zhpG)~PK$pbS-$?Piz`}N;ouI%gDtpuusGVL9c5>;KH%C}o458~f#RcPna z;oKxeaoFrsU$ZHj8ou!qXT-@+#DkR6n`z-CH1D*htVsN;@A?*&P;qN^ml7|Hs+02P z;F1AhHhkAVnLKIUB(tJWVS#@hU3MDd?YJUbZBQ?r@2?S*ERAYN)5F~lA8NJ|C`j{F z8#Y6yzC|4+wEd7LO>Bn6ZPg&cCb1hzZflO{EbYej)7XtCGqD>tn7!q3f@pT#<&Xry zB+-n3Aqed`*gVyYDcaBNm{04>-ZHwWxOIU+UG-6OjTlmvK#QqUe{%&*-4>cckMuW} zP|wU@Zlc}kqRus~@%v$?6<@UEn;kYPtO{g{-tuN(1eK1T44li_eLMBfC@7))_hnVk z+zw;vorke__rHt9169(U|5mPPlG$!2<>v4>@*HlSVS$#}w)O||p~d}0;T+LVo>Yvo z`eSJ5c^`VpiwA3fuB%XTXIBCX}=d9 z!F~(HjUMro#j&j^4wo*m&5O3Mb%5;+>pDt}L#TG8wO11l!6pRMQ80evU4a@w#qYW1 zb{P_Qj-gZOwvd%(9fJsJYG_iT?$odd{^CJu>$2)t_fmfzZ@TFR{%p}*l4Jcbwjw97 z{6~*@#?WmiJX!I=I9h=cfawkS%b$AP%Qf zm3D=v$b2ZaD>37kB8;QM-EC^fH|6k2*g|cGn=8z^jWTa*$6_kfdovbu>#^o37PiK8 zJ1Bbh-j1a-Jj)wDtExGjx^(tcXyS(Ngs8MRDVf1si{0&1sJ}W`a>gD{rsLziNz}2gi5*Jcl~_FP zE-l`Q94}Pi-i}(jV1gtDRA7i0nC72|kz{sg)VDn_PYhy6!o$%%-^SVGsm)2JSrfcN zyo=C-LVA6ow|GlmsY-nlS(>LqFlH_rRW1&lRfEY%cAM3uQ~zexY=!#vZORcJO5#K( zdM6vZvMf&)$Duo1k}mK({THkozQAfC1je^o@a8>!g+(7%VdjPf%)p&jD3b#-Pe>O`pQO9j5PUs zp(V7o*jrXI*hp$N&FON-A5hJSy{Wt{aJAtm4PlabCLNBf#D5vBDF$o8;s`2bD}okK zIg_F9cUIU}Ha6NE_C}XoTW!6;qs2!u@U`O5_F@be(&_YkvA3AI4>V!3-1hiL0Gp*) z;k9|QAYlJn5D?$Vs<)?jK=rnldduVF)13~d90p(-SnNUa75*qtX0|I-^A$FThcFaR zP-3fQJ=B;%o6EeV@>azH0ks?()bLT0N`k823(Zq)Zt8q6bk?LI);I^psB>0kNsvw*%H(^t zZ7JnDF>B-O)|P?gW-fV<6k53`SS9{lsL28~2d>)D#7Z>UR1Bw z)iu{U(X4WzX;9F(9Q@PHKUeZkm4CYVX9NG_FT_t5cd1`Ry;^Vvt@}f06nW{=8VN$kGZi1=34?P394}M(A7A8uGaz!QU65?8;TQ~v9H`$T8^D=HtPP+Qo7*0 z!OKB!ObnOOit~{QAl>LW(E|U&9TratRvXky>!5u6H+jQij> z<I>5 z@e4Lns5fuptghdmmqFE)P^%{wBKQ-1cf18YnUeX8g-Z9}p9G5_O6cxop}a&lEFb&( zBprI%Md8EE)4(jwAQCWxSrPs52NJVAm27u8hg>s9&RX0KMUGr_H?_~mEF{~D0k?21 zhCH^}h+eXaTdTw7sPv^*ZY*&bR*Yv&Ac_p0F_p^7LZur}3m! z=6Z|c*Qo1~m?fcbm9#{v139V|Z%Q<~*KpN|}G zx~nC}q`5VlLy97F%#(TRK+ck>#AGqKX=iEse7d_j=9U(S;S5RmkFq2&x72$%BoXEv zR0$P+0`smR^NQba1yRAWs$<=L^AUFeX;iE((xzn#y46xtpwmVh|+HOX%4P zeDRT@V)!wjU5oNn^E3ki}j39?cnCKscLnA{kb^JPI9TH`PnuP+AUUE>2*Qi{R2 zi0*dRwx-g|t6N8Kx_Te=Z$~(=b~d)}N z#Ta(OgB*N8^WwEJQ9;?Te_Jpg%Gl-!=7`RcGPYj~Wo)|JzocvKkb`7%DU!WJLSni# za1FIxgeCdawH=eG`$}Ju+2!VX-h3WHl=HeSjmf}7eE6t2$@WUHkb0`nMc9`ORgj>1 zF?Li%uF^X*DD-cd1&ggv=ly{KvlGE;)-BoOt@26cp0-zkeUWMv%=g8oT8hK1$*C4* z5UEzJA%X7c5am^#sdAxzKIP8`S7K8Ib!u@df-?L~+6H9oLM?x1Lw9kCKpFWwyE`k5 z`dx5p{3#OY2ltY-QjCF!y_T4YJ4Kobk2UBNX`YT!ah_nq5b4enUl2 zm@?XV&AAd)Z)bKnVP~x|Y&uR+64qQZ>=&PWk!5gKnP>d1oXb!ZtieB{-YKQ$QQH+h zsZO9dVy8FrO3e29)*zfMQ(U(-fu;A`$J4C2fswpg)1a#wop>vpo2V$*?mI#`qO+u} zqptK#rpGc7P>#G>Hk}G8HIlljFhVFlE^uwK3kV-zh=LW1nIay7k|v@ET~PX*`lA!t{BBA!N9V{75G($>T*l#i#X8qQ^T$$(UvtR%(7j}aoV&cbx`pg z&I(;=a5-I?ybg(*Rt&((`74f#X1j5xo9=86_Yr$-Ku+f*1COo64 zJ@&sAoJbE|>l;mn{0QSmTqp0nu4%Fo^E|A?^y=?}Rx=8>!h!OqM!qB{kuNbNuWIb* zH*ojtTQ^OomDl--xn!FQ6%{up18%?E3{A`p&)>Zt(Ra_owE^!{O-0X5uVVj7)OjltKxTkdd`qtw3iLu0e!1DEydT2JMI^;gU zMSVZV4oLDOQ-9~C$}O|@p{|HwsB1@CH7#3@b-DJ9@a!{xmxYRjVE5FZqX-Q^iMzR6 zvrRc0(va;62+T!nHYP=gCAps@O-~K`zb_Hfu>FmTC3Tyk$t83!}{{>wzdWau)8GiiW7<{sCE3C?+V;}|B3KCI8H&UJf z)he}UR+q?l^3ASTmcZR5Vcr=7VZ^Q6vTTf6_FijxwoP%#nGO#xG99rUnh+up6Bvn( z`1F$ZTd@;7h~sA5&M+nIf-C02zs*4>VQyZ_0skl>pe{PtrdxC!CExaaP5EZKi-a5J zo)YV)b9f(~#y+%8koV!=+H*q$3+9RW_0R+`CGJH9C89y5WEoPtHkx7_aVmZ~NfJV{DY{&P=P5)DUfB8#ZhvVK(h}WDl`4+- zPbPt>n_fjkD=6qvji6*1Vn_T$9=Z!X0)>md1{8CCy*^YXjD8YEMbVEhb$Jtdf8Lf& z+)dhq<9P%6^PCx@EglxmAjgIfia8v?GlQHrVJPa{-0ot{{v~xTbY;_S^FS~lJ`_5` zl+0`FBww}99DYoRl$;)>PMMI5DD&lpowthidpT_t{(S-|i89yo&i$-ekH zy5h7ex|HE#3Q)DDG9{q{1DI- zQaBaeo~R5?rv<5S2&hzWey^VOXsk0ZrYBnpP(naXl_NSz5`EdGY8+HTKoXL_KCN^s5av)rjR#1P|DnCMpiTP1Zp{-?`d1W zUzCw}G{Th3T`U4Sa57f2eE6gn+TG0BjoP}nTsdRW6YYpTQ4VMA5K$O+mb7F4$$@%h z-6^TAk3`I&%KD8-#MKIpYZ5g)gzR$GS11wVk%9;$c@U#qDX90?gcs7%Q$xjw<;ah5 z_Z^S5SqT&@`?ii;GmMD1&^sfB#cpM9BBCCYOFanLxVI$8&QlOXyn49(D_+XO)x{Ac7;=O~Q_MO*WWBiTZJ75(>T3qxsSiNIBO7DWI zoE^?HqXKm%E2b5vai_)nirYI;Z{wa4!-Zd?tesioUrLXhmTu@&w+sJCWl$&5a@^3x zIKOviZ0W&O0d7>pAVyrk!;!G%Oi2oa3ZzJu&hL*YP)0v5d`c@^D)x3jh=Q?rikF}$ z6^gz{am}#7hB>%cUlfLmMZtnE>iA1ZrkVZN1aVxS{i{^kMH1^$rF(k>v7Hl*nqLwL ziMAiYEuBi01xWvR2;;yCN*q|GWQk%PcJzDbp869cC7|G)9v81<&iL0P2!K~%xDCn2 zk`RR3rwBij(eAlOiS4G($w*lnpV_MhKG87A3r;D;`xrl zsVPpIQ>;9j!|9|A(xY>N}RB$9$Gp3Sl9UcOXof>o(RBP_e@2n6 zIsOfwU7MlqBEvLo=2MQxW{C)VjU4mrxL6f9CFj5!!-6Wj_~Z}b?UfkJz73(#E4O(| z<8M6h9!RdNYF)R&{PKeH1H)3L#3aJ=z4JpH9FbiLDtQEAX{M&f-^^=#?eV{D1a3M# zQ^a*-WhEYN8D$J3aE&dOSU54a6OXOnW^lW^a0)J{_=9)>#b8vnzonLrU4#c=pRa9& zR~!i3;u2C&bFYP46x6PFTGFv;u74-f59fHMbkp8k({x%f z$1^qqr)~`!5L3d}2?<}b`S`?nw<$d};h77@H~n+t;l^zRrfHPUpho9>668|l+)skD zv)v7RWW?d&bLD0m;9f~1n!-iH&)Snl%G}pBW&+=Y{&OqQo!OLc8b=dOc8ttgjb07H z0-YVewq-*}Hof?1u)cdddJd;Ko5CZF3D_jLR?>MVgX8GOUj)bR3Z4wsnJjiEZfId} zd|6KR>#OT;T<={DH4ujnw{xZgEyeWhlemWY>(Tat;SO9K%}{i!8;{cX0FS|=V|k6~ z_|@UDU8jOog^Gqr#Zhz%nd}w(oBwDSmU>FQu%n3VH?@wWqX9HMl^uD*&;EM@Rc;E8 znIPmd_#YUrp5HChE7W{4m`B%~LUs7y)5xZFp5jMwERu0+dL!bUwx@g+(oVM&r>HY* zE-OEnGhQeioP)C`KI`}*SoiRT7)b#>ek>`di2ZS z#k>zm{90`?J@AY#jS|z^FQadsK`yp%93Ddns&h}MMhw5J_kp$x&AX2Mrem+Ed*?M- zCcRX(sd|OYUZJ`QY}yRXJ_CWz&_6|@Es5o(K{m1Es^M*UD2&6 zNxN{e?xYdXtxnT}G&{O=fobcmJEL1CCTHG{f7t>247d;2a?_rDTY)ZsfFA)r2DSk| z0e%YH1>6mE1KWWf;2z*!;6C7Izz*Pk;OD?EfL{U+01pBW0S^O@0FMGYfyaQyfnNbn z08awDfTw_`foFhU1J44#0iFYP1ABm8;CWy#@B;86@DlJc@ZZ31f&T$s0e%Pk9(Waa z4Tu7L!0W&pz#o7&fqlRqfj^z#-r;@B#24 z@DXqX_!u|}{0%q;90yJSp8%f%CxOp^&w*3G7r>XmSHRzauYqrX)4;d4?%8L$4dBMi zO(HiIZj!i3<|c)kRBqC^8OF_UZbopE&P@h4Be@yHO(r*^xf#RFSZ=bo$>t`9n{nKX r=Vk&o6S= 1:\n", + " try:\n", + " if category == \"audio\":\n", + " url = audio['attachment']['url']\n", + " audio = requests.get(url)\n", + " audio_list.append(audio.content)\n", + " # 获取文本笔记\n", + " elif category == \"text\":\n", + " text = audio['content']\n", + " print(text)\n", + " with open(os.path.join(course_title, course_title + 'note.txt'), 'a') as file:\n", + " file.write(text)\n", + " file.write(\"\\n\")\n", + " # 获取其他可下载附件\n", + " elif category != \"MESSAGE_RECALL\":\n", + " # print(audio['category'])\n", + " if 'attachment' in audio:\n", + " url = audio['attachment']['url']\n", + " attachment = requests.get(url)\n", + " filename = os.path.basename(url)\n", + " with open(os.path.join(course_title, filename), 'wb') as file:\n", + " file.write(attachment.content)\n", + " # 获取其他内容\n", + " else:\n", + " print(audio['category'])\n", + " if 'attachment' in audio:\n", + " print(audio['attachment']['url'])\n", + " print(audio['text'])\n", + " i = 0\n", + " except:\n", + " i += 1\n", + " print(\"get file failed\")\n", + " if 'attachment' in audio:\n", + " print(audio['attachment']['url'])\n", + "\n", + "# 处理获取所有音频文件\n", + "audio_seg_list = []\n", + "try:\n", + " for i in audio_list:\n", + " with open('temp.mp3', 'wb') as file:\n", + " file.write(i) # 确保i是二进制数据\n", + " audio_part = AudioSegment.from_mp3('temp.mp3')\n", + " audio_seg_list.append(audio_part)\n", + " \n", + " x = sum(audio_seg_list)\n", + " audio_name = course_title + '/' + course_title + '.mp3'\n", + " x.export(audio_name, format=\"mp3\")\n", + " display(Audio(audio_name))\n", + "except Exception as e:\n", + " print(\"合并音频时发生错误:\", e)" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "例行公事\n", + "daily routine\n", + "睡觉 吃/排\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "execution_count": 18 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 获取图片url" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2023-07-05T10:56:34.184660400Z", + "start_time": "2023-07-05T10:56:34.073384900Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://xuexi-courses-storage.firesbox.com/7000102069/replay/78b6da46-ecd8-46fb-a857-423ca6da8196.png\n" + ] + } + ], + "source": [ + "# get all url and text\n", + "import json\n", + "import requests\n", + "from IPython.display import Audio,display\n", + "from pydub import AudioSegment\n", + "import os\n", + "\n", + "# get pic url list\n", + "\n", + "\n", + "with open('audio.json', encoding= \"UTF-8\") as f:\n", + " json_data = json.load(f)\n", + "# print(type(json_data[1])) # Output: dict\n", + "\n", + "course_title = str(json_data[1]['course_title'])\n", + "# os.makedirs(course_title, exist_ok=True)\n", + "\n", + "audio_list = []\n", + "for audio in json_data:\n", + " category = audio['category']\n", + " if category == \"PLAIN_IMAGE\":\n", + " url = audio['attachment']['url']\n", + " print(url)\n", + " # with open( course_title + '_pic_url.txt', 'a') as file:\n", + " # file.write(url)\n", + " # file.write(\"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/courses/parse_course.py b/courses/parse_course.py index 1b9097b..93cc57b 100644 --- a/courses/parse_course.py +++ b/courses/parse_course.py @@ -128,14 +128,15 @@ def query_course_by_id(course_id): # return re.sub(r'[\r\n]', '', str(all_course_json[0])) return all_course_json[0][0] + # 保存课程json数据文件到数据库 def save_course_json(ids): conn = sqlite3.connect('course_database.db') print("数据库打开成功") c = conn.cursor() for id in ids: - if id > 7: - continue + # if id > 7: + # continue token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiIxMDAwMDgzNDciLCJleHAiOjE3MTkxODk0ODQsImp0aSI6IjU3ZTJhMzdmLTMyZGEtNGQ2My1hZjQxLTY5NTRlNmU1OTg2MiIsImlhdCI6MTcxNjUxMTA4NCwiaXNzIjoiYXBwdXNlciIsInVpZCI6ImJlMmViOGIyLTFhOTItNGVmMC05ZDAwLTA1YTlkN2E2OWRiMiIsInNjaGVtZSI6Imp3dGhzIiwic2lkIjoiMWI4ZjE1ZTItYjQ5ZC00MmRmLWEwNDUtZmQxYTUwNzI5ZjkxIn0.IO7C2gtsi8lMdrOgWGNuxK-t2zzmDPvmI4BqISHeZEI" json_data = request_date(id, token) title = json_data["data"]["title"].replace(".", "_").replace("/", "_") @@ -143,7 +144,8 @@ def save_course_json(ids): updated_at = datetime.fromisoformat(json_data["data"]["updated_at"].replace('Z', '+00:00')) # 插入JSON字符串到SQLite表中 - c.execute("INSERT OR IGNORE INTO JSON_DATA (ID,JSON,TYPE,REMARK,CREATED_AT,UPDATED_AT) VALUES (?,?,?,?,?,?)", (id,json.dumps(json_data),"COURSE",title,created_at,updated_at)) + c.execute("INSERT OR IGNORE INTO JSON_DATA (ID,JSON,TYPE,REMARK,CREATED_AT,UPDATED_AT) VALUES (?,?,?,?,?,?)", + (id, json.dumps(json_data), "COURSE", title, created_at, updated_at)) conn.commit() secs = random.normalvariate(1, 0.4) if secs <= 0: @@ -151,12 +153,14 @@ def save_course_json(ids): sleep(secs) conn.close() + if __name__ == '__main__': - ids = get_course_id('all/course.json') + # ids = get_course_id('all/course.json') token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhdWQiOiIxMDAwMDgzNDciLCJleHAiOjE3MTkxODk0ODQsImp0aSI6IjU3ZTJhMzdmLTMyZGEtNGQ2My1hZjQxLTY5NTRlNmU1OTg2MiIsImlhdCI6MTcxNjUxMTA4NCwiaXNzIjoiYXBwdXNlciIsInVpZCI6ImJlMmViOGIyLTFhOTItNGVmMC05ZDAwLTA1YTlkN2E2OWRiMiIsInNjaGVtZSI6Imp3dGhzIiwic2lkIjoiMWI4ZjE1ZTItYjQ5ZC00MmRmLWEwNDUtZmQxYTUwNzI5ZjkxIn0.IO7C2gtsi8lMdrOgWGNuxK-t2zzmDPvmI4BqISHeZEI" # json_data = request_date(ids[0], token) # json_data = query_course_by_id(488) # get_audio(json_data) # print(json_data) # get_all_attachments(json_data) + ids = [489, 490, 491] save_course_json(ids) diff --git a/requirements.txt b/requirements.txt index 07296d3..7a9fed2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ requests>=2.31.0 ipython~=8.24.0 Scrapy~=2.11.2 -pydub~=0.25.1 \ No newline at end of file +pydub~=0.25.1 +tqdm~=4.66.4 \ No newline at end of file