From 87a2514bb395dbd6a420b70f7c75c2758cd090c1 Mon Sep 17 00:00:00 2001 From: jinye_huang Date: Tue, 29 Jul 2025 19:50:47 +0800 Subject: [PATCH] fix the document problem --- api/__pycache__/main.cpython-312.pyc | Bin 3233 -> 3233 bytes .../content_integration.cpython-312.pyc | Bin 7828 -> 7634 bytes api/routers/content_integration.py | 70 ++++++----- ...ontent_integration_service.cpython-312.pyc | Bin 12250 -> 12278 bytes .../database_service.cpython-312.pyc | Bin 45319 -> 50123 bytes api/services/content_integration_service.py | 3 +- api/services/database_service.py | 109 +++++++++++++++++- config/database.json | 7 +- 8 files changed, 144 insertions(+), 45 deletions(-) diff --git a/api/__pycache__/main.cpython-312.pyc b/api/__pycache__/main.cpython-312.pyc index 834e7c3d16f9e450ab31dff484181a08d93275a3..a0d2a6442db42dafba7069b486f0f438db13d803 100644 GIT binary patch delta 20 acmZ1|xloe(G%qg~0}ym=?%2pZjRycYrv;J# delta 20 acmZ1|xloe(G%qg~0}yOqU$K#U8V>+DdIhTh diff --git a/api/routers/__pycache__/content_integration.cpython-312.pyc b/api/routers/__pycache__/content_integration.cpython-312.pyc index a923656e2337409e7a51b96ae8acec90c3c255f2..d3bd16ee5e21f992c6815264dc62c8bb05e82712 100644 GIT binary patch delta 3001 zcmbVOYj6|S6~1?0(ptzemY=fT1{+~yLu}q)UJe9o13utnSd6{J@4zr!z|MOdkAb@0Ge3sh+G15=VhYE~F$Pt(RwUQ%>(egMcTgYAJ zI2EJB`5ehn9L+JDhSPHFJ4~F8(SC#6OdCnUpMwDkv()WzN;0b9{CU`C)5WOEb5#FQ z5NB8*6+T;2gmL79Su7LR#29}Go*FclPzLycAO`)GbSTDel}}KnYzQQK+J}U{6i3*q zM;ooGZF)QfP7~8WEZ~gzPh|p8p^j73EHq7F3<^5QEjLs7Y|$ye6~}XDVHMGAPGhPevM%6Ywc0rMVzzdMKX4b zkS}ReAb%ZZT=mQc!SR(8comcaX`Z@f9uPLgwa*p5Jbk>`G40GkOD=T>XW(+=0c~+` zu2iAgVp=sm+C^Q?RgdE^gYwf@Ci5emcc0bb%2 zMvNlJuBZv6HF>Oe;;n&)e>gqyLI1;{Ym=y||CrWKnRBZKxITVQFr8Zw+ zdsL4$O`Ln9$$MOg)(Gjc!6q@*XipC1;}sd_scrN3b<3= zWO&J3s<3LhqN4GHLlP>9LW`)(l00Cr8AvZLDQwUeWW~bwcQusMTJBe2>7BC zaD^3JhHSVM^^L+lTc;Q@R5?`2OqLKa6nHTjkVO3d+5YU2|CY19XB7s-!h&9L5Z%p> z!M6uNstn}j9o&{8wn4f9eyPhH*O|w3=47FDyl}z&!Uer#iYS{~@TJu;u>a)#-Yv;O zQ@{7bfuz%Q-hI|RNDelSIhPN?QRk|D%@hNy_JQ3ecaK}#qZaoC&qslaficVKp$(&! zp4#5ce=c#P7*Me!S+z9z(u$-vlw7zt>8koluPHb6>ry$u={|2eYa2BF)^f#iof&g% z>1Q4mJI0G^MvH5PUcOm&zqmQ^+TIkERcw7wSu?$-I)jrV~wf^IT^H*_-Cz3QuPmfKE7l^@XO2w_^4}0vhXX8g08# zT~j}gx>=4PZZ2fEF4NqsuHCvsb888W^jphFgmh~qy^UmV)ncH}8MfX=e{R^It2fcZ zrXCv2hHVtW8=l8D7&XK0l7?)}h?YTkBiS^vj2JQYNCDeeNRHU58*|Cqxir#m7m|%8 zaNEQ*vE*$>&GrK3PCi8XodULrV(-{Wwy$IFtfH}H9gQqsP+0$h#c17{st&#_Aaz_d z7TkjM`KSonU5v02jlxnasR6Kq)6v2e(LEOF{~AoU4) zflw!>N{Z;6NbQ&P=69JgY^;=jWA3ZRMA63?j!NjJ!2A^|5=IwA`lDQk*J18yZUP1n zFskq>7U>kUYl2}e8sKY%R=Lm8wO|i~4KOjkHU%=;rafP0v?nAWHp3K=HmBCh%dIUG GhWRHV0sxx; delta 3021 zcmah~T~Hg>72d1e)emS9mJsL%OF}}nWMeF2FkoUF>|kRAn2gglz@&`KE<&>YW_MLc zE0v6$xK4*oX*UhQZU)@AV|z?nJTQ$r%}YGvnKpne89Zoq`jCe};)hOX%`}3KcpL?xPmmDG@FL-g+$!|rwWcZjmMk|>gC%iZle3{)q3{n< zhs%OmA&HyhNg{y<^$5}sw?NJbUtK$62Jv5 zy)aAa2$;=-2B0bnJ%kQ_CyxzJX>O@tX5^(Wm2yh^BE@TT^v7?c653j^>wDjTs4;a%7uaA3SiYF0VMb|%SZj%bzyAceh3 zqg9?DA&F;BJYOco(F$6Le7aP;L&6eI&m@YP%9C1q9?(RR{4qHQ2C*E2T5J@I+}*;% zxt+&pTLOm{VjGVxc&1Rsk<`}ATXNQI&)Hsy|{Ai_7}6i6?!P6aFg;1|Dw(f ziwZ8@*T-<&E24@Igc*+ag=3;7#zqksi1>REDpG++EYA0G@o?D3zAjR9v@afJB7AQk z(jOI-LjzpS(gq?t<7a(5L-+PYp&j8xier3i-vAo-2mEdoYlDnfv>U8_7)f-z*m5MC zNbEuay^A8nuxym=g{(NApNS)vm5Xn!{Qgg?znWbcy|j91bmgZLUw$-_t8R7dkB`S@ zzx?R38;D98Qb5dqNIAQlVJF?|Y;m?b&%_Cs!}Iy|U$QP}#Px|ST<)NtG&#)qyEsjNmM4aPUk;wKE4>1weApA^IF^rxF7Tsgkj0F0VVRs^7 zLBfiJ5(zgHBJO8+5syR%MI}tupWQV%GZf%>PL#(Zp+MvuY<&T8&EuJHthYZ9VmJ|p z#lfAI0Ja)A07#!@3|u_Ki%OQ^V$lf4WS8O4ABu3Y@YL{8Xs}2iM%+65bNK@M`VDMt z7p5(GqjgQz0mwG+X})$jzw}{#X~t;2)S4-=ESI>JN?fD(noPZ;_%mbKvavdCtiIDc zZ-21+ue+C9yVI@Rsh;mI_xRI2{>9dTR3MmcJ@?2MO2uCP%R`jKiJU)0lP~eL7*Wuj}#*1(#aZi!n;~bMM>Ul&SH~4^k(-mp=MxsrfsxNn8r<$t&7I}kkj?UR}ZHuk1aZmKj-vJz;h(q3~#rk z%%17uJDT)?mXzmI%GkQ3_I|1<5#G`6r@m^Ip$4Xnq5rJs2@YKs@$dBr5QcRQkQP4B zO%E@@%|DNK+QG%0?G@OyCg3fVee7&fdo|?U14=l!TYu~boVj-hwAB&!YF{$L!6#Z0 zMtowZZfhXs)F^sRLwb#>xgr!XSE_2OA?KV;`E4F@-b2E{d<_A^<{MP)Zp8v&ZZB6X zSW$|FSIbERxlxXV3RTBGe4*xGhYP>&BB6bMAKu}??tA2&R{VZ#_2~-vgS`OS4=Plh zCe?!)bEjUFDkLG2(vuKLnX ContentIntegr try: # 创建临时文件处理base64文档 if request.documents: - temp_files = [] for doc in request.documents: try: + # 从base64内容中提取实际内容(跳过data:image/jpeg;base64,这样的前缀) + content = doc.content + if ',' in content: + content = content.split(',', 1)[1] + # 创建临时文件 - with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(doc.filename)[1]) as temp_file: + suffix = os.path.splitext(doc.filename)[1] + if not suffix: + # 根据MIME类型推断后缀 + mime_to_ext = { + 'text/plain': '.txt', + 'application/pdf': '.pdf', + 'application/msword': '.doc', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx', + 'image/jpeg': '.jpg', + 'image/png': '.png' + } + suffix = mime_to_ext.get(doc.mime_type, '.bin') + + with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file: # 解码base64内容并写入临时文件 - content = base64.b64decode(doc.content) - temp_file.write(content) - temp_files.append(temp_file.name) + try: + decoded_content = base64.b64decode(content) + temp_file.write(decoded_content) + temp_files.append(temp_file.name) + logger.info(f"成功保存临时文件: {temp_file.name}") + except Exception as e: + logger.error(f"Base64解码失败: {e}") + raise HTTPException( + status_code=400, + detail=f"文档 {doc.filename} 的Base64内容无效: {str(e)}" + ) except Exception as e: logger.error(f"处理文档 {doc.filename} 失败: {e}") raise HTTPException( @@ -70,8 +95,8 @@ async def integrate_content(request: ContentIntegrationRequest) -> ContentIntegr # 调用服务层处理 result = await integration_service.integrate_content( document_paths=temp_files, - keywords=request.keywords, - cookies=request.cookies, + keywords=request.keywords or [], + cookies=request.cookies or "", sort_type=request.sort_type, note_type=request.note_type, note_time=request.note_time, @@ -80,36 +105,7 @@ async def integrate_content(request: ContentIntegrationRequest) -> ContentIntegr query_num=request.query_num ) - # 转换为响应模型 - if result["success"]: - response = ContentIntegrationResponse( - success=True, - timestamp=result["timestamp"], - processing_time=result["processing_time"], - input_summary=result["input_summary"], - document_info=result["document_info"], - xhs_info=result["xhs_info"], - integrated_content=result["integrated_content"], - search_config=result["search_config"], - error_message=None # 成功时无错误信息 - ) - logger.info(f"内容整合成功,处理时间:{result['processing_time']}") - else: - from datetime import datetime - response = ContentIntegrationResponse( - success=False, - timestamp=result.get("timestamp", datetime.now().strftime("%Y%m%d_%H%M%S")), - processing_time=result.get("processing_time", "0秒"), - input_summary=result.get("input_summary"), - document_info=result.get("document_info"), - xhs_info=result.get("xhs_info"), - integrated_content=result.get("integrated_content"), - search_config=result.get("search_config"), - error_message=result.get("error_message") - ) - logger.error(f"内容整合失败:{result['error_message']}") - - return response + return result except Exception as e: logger.error(f"内容整合接口异常:{e}", exc_info=True) diff --git a/api/services/__pycache__/content_integration_service.cpython-312.pyc b/api/services/__pycache__/content_integration_service.cpython-312.pyc index 8af7c34d6e78dd8cfffc11a3abfc000f61b86f85..4766af2c1daf611ef76c02ef3fce522fc8859b89 100644 GIT binary patch delta 684 zcmXYsO=uHA6vyZ7?Du9jO|scdV(KS_)Ko?E(4x{W!5plz#*;`B=pHIoCDBVUu?I!k zlhFY+cnJuCw2Hb2q80I2+Tw;35f9?QgBNkB;zbJ1Y<&ao{r+#}J^u5#^rxgR>$-|) z6kg6w{&=px6`@6z#dFyjSJkda?1n{+n}UV-s`m#(QGr02aNE z_IDndR(K}8t)7o_xs2Eu~NG-y;h#^-Kn+mg#e);ZkVH`3?r;0 zN4eh!DMvYPBkS-pV$p#2N+IT$Xek2@;+M9J8dR+G9&~VIwR=RT;=}HYBV(UGMA;t% zI?_f!xs3&uWxW+sPzIc@$ zTZDCQE`OMtInN2(zoxSHi%lgEmP1)ZS@`Rxsq&5zEz` G)cHT-i?Wvh delta 676 zcmXAkO=uHA6oBXLPxddn`P-i)8g12>O0Xd2(AwBgz4Tx(As35c3JFp}ZA&eRkhX#f z=|OCngDKt=K}b=`qDa+4(Mw6iP1BQ9@gmfd>klG$adz_t9^dz7-kX^}#g9d~Da#@v z(F|W3Yp=?0JW$6?&t-LnbE-~MsRnci=eT1my}~poJjf@2#ZS2+c(B4Jfx{LN6%T77ViQbo+2hJWKm42^=s& zID1II55;Z>;CJFA1aV60576b%irQd0uepWF8lICPpyRrWSKk4br7`K_*}d|CYTrew<+^$a1+>RiVQYaRhoh|^SRyw@XGvb z>@t}<!p_doi{b#Jctd%WpkCv?Rczg7U zRh+QY$!&hhjKuQHJXH{3ogEJ3{b)mnT$5JPK;I;G&51h%LG3MXF)T zE-DWCT=Y8_a50FqupM$yMMl)tT-2QdhKpe&Dz%cbc)T{Yu7^mXlpm?&VY-^lG!B+0hc5+ zLTikAW7I|?CNW}T;#%4S)8qv;P3>#n%MxuQ)3h&b<6qy`7tpj#+Am+;IkP{^;*!`7 z$(cLnp1*tUx#ymH*ByV9{c4AN+OKpvEd`JDr#+i&hbih$_z`@VK;zbr_e|Tco~2o3 z)6^|7>tkq2MsajwW{__D&BeV;`@h!f*=Sb98CW$JU$0;_-OTy~8MTyRwcQjO^|CBL zWo(mKbbE&O6rP|3W-D)Naam|VSM6-BcQmZBHQO3EUWnRa+h*lCH}BwF0>g4mwl=bh zN$@0qj2+YtdOy`e_fYjRmhM(}$~t6qlttE>2Ax;;d9ObC>XkRh)g#BQpL+hrjwi35 z+IRhh@7*}&y>hVM9FhR6VjDsz+WgM^yzci<}hI zNq5kwvo=g3x-}gocEDKo$;r`sljwRpeCB? zph(qy21T#BQA_TP(XR@aQ%A8>r@U9ysc`Fp0(vz;wi;?f3|PcyV2XOx9aIC@_B{QH z(m{2|UZi)?b{S38Q3?u`&61<+pp5bNvlOfD)d19V$e*PSD}%CcGbue}QWoW&5)_b4 z@#{L39m-BQ@f}I`Os+)H!LUp_uz@O|pP-8A)j%(pw;BA$C%bWW#HY~^Y>mG=6cQs9 ztg1Z@oS;|-t}=Bhw<~uJfY0E)mEKBildq$;(iUxP>s&H7KEw0rn>(*~4?|!;cwBwP zd!^^_mE%9Svh$h3!ot=H&=3lu>u*1Ib=R@0r~4)lUU~8K^`9IAG1uRI`pUD`*ScQ|l3St#6}!>e;&e6%$|h$6E*Xa7c_%OEtd3@f+ZybRvo>&UYrUE|TAOWKIIC4)cw2J=CukOLvvVzOhqGBwZME^uj^+kI;dX4{1jg0G zaV-|Dpm1?b^@762H@F1lW*~z1lqQ?YZRJ9|I(sY6bIqU~7p$PNx!v5B7PmliJjSh{ zsBdky^U*L-lnaBqty2wuDe2)r~K?}#X&qOmf+FYEN0@?~Inw)l9lWTq<%h{d0 z&F$pJQ30MLlsOHY`*|1jvEI19VsFJqF$w#l_eA?rAGj2=ZX`YHa(eM_dhx~dS-TZa zM~@gy`#bh_d}K)8zjDt?UxxLP!SmpVDgCl(=CEm|x8(K8)0G!ZRX#)2$Eg`TwIeAR zJ*z%3nfpv1mb*hSE`nN-6v-fzs)pTG_kO zXUgxdIkoD+^>7dHp4)Dm^DEFY4#| zqUV18nVw3_xJfaw`6D^GyKB+N+C87gBv|H~Q?X zZzJc+s=s7t7%`^xnff0(n*P4A=%d7xBie)7zRF99#UqJ1{jtM|c?Wb~eLflmlK&Ql zzxKNT<&pz<@IkX*`IK;td_HvICgH+m2s-|%Ffqo_GAcUWr!epqK-%H|&=VDC-Fit$ zkw@rM52s`qP6fM{vRkoU&8oVo^%~gEw44&KGISaAOTNT?PO&};cE@N|%SN4y4(@ZT zj?)3{6e%_G1ILtZMQ|foKQ)ZgBih4Gos>03!Wt9C>gSA|lrv7k85@C9kDQ)}_8KIt zaS>SKIbaU>Mjv6k2@-xo1b$wi{Yl((;;5(#Hc1kTz1^&ctvOC_wild(q39CIX`(;{$IvpCc;ak!}2 z6H2q<%2%>;CGmY%1Obubd!9tCB?4FZWM1;H6%y9G2&|D6R!X??*%}rV%;yr>1>E#{ zCA$z#LIn|I-EKw_707s15`l$nsg}iLEweaX36qT5->vULrL&*vpd}{?_#$Up3}iG@ zOCEb9If0c2cOoD{Ch;MEXfYZUEF6EC+SlK z`#MOGC7vuyQ|EydSW<{j0~P#ZqYp9A6FQQ#=`2&oRfhKn>fp{UXt2(;x*YA?4Wtv| zf``L4YoNk(-M|3~x`$dhzRik-tFzU81J8p(tjk$17Nne;v(`JfCRR{8Ts2%1d<%M; z-R*c7%S@;%S?j~LCJ=z7xW&T=N|(E>367r{TPy3}n(fexcGw%FVp?|>S$))K`RqTVT3ck{NbR=aqx73A>E7efy}3&Csza}dl$fF%o$#~op+ zzgRE8YTa(LZ{nZ;!h!guYM`$73lz&BsGr3$i2PES?U@Hut{(yDqDH4s=>p$-F=3g8M+6c*zBpzL>Qf zk>-z$rprdlu+ehSm=E%!N7a-uX(Y)sl9D!(m<)}vD5Y83qxg+FMXbmK4W0|cBZ;7d zLw_sZY{y9mK3I5)QU4Acn-?cDEDei<#OjTitiK|Yv*KSUrtnK~C>k#Ya7n~;(Bq4t zQ|?v}vp$9NeMd;NKl9}PYAwz%y0T{h2b zVfZQ>?m`532p1HN=6WX_4%=EdI30+QS%dhP&>3?J9OCc*2~+86ZE}mJ1w819k;;ws-Sz z#-foN3n1WtHM2h}^+x#;3i=)W-yW2`Su&*jS;b(X&v5r8_41La@jW{KnE_I(#~^z`j`2sj z%KTZjLG=**1^G~QhE5_xJzvp5H5$mhT77)XAz6oljlxup7ciab4t1lE#CD}Jnhs54 z5}_MRjJ8ADXhQ0#v66cXZDT52{3NSsijBiR;?!7ldJra-mJqH6a!iv!=OCjlah$O& zj$G4BGvRcI4_wGB?2K|}1iu?IQK>H4tqx&C8CCHT*$Ev{puyM?9eynXS1NIxQQM<- z(>pi$Tar7KWO7VvX80lyOWxD=)9c8#C^P*4IT&S*v&aIe8!sEg+($l$+8xX10kK6d zCNzElGJ2wuwPJMfb@+KaI$g`BK+Dhb>*)LnJ{dj*jhnaGIlRaav=E_?bn)_mkB2^s zPK-rChbcFdB>6=suSyrs@LAZh>asNr(BQN2>-)M)C7xjU1oC5OZic37Y(UUO(M;x% z=7Y`T>Xg4MpCdb4pj(`yK3mQJJV#ZPES@7fUovxXKJ!j)4zz!sqePg`EH2Ufyhw%Z zIhv&u^DZ@eNjdXwIRkBe9mpQYiJ3Q^!+DxxtVzp>HB>o0_y3Oyv`(+SFN8G3BDS!nNf(iiW>7wf_PSZX84R zfDquBoeVj>G@X3)j6tLBWVSPHAc@H0XUDFAn`JHQfW6(>Y~$P7^GQ>DzN(!uSDJHN zp8c5le3u{k4_%1=EQLH$DPtYTaO`@ z56QvgB?g%bt0c^R!TfhOe7EO_`H%dKHN}a!_$5HPXsGER*QrSrSCEmsc!<5c&GJ)KY1LZh zoK6Gna|s$ipED;RX094A=V>)y&TE-!z2^K>6=IAc#>`Y(ROfT#*lQ8tbWM#>etvdh zO`QClIHtxpFe7n`jP53flh@Kukk69u-GueZMBnuw4rfVR%9A*&7yIwTS<8X2J%fxv z+I!EWi+K}1U84ZREg{A*@MqJb3Nj}?N5yv|Me%_fJTgdFAF4TL1it_F$HX zXf5WPaYHSE3EhR*JF#&EYEB0mPgzcH4e}0=gMRyZErCOJg%4x6BWZ<)D z@p70sZT>6E!{sEMbG1Lr_anmZ6+%zXOwC4(m{%~s z@$ql+s{qqJ4eUyfcLgdC131I=IpEPF13l9l6j3;5{6PdiB>yO~(9e?WVvF(+A`X+f z;=HBLLDLe@d<@a!^KleEpGWWlf=TSCg}(NzK>Vo4r^V`oVzlE6z@r^hjNz${?{^%X zNA^sgN0!bUpkEyL=gfF{m}#ccLNIN4X~?v;(s0v`qXqw}Y4@OM7~1hfHz!(GD|HX) znZBAlJm(l#lTv01w`Ng!5m@6Y4_WhkdAKzv{vB&pqcz#0qsl5`5;mhX{{)=WN#&xG z4pjUU%&M)tfe}?!RVd?Q2R2o;)1C>DrNpcJW|lvK=0u9EKI{ku7%Ww>mc*Me{v?8s zLj^6=t*ir+IsEj{V&iRFTob2WOTlkZVgQPO&n3?4DqRl_V-^4yM&iI*)fH@2ba3xSx&8>` z7>A_l!6gGTzRf7c(`SWsCH*exwaza71)3g6-T!$<@;|jM)-6Q&e;SwnG5yPdOxsto zaSaxBA!ul1!!`WZ9cl2fD|EIABUnnD+yT&{u4g2kjCa(4mi3NsEx3t%v!8|>)w;)S z)AFrG)H2#QOQI#enTO5hx#rcydqHy0TNm$0OS*Hm?igy3Y-(S^)?#fjhUce8UE6~x zoN?z>N>ljda7{Zo}2Is)H*R?&lPjgU)`9nPqfaF>w-Hfi2Fwg>FP`& znzo*>O;rfj9y#ie=3W4SqT~PCbh=L_jCv`#ryVy{_{zX81wR&!%AhzbBwuWQoqlrQ zwCATXWjD~YJ4sP3V#zDiyW;h9Z3{4CKJ{95T9H zH4xpMt&n=Py#kaZkh-2F5JNxgSrZmR6I4YWBuVd`qnn4SBr&v<{HPbbY5V>nc{C1& z{|G>B`v!4Hzkae&`3WMgkcwUT6Sf$CQWXo%31v$kvV^vlU)FTB`dF4j>w`qqwKcd+O(qd-sw(lce64$Wq-7|6u%KcJ^~H_Zn^y32=Lg#eB`7w+#Ww#se!qd>KM0VWUrZvX_QzR78G^Isz3D+GB6EC_ND6a>K>Xt%2w5YqU(yB=lF0Qt!zC+kA}+uM zt+8AwS2}RXJ);!7z3M^3pnYi8;HEQa!$r$(A}+uNt+7S)L_WKpZPTE8sQQ9_XxW+V z!=<%f&z1}$S9t4AYevB#2{5>l3-Swfm}m`l2>1u;kii_gh-$@20tW8rq946^cnit{#jmmf7#Jy zxM96X0WL5!XpK#uNM6%#=(l@k^=~?nHk@5HfqZnjin;3mdy9hCSOTzmLXnaNvj=B+ zczL%C$xnCQ#AZMdw8j>O8HQvKSMVBqV90)E!#7hyJR$Y6p}I2=e9#nSdq!o@j39iL zpS*0S`b_0Gxu3cw%oH)~{`IJXxacqOdgLeO`s2qSSsru2!mo)S)Cj|Y*KNxKZ~>n| zYeJkGfF9x;UW1#4+!v~S_pZ6%S$1~&@cgwnO+iLzh0iKj;?@6KMrQeqylb#zNIq0I zq&_o$c;@o3RfUUKx*8!WSDea)sLTyT8MMaMgn2vxT$kPN_Erz-y~|E)AI_V9lLA~| z;LsW?l13%^31S6a5G(A3S-#bE7s}2y4_DqFW{Sj;Y=81XJ!`z#Ue{o?ciZX0;eslu z9&+JS=F|!6h5Y4IQpI?dfC+;kKm)V~LbX*@i& z3+jq3@C#)VH=oCf1^%O4>i~?(X__8W%W1_eEk%?6_Uh%oqfF!v-uRp`Wf~pdCx_KQ W$9tJu`1<=Inx9H1-lPzUqx*mVK_d15 delta 6670 zcmbVR3tU@On!h(M!Xp%#5Z-Snghvadr7v0w6lk?YDvVmRP1D><356uy8*HJJ)GgXN zPG5dwx1(dl)m=ebe>kaK(6NSg^oQ%r?l8}Ke~#nm>^j|D$5eJ_+@0Ode&0<(5(;Z~ z!>|8)&i9@3z0P;ObMEc?N0moEQ53ze)5S3O`>$U;)|qjfVLqi!@TUqiX1;y8XlIRr z;n}XNATRXf6JBBcz4k-})5LI^A%=^3MG>Ga?o*hwr$0JvGO?1b-eK>s?cHXvTlVsT zH6(*!WGj#`b4>}2(OK>xrd`9S`6yn?M|UVvk;7@idmSn+YDm={!)f_ga$HAFg!5nHKQnR=v zbEri~&E|?1Tsjl-bGVWP?}<;oTmQ;z&RZfQVySh5CEa2fLRxe9sye9Kvh z{DJ^KB8A-n0n2efA-6#`p=tp!8g2zH#gYZWy%Hr9an%c&xC;5jx6ExI{}tS7WSY1; z>CkZNP+jT&BC-Z&mGz~VHiB1tg|Suxapq$_5pez$HR%s-8`PNA_l_qX(k9k|8%GO>-z_Af_u7 zZYcDN76(fp%OEYC*jkq1^z;XBrkZqy0W2r9rq3{365L45W%FsGE;Tf#_7wZAUH;a7 zc0Y4Kxr5oyn$qD$T273lF@1zhhPTsmSR;I#o|Acef>|dn#y}ZjTSoWK(a*3A`%}&8#(q!>nA(zta>dn>J5tQPeJv zZmd?nQ@ygWQvGhF8rwoLsnZC&t5>jl8gkaM$=4}Gum2f;0wsb;D=8eJPzYyotMrl* z!)J<;y_Ba4_3&ZtD!-uxa7R-TEX_+~?}m+eM_4_4l$YmeKDbQ*>k11L|DFZe8A)KP zh))=0r5Mg8N5LZW^{)bFVJV!dPKFDGh49nD4erqxVG-&UmJlo_C?%lS2;~Tpwu5(B zI~{gjD57rEev4qY+4l;|uxHZv?Q5pd>j>nSqLX58w@aYyEou;QN*c*;e?hWV>7TEz%?5UX<$hmpl^gBOq^N z_k%7yK!jPBg!`#GuRn?8kKc@-E4iBj`==E~OH<-|ajy7#1lbkObm=3iXrTrB&27SV z=&N`DZT)%0*3H59cMN5S3|_^sXsGZHt@G;1Dr}SDEjkcy$?F07t0GcI!AdEyt(R9 z9O;&{q7#?h=Gk4}uYfiqms0J z@F&u(rs=~{N%XI#*q@JvrY8lv|Af=U|3HwJbXVOU*!`Yw)F&wu!Yo8?e=!VJ6lk_I z=M)P)@M2?xt-otbY7MvbAer=kwN*}gkS;b~b5^3e39s0}3xSdC}<3 z&FRXBu%5W90>k?DU16pg;Sbe@d&5l~{QsD0ps-qcIU8@s<0lYTxY1>V2c6p4$Yznn ze+owS)Yh-i$j7%`S7LIC+n2^&M>Zo+!S4SMYVS$)yu1BAc20SbG#00mw{yZnG?uF8 z+@uwVHk-LvdlQK82*FeLD`b>@oP)_NUSwC(08DA|3F-4I|5Z=7BZ3 zy%92h^FRfwI*H>v?+vRIkZQgYW5k(Xy+3jT-%bG3{s7Fn9kxBxK||25uH7qjXUK-P zkcfW8d(Q9XXk`+)i%Ebq;oAse_!Nm-r#nwmt_G&S`Q2;bLH;rJ41CJJt|wbftgK$d zVGM=um5ybKD=d82k-LG#6*_H25$oihHQ)=WQ z?19N=k87xk374VmTF8ZtjPs|s(R0sp@A!@FUk+I#*--OB&jKa(@(WG65}JF(AI^Aq;X(~Gzj!46 z3=*VRdgto4xOk86(GCSPy!69RsYxAa*0hq{ZO}7PCx?49P|@fr7IJ5yiuv)#7F`|b z4%?bdaA@?BO1|B`H~cRw0LcxpI*6bhG@QuMC8}| z7<`Ov9&hvJdqr>Y#IETTTW5&oTLRhMRMQNTjqN%Xam4DQ&RFNT%UeIC_immToUU#T z4uFGiWtPq`X=&oj3{;SS#E~GhtHUW4v0PmA7cl+%GVCfP}8@OB?3kXALI9XtrI)L=@^|X zby3RrW^d~RMlCd<%0FSXw|-*1pT3EuaFqq4K@nInrW{@4kANX8{%cT@93?mv7GDFJ zf$_!1c&FDjRsZAdZBv^k2d6jOOTMJ|%HXCqwKy=779-!k7{%i&yh?AYS39v`dikBQ z`WtmD?3-MYJHsr&=(Mp$w54FgHC8{aAKN@SI9;-V62R!IfR85AGJ+P$

clj4did36|hijf=(3Psyg{**XXJh^MSrtRM-CjR`S)HLaZ5Ah*I(PuIG z)KWtES^;ldsk=MY>Ja#)SmNNdN!< diff --git a/api/services/content_integration_service.py b/api/services/content_integration_service.py index 043eaae..672145f 100644 --- a/api/services/content_integration_service.py +++ b/api/services/content_integration_service.py @@ -78,7 +78,8 @@ class ContentIntegrationService: 整合结果字典 """ start_time = time.time() - logger.info(f"开始整合任务:文档数量 {len(document_paths)}, 关键词数量 {len(keywords)}") + + logger.info(f"开始整合任务:文档数量 {len(document_paths)}, 关键词数量 {len(keywords) if keywords else 0}") try: # 确保输出目录存在 diff --git a/api/services/database_service.py b/api/services/database_service.py index 8d3a855..641fc3b 100644 --- a/api/services/database_service.py +++ b/api/services/database_service.py @@ -12,12 +12,46 @@ import traceback from typing import Dict, Any, Optional, List, Tuple import mysql.connector from mysql.connector import pooling +from functools import wraps from core.config import ConfigManager logger = logging.getLogger(__name__) +def database_retry(max_retries: int = 3, delay: float = 1.0): + """数据库查询重试装饰器""" + def decorator(func): + @wraps(func) + def wrapper(self, *args, **kwargs): + if not self.db_pool: + logger.error("数据库连接池未初始化,尝试重新初始化...") + self.db_pool = self._init_db_pool() + if not self.db_pool: + logger.error("数据库连接池重新初始化失败,返回兜底数据") + return self._get_fallback_data(func.__name__) + + last_exception = None + current_delay = delay + + for attempt in range(max_retries): + try: + return func(self, *args, **kwargs) + except Exception as e: + last_exception = e + logger.warning(f"数据库查询 {func.__name__} 第 {attempt + 1} 次失败: {e}") + if attempt < max_retries - 1: + time.sleep(current_delay) + current_delay *= 2 + + # 所有重试都失败,返回兜底数据 + logger.error(f"数据库查询 {func.__name__} 在 {max_retries} 次重试后仍然失败: {last_exception}") + return self._get_fallback_data(func.__name__) + + return wrapper + return decorator + + class DatabaseService: """数据库服务类""" @@ -29,7 +63,46 @@ class DatabaseService: config_manager: 配置管理器 """ self.config_manager = config_manager + + # 从配置获取数据库相关设置 + db_config = config_manager.get_raw_config('database') + self.pool_size = db_config.get('pool_size', 10) + self.max_retry_attempts = db_config.get('max_retry_attempts', 3) + self.query_timeout = db_config.get('query_timeout', 30) + self.soft_delete_field = db_config.get('soft_delete_field', 'isDelete') + self.active_record_value = db_config.get('active_record_value', 0) + self.db_pool = self._init_db_pool() + + # 兜底数据缓存 + self._fallback_cache = { + 'styles': [], + 'audiences': [], + 'scenic_spots': [], + 'products': [], + 'materials': [] + } + + def _get_fallback_data(self, func_name: str) -> Any: + """获取兜底数据""" + fallback_mapping = { + 'get_all_styles': self._fallback_cache['styles'], + 'get_all_audiences': self._fallback_cache['audiences'], + 'get_scenic_spot_by_id': None, + 'get_product_by_id': None, + 'get_style_by_id': None, + 'get_audience_by_id': None, + 'get_scenic_spots_by_ids': [], + 'get_products_by_ids': [], + 'get_styles_by_ids': [], + 'get_audiences_by_ids': [], + 'get_content_by_id': None, + 'get_content_by_topic_index': None, + } + + result = fallback_mapping.get(func_name, None) + logger.info(f"使用兜底数据 for {func_name}: {type(result)}") + return result def _init_db_pool(self): """初始化数据库连接池""" @@ -57,7 +130,7 @@ class DatabaseService: # 创建连接池 pool = pooling.MySQLConnectionPool( pool_name=f"database_service_pool_{int(time.time())}", - pool_size=10, + pool_size=self.pool_size, **attempt["config"] ) @@ -90,18 +163,16 @@ class DatabaseService: return processed_config + @database_retry(max_retries=3, delay=1.0) def get_scenic_spot_by_id(self, spot_id: int) -> Optional[Dict[str, Any]]: """根据ID获取单个景区信息""" - if not self.db_pool: - logger.error("数据库连接池未初始化") - return None try: with self.db_pool.get_connection() as conn: with conn.cursor(dictionary=True) as cursor: cursor.execute( - "SELECT * FROM scenicSpot WHERE id = %s AND isDelete = 0", - (spot_id,) + f"SELECT * FROM scenicSpot WHERE id = %s AND {self.soft_delete_field} = %s", + (spot_id, self.active_record_value) ) result = cursor.fetchone() if result: @@ -114,6 +185,7 @@ class DatabaseService: logger.error(f"查询景区信息失败: {e}") return None + @database_retry(max_retries=3, delay=1.0) def get_product_by_id(self, product_id: int) -> Optional[Dict[str, Any]]: """根据ID获取单个产品信息""" if not self.db_pool: @@ -133,6 +205,7 @@ class DatabaseService: logger.error(f"查询产品信息失败: {e}") return None + @database_retry(max_retries=3, delay=1.0) def get_style_by_id(self, style_id: int) -> Optional[Dict[str, Any]]: """ 根据ID获取风格信息 @@ -166,6 +239,7 @@ class DatabaseService: logger.error(f"查询风格信息失败: {e}") return None + @database_retry(max_retries=3, delay=1.0) def get_audience_by_id(self, audience_id: int) -> Optional[Dict[str, Any]]: """ 根据ID获取受众信息 @@ -199,6 +273,7 @@ class DatabaseService: logger.error(f"查询受众信息失败: {e}") return None + @database_retry(max_retries=3, delay=1.0) def get_scenic_spots_by_ids(self, spot_ids: List[int]) -> List[Dict[str, Any]]: """ 根据ID列表批量获取景区信息 @@ -227,6 +302,7 @@ class DatabaseService: logger.error(f"批量查询景区信息失败: {e}") return [] + @database_retry(max_retries=3, delay=1.0) def get_products_by_ids(self, productIds: List[int]) -> List[Dict[str, Any]]: """ 根据ID列表批量获取产品信息 @@ -255,6 +331,7 @@ class DatabaseService: logger.error(f"批量查询产品信息失败: {e}") return [] + @database_retry(max_retries=3, delay=1.0) def get_styles_by_ids(self, styleIds: List[int]) -> List[Dict[str, Any]]: """ 根据ID列表批量获取风格信息 @@ -283,6 +360,7 @@ class DatabaseService: logger.error(f"批量查询风格信息失败: {e}") return [] + @database_retry(max_retries=3, delay=1.0) def get_audiences_by_ids(self, audienceIds: List[int]) -> List[Dict[str, Any]]: """ 根据ID列表批量获取受众信息 @@ -311,6 +389,7 @@ class DatabaseService: logger.error(f"批量查询受众信息失败: {e}") return [] + @database_retry(max_retries=3, delay=1.0) def list_all_scenic_spots(self, user_id: Optional[int] = None, is_public: Optional[bool] = None) -> List[Dict[str, Any]]: """ 获取所有景区列表 @@ -356,6 +435,7 @@ class DatabaseService: logger.error(f"获取景区列表失败: {e}") return [] + @database_retry(max_retries=3, delay=1.0) def list_all_products(self, user_id: Optional[int] = None, is_public: Optional[bool] = None) -> List[Dict[str, Any]]: """ 获取所有产品列表 @@ -403,6 +483,7 @@ class DatabaseService: logger.error(f"获取产品列表失败: {e}") return [] + @database_retry(max_retries=3, delay=1.0) def list_all_styles(self) -> List[Dict[str, Any]]: """ 获取所有风格列表 @@ -425,6 +506,7 @@ class DatabaseService: logger.error(f"获取风格列表失败: {e}") return [] + @database_retry(max_retries=3, delay=1.0) def list_all_audiences(self) -> List[Dict[str, Any]]: """ 获取所有受众列表 @@ -458,6 +540,7 @@ class DatabaseService: # 名称到ID的反向查询方法 + @database_retry(max_retries=3, delay=1.0) def get_style_id_by_name(self, style_name: str) -> Optional[int]: """ 根据风格名称获取风格ID @@ -490,6 +573,7 @@ class DatabaseService: logger.error(f"查询风格ID失败: {e}") return None + @database_retry(max_retries=3, delay=1.0) def get_audience_id_by_name(self, audience_name: str) -> Optional[int]: """ 根据受众名称获取受众ID @@ -522,6 +606,7 @@ class DatabaseService: logger.error(f"查询受众ID失败: {e}") return None + @database_retry(max_retries=3, delay=1.0) def get_scenic_spot_id_by_name(self, spot_name: str) -> Optional[int]: """ 根据景区名称获取景区ID @@ -554,6 +639,7 @@ class DatabaseService: logger.error(f"查询景区ID失败: {e}") return None + @database_retry(max_retries=3, delay=1.0) def get_product_id_by_name(self, product_name: str) -> Optional[int]: """ 根据产品名称获取产品ID @@ -588,6 +674,7 @@ class DatabaseService: + @database_retry(max_retries=3, delay=1.0) def get_image_by_id(self, image_id: int) -> Optional[Dict[str, Any]]: """ 根据ID获取图像信息 @@ -621,6 +708,7 @@ class DatabaseService: logger.error(f"查询图像信息失败: {e}") return None + @database_retry(max_retries=3, delay=1.0) def get_images_by_ids(self, image_ids: List[int]) -> List[Dict[str, Any]]: """ 根据ID列表批量获取图像信息 @@ -649,6 +737,7 @@ class DatabaseService: logger.error(f"批量查询图像信息失败: {e}") return [] + @database_retry(max_retries=3, delay=1.0) def get_content_by_id(self, content_id: int) -> Optional[Dict[str, Any]]: """ 根据ID获取内容信息 @@ -682,6 +771,7 @@ class DatabaseService: logger.error(f"查询内容信息失败: {e}") return None + @database_retry(max_retries=3, delay=1.0) def get_content_by_topic_index(self, topic_index: str) -> Optional[Dict[str, Any]]: """根据主题索引获取内容信息""" if not self.db_pool: @@ -704,6 +794,7 @@ class DatabaseService: logger.error(f"查询内容信息失败: {e}") return None + @database_retry(max_retries=3, delay=1.0) def get_images_by_folder_id(self, folder_id: int) -> List[Dict[str, Any]]: """ 根据文件夹ID获取图像列表 @@ -732,6 +823,7 @@ class DatabaseService: logger.error(f"根据文件夹ID获取图像失败: {e}") return [] + @database_retry(max_retries=3, delay=1.0) def get_folder_by_id(self, folder_id: int) -> Optional[Dict[str, Any]]: """ 根据ID获取文件夹信息 @@ -765,6 +857,7 @@ class DatabaseService: logger.error(f"查询文件夹信息失败: {e}") return None + @database_retry(max_retries=3, delay=1.0) def get_related_images_for_content(self, content_id: int, limit: int = 10) -> List[Dict[str, Any]]: """ 获取与内容相关的图像列表 @@ -807,6 +900,7 @@ class DatabaseService: # 模板相关查询方法 + @database_retry(max_retries=3, delay=1.0) def get_all_poster_templates(self) -> List[Dict[str, Any]]: """ 获取所有海报模板 @@ -831,6 +925,7 @@ class DatabaseService: logger.error(f"获取海报模板列表失败: {e}") return [] + @database_retry(max_retries=3, delay=1.0) def get_poster_template_by_id(self, template_id: str) -> Optional[Dict[str, Any]]: """ 根据ID获取海报模板信息 @@ -864,6 +959,7 @@ class DatabaseService: logger.error(f"查询模板信息失败: {e}") return None + @database_retry(max_retries=3, delay=1.0) def get_active_poster_templates(self) -> List[Dict[str, Any]]: """ 获取所有激活的海报模板 @@ -935,6 +1031,7 @@ class DatabaseService: except Exception as e: logger.error(f"更新模板使用统计失败: {e}") + @database_retry(max_retries=3, delay=1.0) def get_template_usage_stats(self, template_id: str) -> Optional[Dict[str, Any]]: """ 获取模板使用统计 diff --git a/config/database.json b/config/database.json index cbc6c4b..4ec8470 100644 --- a/config/database.json +++ b/config/database.json @@ -4,5 +4,10 @@ "password": "Kj#9mP2$", "database": "travel_content", "port": 3306, - "charset": "utf8mb4" + "charset": "utf8mb4", + "pool_size": 10, + "max_retry_attempts": 3, + "query_timeout": 30, + "soft_delete_field": "isDelete", + "active_record_value": 0 } \ No newline at end of file