From c66273b39358340b948e95a6578eb0aacb9652b5 Mon Sep 17 00:00:00 2001 From: jinye_huang Date: Tue, 15 Jul 2025 17:18:46 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=8C=E6=88=90=E4=BA=86=E4=B8=80=E7=89=88?= =?UTF-8?q?=E6=96=87=E6=A1=A3=E8=A7=A3=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/__pycache__/dependencies.cpython-312.pyc | Bin 3856 -> 3856 bytes api/__pycache__/main.cpython-312.pyc | Bin 3085 -> 3233 bytes api/dependencies.py | 2 +- api/main.py | 3 +- .../content_integration.cpython-312.pyc | Bin 0 -> 8276 bytes api/models/content_integration.py | 160 +++++++++++ .../content_integration.cpython-312.pyc | Bin 0 -> 5529 bytes .../__pycache__/integration.cpython-312.pyc | Bin 17080 -> 16716 bytes api/routers/__pycache__/tweet.cpython-312.pyc | Bin 11876 -> 11869 bytes api/routers/content_integration.py | 171 +++++++++++ api/routers/integration.py | 42 +-- api/routers/tweet.py | 2 +- ...ontent_integration_service.cpython-312.pyc | Bin 0 -> 12250 bytes .../integration_service.cpython-312.pyc | Bin 38937 -> 38915 bytes .../__pycache__/tweet.cpython-312.pyc | Bin 15676 -> 15676 bytes api/services/content_integration_service.py | 266 ++++++++++++++++++ api/services/integration_service.py | 5 +- api/services/tweet.py | 2 +- config/ai_model.json | 3 +- config/cookies.json | 4 +- ...ontent_integration_service.cpython-312.pyc | Bin 15742 -> 15742 bytes core/content_integration_service.py | 3 +- .../xhs_creator_util.cpython-312.pyc | Bin 3141 -> 3141 bytes .../__pycache__/xhs_util.cpython-312.pyc | Bin 6478 -> 6478 bytes resource/prompt/integration/system.txt | 44 +++ resource/prompt/integration/user.txt | 39 +++ tests/test_final_integration.py | 148 ++++++++++ 27 files changed, 857 insertions(+), 37 deletions(-) create mode 100644 api/models/__pycache__/content_integration.cpython-312.pyc create mode 100644 api/models/content_integration.py create mode 100644 api/routers/__pycache__/content_integration.cpython-312.pyc create mode 100644 api/routers/content_integration.py create mode 100644 api/services/__pycache__/content_integration_service.cpython-312.pyc create mode 100644 api/services/content_integration_service.py create mode 100644 resource/prompt/integration/system.txt create mode 100644 resource/prompt/integration/user.txt create mode 100644 tests/test_final_integration.py diff --git a/api/__pycache__/dependencies.cpython-312.pyc b/api/__pycache__/dependencies.cpython-312.pyc index b3da0bcda8a2e6da605627ec34fe1af19b5b79fb..ff6f81cb4da7bb6dcbab4a4a2844e73aa06e354d 100644 GIT binary patch delta 22 ccmbOrH$jf~G%qg~0}$v6mSwnaE6s4*$ji(R06^jefdBvi diff --git a/api/__pycache__/main.cpython-312.pyc b/api/__pycache__/main.cpython-312.pyc index 8d07e808adc219657cd5a6c893bac95b78d294ed..4a026e398c153e914092ed314c0f7580e987670b 100644 GIT binary patch delta 589 zcmeB`SSZPRnwOW00SILH$})aT7sgg-kW3vHs2n!?o8Qs znLyF>qQsKS{JhDA9J8fVfo6OLnN!3MByO=3<>!|asRNmAlb>-UF)B><E3yTOfxKFLU~&f6295;A z>kRQ18R93)a<5dl!N7c-LFgib&;?<&>%zJhg>^S%UT}}O;1+vhGfQBF}wQB4tCqqdp_BFxB8 z$yCXtsa|RTFdRUfh`E3J!kq{9hW1Ez>f!H)W9U4Ou+$5@^U5tV(IlB^Im)>zG zBqo6%Qix6B8z8r#rcmrOX)xxI{sJ7qEE>Y+kp;n(r@$9T}qU z9F1PR9y{|@G|;zU$L843h3Kd68xpu#t48Lwxg?*gd$G&s@_HO@nQL)5eX@Fkr(4$S z@&L}Nmi5m&Bw@Rk7u>Su$1cIm%liEew~Kf9yrPr7G=*8oi+g`zQKACV&Vp1`yFhmB zY84}Jyoy&};V!EvU%N)g594Fqi2m z(_RRYEZ_}k+EWTz5uxRzX-_L?#l)r-flWP=mTgJ5%4(HMo4p>N;PGt+*daR5XkHTD z@(YsBUjk26+FE@0TI`F%)ZAMA&jU5~(Yvt={|0d4#@F#{!_nJEqJ91G3q#-D?2Dcp zPV^r8_U7@)vEf)?aPq6q@Wz`bWBp$PX<~G2^3vG1H;-9WvX&S2IQ(v(oXrc8Q*;r- zl})_Y>F}&45ZxSg zUcax)@AF$=f%xrTN8cTr{N!r%)){KNayBIslXE6x8gMcU3KKu~@X7tmA@=Q@L#&kT zOZq5yDVy@U!|Wlp4eYVizXW)r7Z1li9gLj}Ox*s2dJBCNI0m&|P;2~Gx2Qi@S=sPY zhube~6h*IiNhPbDZb@eMOSqNF$_H!eM6cIZw^MZN7u-p|Z59QL+I5aDS6wGYbg9ls zJ=*S?acNsuw_KbI@q#_mwR-Aj4FR@UZkZ4Jk;(w{GIz6$eQPh|{mn8jIDcEnvOQ?r z9x>FxoQ$&exD5+cJcI6)cCCO;o?k;9kD=nStq0(Z2eBVkI~sqbH^4yiKMQ>Q4P z4;AQxYxS4InrVA^RQk@eGJy}bfKGVtbpTr#7B|ei_7ubX64d*qk`w57q(ZdB9xs@i z<0H18WtroPIHsHVz4`!aWm`e<8p#Kt55l3!wp3>KB~>mihNWfwnhwDu9PARCdYYa< zSE;aPTjTaR9d4<~mcIWPSWc<{5NH0UmkE}wz2q3m8>zpx{EOv(TscsGZu!~e=T=^F z{I8?~p2LMp*a7(X7r@111s6R!l(by6_%txdoEwV|wIukgY7rkMS`n>vx&?7DRL_5slJn5OIAG)hiYt zz*<~`5;w?i!VyYbv;x4-3gxRVs4zaaU) zpdwOP6`5ZVsi>VUHwV@A62Mp(#NDz1kwaRY+q0mxcArVzVb)FX<#D~D(3 z2;lraQ8wG{4i6-KQcp_2nJEid4Y_G$O zAT%Fk#<}Y8#dV|H6vS@!Mb&+Vd3J}Yw>7Y7VCVUov%BvUEQ0i;*`XRQt2nO?)^7^# z`f0fA<=#zwe4z1|E5yA#o?kMMA6(oL&fnPkLSI{m+c=%Y6qTQ^AIckg3sznC5-wWQ zrt00?C*fLcs`0`yN)9$j;X)r0LYz;5?G6|AP;AdQEVW^%YJ?50>j@YAqIXL`9pZj5 zUNZl@V`#%jRdBy`YNEQuIDL>>7D(viHHBQm3Hm z1?UucJyN?3)@<3>lI#pNgUgdjjuuuE-&}~jJxtpZn;*BVY)r&_K*;(%E^sRBC=?8| z#7@#k`HeTz&Ig0U*hSgoOx9GiwP>|wKjahQkKq>T58LqU1q7QAY)8T<4|k3a5dHU>PHc1x+4q1W$VGNo5S3CswrIR@tP8W@&jlJv_U%i z+4Yr8v^k?M?JJwwh^jDDMpIqbquNRq?!7mlcBk6P_CW-3+p&3&h`uIn4aR$q($Y=V zI{d!9UXfB_y*DPtK8(IM1UJhTdLP!m$@Cz#T8*=73|l;^9ip+9@DgJg)nXf-J*2iB zh(myimDU14wN;E4&l_kP@`Q`mQkPsiUbW_nPXjH^J(BqO(4b7nUdlFNxJ6Jwb*t3PBhyKr#?_>nMdKB=QW5(d12YKO z$ce*V<`g!csK>Xs{8OA_oO{A}<;bCS(3uN(<;Ip|W%7KutT|Pdgt=y_MqKdmYTOBwCyf(C6}nk-GG<*J z&B@(5t6sJ!vu=yvbGY1OtQH@>ICCa$WUehr z%`F7p?t?lvInJ9i=e`OrPD3_HsKhg0L)_zX?$3~bU=MsG8vxWw_4q>TP|avlc;Oas z8ed@B9di{bqI%JgI=FIsxVkmSl?_0qZKYbk1s<=3SD*%cQZ39Xf0%pEQ4<*mbtYOH z_-sYQ%@$xPo^&;1I%XUA90iXFc+5#2lY+;r;K>D^yd+Pqf+tVGV*wt0lEJcpj!XvLp=8u-T<#WJ{K#;0(0zk#kahKsr|%kTPo{S%*;z`&xyk zL<1kdjFZ-;vPI!3$pV#Zgw8@~K9zGMl5QdJVS7#JiPLA}!^5eK2smuC!@Ek34k5#$ zF-qc*9EPNR{h{qX&yhwSe$(kt@%lkE4>rgf`J6#@2g}3nG=os%<8L)Uwea~shu<{d zuaNw~?=OQ{e954W3^WOw!{l#2B<*+tO-;n2OvI22lPD6ocO!ztYcwxe4U+^{Z6TqPl099hyZ#d@0r)}T zRlEVC2$eq>CB$#vo){a$Ec3zp1Wo4*(czmgLWtfxKQVgDwFcIQhmxw2)RI_1`ms=` zsZ@?7V0uQ!5jLA`W`tnPntNPe)rmicr9=mUHxQuCL>_^F0Hd1N0iab}ho=waB+^5_ z34}xNk$M4;Awi_PA~N3=sabOO=|z!6)<{*|$bw+y`e`j!ZJEkt7Pqhwt1V({07Plo zlsc=rXv)OYHBRU0fnLNcd^%FOEcl}>!HTVs#dW|m-!f%j7B#bXtEwaPzq%%}@To{u z&2)~o+A`f(g`v)5fehn>pY8(y!Q_EqHlNt-nz1P*vezNO+Ifu6_&OY0fI zkUC7dP5E`3SE$ihFhhHKk;w+B`XY`c?h4RLMb4a-Nl|FDG{b|W@8&vqxjF2wD|T*C!d zZ4Fm%gQUMVaNv%qBE)SQe`d)LkDdNAyRc!dHsC`zV7d5Id%gZSb3kp@B1c>-N^R2@MYDCMelb9{eC+<%bNewFAntZ$&b{~C zbM8HT?z!iDoxkVhSrI(<7H-+Rz7(M!$Uu7xYUjoV3ZXv4pk~BSjINz(rl6(Ubs@T$ z4(XfqAw#o)B6Iq7W60EO3YnYD^4QRB30a%1AzQOe9vj>3p}gii3h59_wN_~xO|@*n z8Q66|nhs-UtxO(kYtb|L zyY*S!QqWzPqoj73tOcNBVjZlbMOO}UEjp$!N89-;+C`fEseP8kVoq4r zD(>AQp1UZ$d|vF0uiCISePM9?$otj_V$FIFEm(l}L?p(uSjc;5)20n;I{a*u4@Sa5 z!2>>jTL4F5VP=z$YvVjRLBBfa=LLFIxDy8Djgc_VhWWKX1K3A&o?!nRV>w=^m~M)T zM#3EHp{7&Z$l@(QKigGW=T{c32~Ms0OUX1$Fu)kaY)i01(DS|k*JZ8KRLCCrm{s=k zh9V5xE|{=tnh~>n42S({6c9HcJN$OR@5WO!LXRK{V$p)Op;j_?%hPQNz>GJ`<<+=B zs8x&R>>NUjjyGw;>DSXv^8{k(n`F(J-c*|-tAEv?jVQ#+H;l-&=1oa!z3KM_?le09 zH(%46YJpogHPU*(E!O%|EpXwM-}(U8aVrlI54)!2R!qyC<~E--oKfF>7i5QL;q;Eu zee_;~0d1$YQH+t?E4+rErLD)9fNLgPXf0}MRa=|d+SN8sZSzTcw&0BFukE`3bLNHG z+9%Mfx~WszrrU^eEGe4Cl45zMk~4b6-kWxst-Zrk%apKRjsYTI2jk8&4$}(?cIpZ*>X)|C*^VAq4)wePe(8Vjdj2SSV{Ou62)ndI|Plx9(A}Y;5>cx_giMX7S$fPevf!O5JZueS4(g z;q-@K+A&#ACaZ9OyI<}Rf2gVHe$_GQ!3)y+JH?)z(#u0x8M*U>oVs!}mHoy)c}aS$ z{}xeSkYjTsZ%Xa=yTwmm%H2ll8<1X(OYx)Psomqfd&N`lr_aA7^?f9-xjsgsNILS7 z7*`KR*;ZKqjzsagJ5~f@?y=!_uZ|qdy3rjY!A!UwtB}t`{IL*}3~$uOZ{`GZ8{4@R z&V>_9{z#-P$Z~=WN>MbX2nZG~f_X3B8D#}aIKs0xb%P;Ru*zKQ3kO)i9*uBbCdl!= zupi{15aCX5I2IC2T+HuhIc}3+;Xw%mLQ$b0iX(&>3kWM3{83i)uSke4+z z7;cFO=8nyrSMC*Os|@2+%aV}Cu|DkIOvWw2fX9qUs=_k}nMnxAKiEwOxpkPh6VC+_ zn+GJfK1=al@xbfEfATrN2~6~z2M;$1c`U{e>;>~VUx4-KZ{32d6f3)BIprKdKUR6m zjC!L+c8bHpFb@UO%y#(YHw;d~J-tmKH!wiq9i3nUNAfWaUYcPABg68(V7sRjlbRqH z+arMhAPT+gGP<1&;}XIz(VPcZ!OJN%UntU;tmaTmN(d%zgGt%I1jk@fDlkcqn3M-h z-UwTO2nN|Tf+dSwFbO$<=6DQ#Dj2N4ZnG$ zdp6nj?B&3=M9cOJLeEhT=%6WU)J-B}S*@EiBGcqZX$ zzGDAvrW6&r;sujM$h9ctn(?cfO zz!@*dJV?P?{5<(Eh3sY5k-=akr(oZ2PdFD1H4M#7l->Kc(zyMKx#^qy%9N`zRaSq# zWa#k^97EIVA>c4QW%7ej| zHDuz;+Pk6u)g03QYA!|k)t09D^jEcun`YBr&o)5+>-jXycA4rnv%YqIvmX}CJ_K#- zg5QnLfM}SM&rWkT9?*{RFw_|cI`Tm6)iE@qXAJ%HMg;Xgi%y~b-@6_0BD(AM%~V+qBU(r0poz{_NKi>~GzUo`afyQ|-6K)`F&7OvD{;i638( z-t3lMSIfH0mdWkqt^bgC;N_4~aeSWpx^oBc2cJ^&TU>3tC)9h=-6`LdXf=dLSMOFNTIAN9V?O0~PFw z>0bY4*58Kj2i70q$6+8!N+~QbSjB=l-xOCSs_wm9yd+^?k})EuJLSA9;jq%VXp z)myATOGli#n0egRX+6si4UrbLz~4IrlM4T^E}n>HdDS2|;VfQ99_NFTlckZ@wp zq47`8O?2;{IC(KWcwTzt@c)P~v3IvBfAS*WM>s-I6uUNWmW~d|bI=t}9UtF$G5z6dIgIq+?)15ja~J@k(tSN1gFw{_RD(b*5~#%jb+^16 zM9{>sURgN(_TGs%UQocMqJ0cr=(xi0(0Cc_j+Ya%0!WNp(y?K%;2ntc?C6El+DJTf zK)iS!ModndeeuFy$NQg8U)m?@!?ns8!5rW@>4Vd926*r%Xp@Tg)lAlZq10Jh@|HTTp5=JuGotN_d1chp9i7Ef0(0p~CP%++z1 zn>3y-*M`Ve7H^0^u_aLqn3)KrQ(&Pmo4+Ujo({} z6BYG|rH>|-tiNJ;tlM}E(sKP}M?(UYK!(gGRb0N}0_(tRs3AJ1XL9FH??;&OupUSv^oP;2&@uTYnwFkcd0wiOeEZ lHesJ}nVOl=)l~<(#(|ELwbv00iSVpzWJ31Ae*yKnltlml literal 0 HcmV?d00001 diff --git a/api/routers/__pycache__/integration.cpython-312.pyc b/api/routers/__pycache__/integration.cpython-312.pyc index 573cdcf5db639560a8447647269f58aab6b98863..823d171169fb7ec285487e9b12ba54dcbb38ca8c 100644 GIT binary patch delta 3605 zcma)8X>3&274{o@#$%6dypKJrm+>+-UIs51+jzrlPH;;d5@Q2q@OxhHGU3h)El^>T zLNHZRr#VX6G>An>RTI+$I#Hp30>7fDX~ncuRW(vuqIMi3wUiWDt*ZR!Irq&QKZsrB zNj|>!p6~4UoIBS~3BP_xNd6=_IWY!5chcdJxgR9w3c;%-DMGweOud{o9}5p-jmC5- zZAc?#oYc+}5WhaBRB~_^9I|C3WJ)G6OF^2mr7V%qaiUR$E{~#fB(s>SLT5+NCKY)kSZcx6sa^-D_$fNOZj3+6rC;QM$n}yv@42sBDze4&WWJQRp@ny9_HCwi|7gk zZLExXQl&!IQcMX>$s$%O$VfCUNR2B@Scf`lp5oL(FLLUTQ?GC$Ra~#qygur6gOn$3 zP>|L}%wGl1pIpl(l}rP9v#}X9ZzMdw4l$Y-DM@S*TQ9fGYo0i1f9j+|+;q7UrUhep zoVXdIUAD9(61!g9sftu2P~5I~i1(}@lGiR3y77p{`Anc#cSIBK^|^h1?-E=w zWSgCuPzsa0FM1}u(!hjAl*U8xUjM{|n;n5``n&P02z~aO;Hn`D{$$X@m4rNaY{-C5 z6H0_qNJ#9AXO)Oxm9RI_4Btp3bUTw+HQZ0>>8he57m)-a>yTWEV-2(}MH14Xr~YvttHq5s4oMzh6qV zq*8Wd6RlP#`T(61IRm-YU!?I`hu^;yK7B(r6S!;DV;S}%0v2uGR@DU zyV3i`4v_v8><-a6k+blL{a2AYcI{^PyB9WjJY%C$BoXs@54CKVjK7<|Qy7FV@~w&s z_5#~ibD?Dw7Y@=nksENb;C7B2`pks~Z@weDz($c5@nc_MNXy1?58EA`Q^a6{NO)Dk zDw*Y~5JYax_)s#FCOn6wfk7S`vk>q9WRI(-Ze=UGo)p6-iA&@+kW=)waVZ^_^6ij) z|Lo@Z#rH1aE&fQ$JjjC|73V35`>;5-b~Wp)2Ekq$vPyRx!ZjmuAKZyGFq!1YsmI;X zw5lNwn{fNQRibCmKOs%}mcUh>k+kr}>4l4LFI>14zBS9cXtv~UTE>eK#-99Or124Zt58nOW z;`=A$Oa#uaox=v2LILQlY%kb>i%T(14f`4{LRzljiG=Tjo0VmP6&5PbOvy6`f!$6+ zC|$8Opk&<3o<|;gfe1~akapB3O?a7^4ps}!nZTxr&o9_1TD?rk~F!W1P5Uw)6@C$UiMu;k-EII!9tuTOsVdEV&J z;13~xn)s{0M>HRjAszm(kx&62J$x!q`|+OOH-SgRZL{wn<1zll@XHd))HmbLBLBG5 z!hVK_0{(gT!;Y5eqNd>H-P5LfLf@wa&R|*ly@HOqW_O$V>JB&M@ZX(+N{BDKF^i40ewZWE|Ss9F_@$tU-t) zVk($yn>%)=o7DEe@ivF~IRs=Qpi%LqZ_aRiAMu}s8>nE1T7mV1HMpkay5Zw})7E>! z9{6{gH%A;$BQTG_JGEql;M90jw3;owCNyBzX?dY?0 lA-@6nK0yh8AXw2B>>8#7jKIl`lH^h0b5yZQV3)D({{vH%k{QZBQf^yhbGNYEwyt8NcULRJw5GJy@s=cqxgIK4hn15Q zH^?H|aMq4SaaLR-X=#cyKF`0+@0@G8tXMPByl!N>zh$nKt+k>pN`D(Wt!@7?t9G#JGGma_rL%eq+IOO1B`Ggma2F3*5GaB&oBlLr; zKUakEKxBOUV*K`v#QAXI{fi;(x&iubcD1J9Fdy`Y6M1|?yzdCTn{DT+=r`HD+E||4 zL7E`Gk{+pa(k_FQPUqBS2ED?#OUg8^yJjnT&Glq&_Eb;o(;`MX!`Lm0+&n?{g&EnjfOIPOUeBNNb zOiUiBqk8r3K1RQm`I3^fB2^Wbmgfqy;u7X@vNk02*K-zD0KL}&&h`!z}~LJB|O#HuGw z-?}$(b`4dFc7}7(&$KS?IMo)7$Z2yS=huqd<&U6ssKlHfN2)4%zo?{)Wvqf{%s83z zZuFxG(8}Tm3TYMe>Q>AlJ&eXD--%D$PMi)WrcR5h7Vpr+dsDPq?>;H3epXCBGAsl= z!(&1IF!2V517jYs(1N%Y+{iP4=22qP*}Eb8(9dM1!(B?_GdPT90)D~>n5DFpa2owa ze^EjD>0G&i{=<;qLUhJpFQiywNe*m4>cw$1v$Ul2kBn4Z_E1`sH}1sGOg|O~92w@{une3$2r$+m5;8~p!nLLA z7hM+l-pz{Tc>R8lk42Ia2#YDlyrcYv+jouBPL%YB`sDc0f#&x#X8Bk_eDHR@mg+M( zeeYKnM5**bMX$<-*n{+8g*jO&jn(wOO2qABPp;;5Jx!Fbc<244 zD>HP(W=XP_RvMZ%dQsnPUc*R*<^lB5{RY;%H1&r9WYilJ>iq#Nha$NiXo zRilp>x6Df2^4@QMv2=Aj@%BVw<~;qw*6xb$0sc~&LqUE3H?fT8y(7c^HK)N-w5GO( zbI`rDr{$d`H&)27-XQNuR#hN-P{Lj`*i&K|!$E#jASN8xC=o|C389}YpyPOV0?kWc zV#=f5;1D^6&p!cU6$^~p6q=BDiATgw5rW1N3D@a4da2%-9m^C#qoZDOEb@8%57SE- z$twWaN&3J(Ne+UAyjgd7g?gP<6}vq^&p3B;{q&zs^FXSTI<&HOsewRtOo;0pH!0~x zqea6eVZGrLJjQZg91>tVL;oEMk^%btrs^tIiM2-|6rml1QzGGP_;pi z=`zkjNP8r&`LEoEEq_z!R@LUn)DF-u+8qDy3HSzzK=T6acRl3VBir33?j&|(IG_We zAWvfH@NL09R@ip}yrO^*WT&%O681^KPA_&mu+Ge?f!!yjg6}6DX2D*-b}7OA!4@rh zeFRft0?3PS_ci>|h`|XqmemelU!f7clbnY~4t|12TqO&G_GlTSpe+~{p*-VV`QieA&WY4Vw*6zUI UZmRFLt9!VwAb2lF&cU1fFWsP@UH||9 diff --git a/api/routers/__pycache__/tweet.cpython-312.pyc b/api/routers/__pycache__/tweet.cpython-312.pyc index b0aa0e53910fa4fb017bbc1633635008c43d8818..1c8158b704b0cbbd1233246f03ecd38fef0f6129 100644 GIT binary patch delta 65 zcmaD7b2o4mu1Y_$oov4jk7pAv%qWeFZF)bqDr95<|!I1j2vRJ*TvK? Tim7j2psCKp*uD9L_8cYvp`H|Q delta 72 zcmcZ`^CX7%G%qg~0}xy|TADF?BkwbHcCO;=%!2r=$v@Tmxo>fTI9{bG>8YE0HCPxq Z#O1DwYg`o9*gR8Hor$q$^Fi%7OaROm8SMZ7 diff --git a/api/routers/content_integration.py b/api/routers/content_integration.py new file mode 100644 index 0000000..284a70d --- /dev/null +++ b/api/routers/content_integration.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +内容整合API路由 +""" + +import logging +from fastapi import APIRouter, HTTPException, BackgroundTasks +from typing import Dict, Any + +from api.models.content_integration import ( + ContentIntegrationRequest, + ContentIntegrationResponse +) +from api.services.content_integration_service import ContentIntegrationService + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/content-integration", tags=["content-integration"]) + +# 全局服务实例 +integration_service = ContentIntegrationService() + + +@router.post("/integrate", response_model=ContentIntegrationResponse) +async def integrate_content(request: ContentIntegrationRequest) -> ContentIntegrationResponse: + """ + 整合文档和小红书笔记内容 + + 该接口将: + 1. 读取用户上传的文档文件(支持PDF、Word、图片等格式) + 2. 根据关键词搜索小红书相关笔记 + 3. 使用LLM将两者整合成综合性旅游资料 + + Args: + request: 整合请求参数 + + Returns: + 整合结果 + + Raises: + HTTPException: 当请求参数无效或处理失败时 + """ + try: + if request.document_paths is None: + request.document_paths = [] + logger.info(f"收到内容整合请求:文档 {len(request.document_paths)} 个,关键词 {len(request.keywords)} 个") + + # 调用服务层处理 + result = await integration_service.integrate_content( + document_paths=request.document_paths, + keywords=request.keywords, + cookies=request.cookies, + output_path=request.output_path, + sort_type=request.sort_type, + note_type=request.note_type, + note_time=request.note_time, + note_range=request.note_range, + pos_distance=request.pos_distance, + query_num=request.query_num + ) + + # 转换为响应模型 + if result["success"]: + response = ContentIntegrationResponse( + success=True, + timestamp=result["timestamp"], + processing_time=result["processing_time"], + input_summary=result["input_summary"], + document_info=result["document_info"], + xhs_info=result["xhs_info"], + integrated_content=result["integrated_content"], + search_config=result["search_config"] + ) + logger.info(f"内容整合成功,处理时间:{result['processing_time']}") + else: + response = ContentIntegrationResponse( + success=False, + timestamp=result["timestamp"], + processing_time=result["processing_time"], + error_message=result["error_message"] + ) + logger.error(f"内容整合失败:{result['error_message']}") + + return response + + except Exception as e: + logger.error(f"内容整合接口异常:{e}", exc_info=True) + raise HTTPException( + status_code=500, + detail=f"内容整合处理失败:{str(e)}" + ) + + +@router.get("/health") +async def health_check() -> Dict[str, str]: + """ + 健康检查接口 + + Returns: + 服务状态信息 + """ + try: + # 检查服务是否正常初始化 + if not integration_service: + raise Exception("服务未正确初始化") + + return { + "status": "healthy", + "service": "content-integration", + "message": "内容整合服务运行正常" + } + except Exception as e: + logger.error(f"健康检查失败:{e}") + raise HTTPException( + status_code=503, + detail=f"服务不可用:{str(e)}" + ) + + +@router.get("/config/options") +async def get_config_options() -> Dict[str, Any]: + """ + 获取配置选项说明 + + Returns: + 各配置项的可选值和说明 + """ + return { + "sort_type": { + "0": "综合排序", + "1": "最新", + "2": "最多点赞", + "3": "最多评论", + "4": "最多收藏" + }, + "note_type": { + "0": "不限", + "1": "视频笔记", + "2": "普通笔记" + }, + "note_time": { + "0": "不限", + "1": "一天内", + "2": "一周内", + "3": "半年内" + }, + "note_range": { + "0": "不限", + "1": "已看过", + "2": "未看过", + "3": "已关注" + }, + "pos_distance": { + "0": "不限", + "1": "同城", + "2": "附近" + }, + "query_num": "每个关键词搜索的笔记数量(1-50)", + "supported_document_formats": [ + "PDF (.pdf)", + "Word (.docx, .doc)", + "PowerPoint (.pptx, .ppt)", + "Excel (.xlsx, .xls)", + "Text (.txt)", + "Markdown (.md)", + "Images (.jpg, .jpeg, .png, .gif, .bmp, .tiff)", + "CSV (.csv)" + ] + } \ No newline at end of file diff --git a/api/routers/integration.py b/api/routers/integration.py index d6aa9f7..eb45712 100644 --- a/api/routers/integration.py +++ b/api/routers/integration.py @@ -34,19 +34,7 @@ router = APIRouter( # 健康检查和状态接口 # ============================================================================ -@router.get("/health", response_model=HealthCheckResponse) -async def health_check( - service: IntegrationService = Depends(get_integration_service) -): - """健康检查""" - try: - return service.get_health_check() - except Exception as e: - logger.error(f"健康检查失败: {e}") - raise HTTPException(status_code=500, detail=str(e)) - - -@router.get("/status", response_model=ServiceStatusResponse) +@router.get("/status", response_model=ServiceStatusResponse, summary="获取服务状态") async def get_service_status( service: IntegrationService = Depends(get_integration_service) ): @@ -62,7 +50,7 @@ async def get_service_status( # 小红书搜索接口 # ============================================================================ -@router.post("/search", response_model=XHSSearchResponse) +@router.post("/search", response_model=XHSSearchResponse, summary="搜索小红书笔记") async def search_xhs_notes( request: XHSSearchRequest, service: IntegrationService = Depends(get_integration_service) @@ -87,7 +75,7 @@ async def search_xhs_notes( # 内容整合接口 # ============================================================================ -@router.post("/integrate", response_model=IntegrationResponse) +@router.post("/integrate", response_model=IntegrationResponse, summary="整合内容") async def integrate_content( request: IntegrationRequest, service: IntegrationService = Depends(get_integration_service) @@ -108,7 +96,7 @@ async def integrate_content( raise HTTPException(status_code=500, detail=str(e)) -@router.post("/integrate/batch", response_model=Dict[str, IntegrationResponse]) +@router.post("/integrate/batch", response_model=Dict[str, IntegrationResponse], summary="批量整合内容") async def batch_integrate_content( request: BatchIntegrationRequest, background_tasks: BackgroundTasks, @@ -132,7 +120,7 @@ async def batch_integrate_content( # 结果管理接口 # ============================================================================ -@router.get("/results", response_model=List[TaskSummaryResponse]) +@router.get("/results", response_model=List[TaskSummaryResponse], summary="列出所有整合结果") async def list_integration_results( service: IntegrationService = Depends(get_integration_service) ): @@ -146,7 +134,7 @@ async def list_integration_results( raise HTTPException(status_code=500, detail=str(e)) -@router.get("/results/{task_id}", response_model=IntegrationResponse) +@router.get("/results/{task_id}", response_model=IntegrationResponse, summary="获取指定的整合结果") async def get_integration_result( task_id: str, service: IntegrationService = Depends(get_integration_service) @@ -165,7 +153,7 @@ async def get_integration_result( raise HTTPException(status_code=500, detail=str(e)) -@router.post("/results/export", response_model=ApiResponse) +@router.post("/results/export", response_model=ApiResponse, summary="导出结果") async def export_result( request: ExportRequest, service: IntegrationService = Depends(get_integration_service) @@ -195,7 +183,7 @@ async def export_result( # Cookie 管理接口 # ============================================================================ -@router.post("/cookies", response_model=ApiResponse) +@router.post("/cookies", response_model=ApiResponse, summary="添加Cookie") async def add_cookie( request: CookieManagementRequest, service: IntegrationService = Depends(get_integration_service) @@ -214,7 +202,7 @@ async def add_cookie( raise HTTPException(status_code=500, detail=str(e)) -@router.delete("/cookies/{cookie_name}", response_model=ApiResponse) +@router.delete("/cookies/{cookie_name}", response_model=ApiResponse, summary="删除Cookie") async def remove_cookie( cookie_name: str, service: IntegrationService = Depends(get_integration_service) @@ -233,7 +221,7 @@ async def remove_cookie( raise HTTPException(status_code=500, detail=str(e)) -@router.get("/cookies", response_model=CookieStatsResponse) +@router.get("/cookies", response_model=CookieStatsResponse, summary="获取Cookie统计") async def get_cookie_stats( service: IntegrationService = Depends(get_integration_service) ): @@ -249,7 +237,7 @@ async def get_cookie_stats( # 工具接口 # ============================================================================ -@router.get("/formats/document", response_model=ApiResponse) +@router.get("/formats/document", response_model=ApiResponse, summary="获取支持的文档格式") async def get_supported_document_formats( service: IntegrationService = Depends(get_integration_service) ): @@ -267,7 +255,7 @@ async def get_supported_document_formats( raise HTTPException(status_code=500, detail=str(e)) -@router.get("/formats/output", response_model=ApiResponse) +@router.get("/formats/output", response_model=ApiResponse, summary="获取支持的输出格式") async def get_supported_output_formats( service: IntegrationService = Depends(get_integration_service) ): @@ -285,7 +273,7 @@ async def get_supported_output_formats( raise HTTPException(status_code=500, detail=str(e)) -@router.post("/validate/documents", response_model=ValidationResponse) +@router.post("/validate/documents", response_model=ValidationResponse, summary="验证文档") async def validate_documents( document_paths: List[str], service: IntegrationService = Depends(get_integration_service) @@ -313,7 +301,7 @@ async def validate_documents( # 快速操作接口 # ============================================================================ -@router.get("/quick", response_model=ApiResponse) +@router.get("/quick", response_model=ApiResponse, summary="快速整合") async def quick_integration( keyword: str = Query(..., description="搜索关键词"), document_paths: List[str] = Query(..., description="文档路径列表"), @@ -372,7 +360,7 @@ async def quick_integration( logger.error(f"快速整合失败: {e}") raise HTTPException(status_code=500, detail=str(e)) -@router.post("/batch-search", response_model=BatchSearchResponse) +@router.post("/batch-search", response_model=BatchSearchResponse, summary="批量搜索小红书笔记") async def batch_search( request: BatchSearchRequest, service: IntegrationService = Depends(get_integration_service) diff --git a/api/routers/tweet.py b/api/routers/tweet.py index cfcbc0a..919c4f3 100644 --- a/api/routers/tweet.py +++ b/api/routers/tweet.py @@ -308,7 +308,7 @@ async def run_pipeline( audiences=audiences, scenic_spots=scenic_spots, products=products, - skip_judge=request.skipJudge, + skipJudge=request.skipJudge, autoJudge=request.autoJudge ) diff --git a/api/services/__pycache__/content_integration_service.cpython-312.pyc b/api/services/__pycache__/content_integration_service.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8af7c34d6e78dd8cfffc11a3abfc000f61b86f85 GIT binary patch literal 12250 zcmb_CZEzDumNWYPw)}0&U$SLmY>W+lw{R z_KC1pcVnyPb@%HxuV24@{rYwPPODX6@cgdm**%du4Er<6@Q)-3+=>D)h7p( zwJa=m>D*c7EEZO{^zLkPHVZ3VIqqC@E(@z%dG363zPrF&z~hedTdUmxUDi5CtgKDsdTvriJE!I_h>IGjGrR`oPQ^#}ls|N`wy2e){w23-8XJJv#SMXzq&>b7u~5ZT@oU(CpE3 z-Q7L2M^DcLM`tfyLWK6eH#2r{W^$6Fpt^y+yR}ZjNV*+ficxGEq8y$g^z`LzJC_oTk@qC2e~;J%fFYe#mES^w@lE z`yiDjHLQ_#K5koJ>SB*tDl z`K-fc_vHhP|2dko=PufkssQyEh7?;2zgzzW$rwfnbss<;J$}q0l=(4dQf0V0E_F)N za~zB&!&xz}U^r08*ixRMrG!%OrF{DUPO14aKhCQ%a3VRf*8Yh(#& z_j-J!&E7P`YEY9SH5yIcVJ~HOH&Xkkk?J49^8syy1&W%(wXty2EJH927(J>_FWFkB zCbhsWqvQCqxRZLF>+)C$OCNAzAsnRLMtR6#M%8bpEF2=KgI1&lHAP*3xiCnPNW6@m zNKPM%m78KDk_VHO(aQ*hG3fRMQU9`Yb!KkZN|4 zb}R7EWF2xeA^CxBdYhmbsd@l6{?9x<@hj?mt?VHM<1=3KbN$v;f0j}m$+BhB>NgI<2{_CkX!O_$hnM%Qg31Q=L3hC+AL4u@{m8zpX1Lx zoAUv38Q=*NK-)$RPEVaH({mi`&-WJ)Vj}l!9k2w1;B1+|vv*4f5GsW(3+&ihkkJVcc7-wYTD zvES&gAj(fjh>BC`R`>^PRwm8&iwNUMu|H$Gjo9Hzo2VbNiJoWz-Yfl;&h9jqV8bvIz5w)3t0CZe8LcD8}n-FvV=b(*&K@_4DYzsg^o)c52X0lUI{ z^vBb`t3_MxH-RM;EY4l=8wKr4vzq>@UhF<>6hAa$6XEQ5?!`>lO3aJzh5Rn?NO0`X z1N+3gu)VkmqjvB;sK&^_Kn+nuJ<6B(YlPAHf0!@-n;FW_DWd75B(NmSYV6{9OTCE| z{+fkS^DnW9c7f~zH9x88O}_WydoixhABy+lbt`?#VbE2^}6VL>p zgY5UV3OPh%q5s)x2PG9`Z-1(0apC3pB$L7vVENH|#senFua`uy-Cl3D8jiVpU zef&xW+@u@ZY81BpovdS<_;76Y?7P#Gzgj>!&DU);&VBxR;>AxnE*4-3PB1X=>T8Kl z56`@DA#v&O0vy3RZZ&do2H3kM-uh_<9B`X?oSCuT+!*`hf8wzIG>ihDF}{}8z8pYnFs3-- ziqXbZVc!Rv2#aUHhupuJ58qCloJhQLV0JVa+2ORUv9ZyI7CVkASD*Ok5QK3cs*^Z) zjw>YS|4)k zYkAcShLWG0Idz5;t`9lpLj61E=05pFtC5ou|Lr6zY244fdOq>_n9+wE*$d?E49 zix8{gqza6FFKRIQss)6((DSoLCjfzqaHU66P_M)!LQUH?CwH|>_P#w{u8(Mcj5fMV zZiXE%h()_#tIw#@cKM77G8Qid+h;c;g)LyeeJhF93mDIQ*pkCG+G}yS+!m+TGiagg z`=|$Ru+<_)V;}VTNV~=AwK*J&%rQs+3qIth!B{3*%u&}#H*)Kpyc&e5}zLB@JF;YyK3Au zgr(-8nx2~85pm;gHz*F`)T=wK{nFZP1GKzDs zF$0%?=lgwqVaTWF!?_Q!tg35Xs9a6g{Ock|+h z^qFhkz{hY{%PJ(;Fovx9`}kHE6`9pX?eUQBix^Sk3P!>zIr^y4NE+eADBB>`V&6AJ zwvV)T-Z88p^n}-)#x|G7W_5Yn8-@0t0y!_5%5&I%?8oTZU0*6EC0BNQz592&CnO)M zKU9CI{Hs?1_0MJ3;78%NKoDp|SV8OoKoCUzodp3xK>b+sj6fc0k}gMjC8|k^sDT#| ziQG9DT&c@305K7T&ErOqS}!B>?y*8(jC=^{lW`5Atipj=HRYkKE^aC_a&CCN zOb%Cy28y)WP|Vd^Creo~vJME4=%XZ>KV$;TnPafu zf=U^!V-Q7(ygs+v3Sm1fui+35>+>)QG+|gMN6-?2R}-)$Y&0C@Aso)+@$-d81wJh! z(ne%17<})D)JOF#U&AgAgEkKVEci^oDQ61>v!7g$&i`uqi&KdYk0pX3u(a6-Z~w*Q z3c{|qt;RYD*$d^2)=rWhh>=3KP@ElkdE`z=e0h+ho6tnkaMvwoBy8tpE>Ay-%uCoV zieU;fA`hsF+d5!}P^OneQ3@uP?~mQWOUFxgqOYJp6RAQtnxrVWb%te21_shTNH9{i zjFIoPl7rA^@=1hWWa;yeZYw2F6oi`1*9f+XkvKh$K@vqBnQRnRU1-9ypY}OigeA?8 zlW3wcS~T||LJt8y;DpioD2L12$eK5YhipYC5_V{$gwH+XCDH6<#Dku_i~`nxJ|rM= zJ(Q6hr~+b(+`5t_NP{zcjKqbGRK#98N+41wVZ+A~&k#scf=DyU2lm9tP$VN| z2du7^L<>2IbbvL)B#K&)OHd*#)R|P9QGqU_kS5aKEPhBVFr|GiicurECBw-OFm@#; z6S)G>HA5n(E!TiVhKXy%cTvMTt7tJK??nyoV;e$?A((qfiE6m1LwN>Z^<%V2KQx(T z=Nt?wP+&!^9$ zr}7+v7fB2J^1tU{4IPs$j3KXfBg;Df{`CRHTD%9rYO#yK?RKodFt+Q+uGe=5RZ|Mx zVf9ODx?s(v+{q^+E#35*9=gX8eac1?uEJzd7_G;dC5Z}rEyxLJKXhB*D>`3 zaZS?Bqi7~Qgq-eseUh^WS%MrTb_(=uhK9)C1qSRU+(7a4+^|6F1kl}{NP;-#i|c`f&U zTPdx~8P(3~u*!y*aZT8`X0j||+%&4ao@ERSumhqu`2&3&{9A%OvuE_yVoc`U64_ zI8iWG$12*w6>XE2NJS5=DTShvikM+V*svm095Jj7qGN|r;E#r?xN*aURX*AlS zn)|Pp8e^p^!lf%>r5nPf8zQ9}gFSIWdCbrlHZ;Zz>%)fi@#30z<;r-~@_9K{(t+Q? zBqchatHZkRZTQ#Aze@HaUcM||S{pB^h!-rIS3+eUph~-N-FKMu0bCc>bOd{jwM~>w zD*s)NY1{r_XbNc~hW23BO&O*y`J=uhZmNB+^-Sx;z!fR|qesVEBc{h_y>UukFs44D zrc1l8>3g6v)hm;Qx+6NeqC2YZfgV+uPCJe}P7F+J376j++%{EYJgqsdIiZUcHHC|s zLRpcb^})@oB;sXD;!Exc+31?qNfT{qpI%Zo(G;ui4A*x?mTa7=t)FOl`zLYZ(wlNg zRnB}7R^1#kwOuu}-O|dd3T|q!>UGCDp?CGmKQ4Q}EMC`uUQ>-LL#Fe~&Mk{Hw%?LV ziVgEBz`KcB+}2}Ft3pk&Rola>wnrKtxtXh~%DD|>YsZJ*Zl9_!z0(;+Zd9apSEPDZ zq{2K^zdX#k3z6mSNWB}_D9ZtIwf9}GtdDd5RSog#6#%0TXSp&l^)7Q65_| zUKy&njE^d#`VHSnv7FZ13PA3F@@52y~H?GSv|h}?SiSYn$wRTfBeLg zv9i_Sveh9=r0jvICH3z)&N$v42yF>3X`l4cHJz86Y15{gN@=MMbXJ2n>VB!4E@=wk z6JENwIjUJ5&#kzosrcJ`Hjw{M@3UZzp6d|xs_-wR4`pxHV_y~E54TCaD(uuj=3i=7 zKYYLDUyCyFT+tQs>Z`nwIb4@Y?<9>^0%44N2JBt(jui9U2sA;Q-d?5QyBI*NFg8&NbW#c>K8*q zQkFWa^Gh>BZ`_>~)Ko@ku!8w>#yo`CUt zQLAx8WL(PNbq-n%7iqp1Lo|ZHRd6E!O+MI%4uQ8vuud_fw8MEZ>v)oYi?Kc=>`kpu zhCE5~X_WX-;)0}3%6WnR2r#4NcM#w#!A1>tkgq_o7o9+{&JF9mkWR>sth%$m!aLA{ zXPBh=4Wb2ip0NGcb#MfiW2*IHoa%HAcF!C8}u^vf~x8%Jt#O^-)bn zsw^j}sZO@7zN)SkD(K3#sAk=i!8E=tR=qA9ZE-y1rP$VTze=D5Brrmqg`tH=GJ zj!<*x=}UQ&wlCZl+~6X`TDFHyBgMzRY5uiJ8myNjI-uU^#M%YsBoiGOYfg|k5N_zC*j#vI zdk0Pw&=zfh7Cno??U{R_gto~1FM2=*DyYeDX#i@FsmN%$PRY{Dg(TCJ0CG#3#4}>> z6bOxAx=;jj(L-nn4VW&SV7fBk1f+M6SC(J8NH!UbD7pL&?e#=9xkC5Cb>*Sn~&==sNOefK9#uNy+cvHw_%2DC6A{la(!Sa zB4w_xpBaB~_QTIn0f{buvh38?-+2A=nOA?omc9w4bsF*nyf}k@5+FuOIVhK%+=NQe zAt;FgqvSS}palVC#&n(*A#ms=hN zApb+#S$=sC;wv{16%?=BkNsO#UOcy!T^-_z{FtINtSF@`+An!8-*a(f(siYVE_*nt z*e=u^dnBqbP8S--^C$3FVf{;+f*nChSP;hBkjomX3hg*=K4%Vf)0G`j&4%=9$$9lT z^+bTKTo=`}3l+4f6(WVL7g{G9F7LTQeI58JaK%GEVW<0?vA$=*eb3N#lCJbdHPrR2 z+{6Bt{nKmKPHz07`$BhQ&E_keSJ!MiR?Th(s0lG`XqfO_HLRw0?uzZShId*=<*8-C zAKDaZp=;Jf_3en#@XN#LbsNubKDT+2_`-3)5n25Z-MpEu*%H-vUFX+I=$_=f_MCR& zALz>aqnh;s|KmBohZV96D_S;h%(U8Ut&@+a;f}4fiHmHIVBaw=0jCjT&m7p~1~RI3 zT68-P`41Kgqq3kYkUogkTNsVS@{G^wO4i6ND3)fikd=r8T9L?~K*ES2)*%HAQ<1c!uQ$(zeM&Vj0GA~IY;I%U2gH?Znt%USX*y?YvrPq~) eU|~#Y2rCW8)NZ!wgzpZsr+?6f-o literal 0 HcmV?d00001 diff --git a/api/services/__pycache__/integration_service.cpython-312.pyc b/api/services/__pycache__/integration_service.cpython-312.pyc index 42daf0af0b9f51460676af860d9c8676c3155ca3..d598ea223d8a288b00592911a346814bc67084c0 100644 GIT binary patch delta 635 zcmWkrUr19?9KOGM@40tpcem*_^n!}!+S;6(3eL9qr*yN(AcCNO5Zk=>LCOR*73WOR zhaymx2Lw?le9Z^rMuE^J>JKy>Epa}uHt8pGqYj9W-Kn75{Td8EhS_Aa9`bkA ziF&oZor|P7wIW^G64EZss`c*-kiAqGqU1C+1%u?l+&HZ#b58KBrSHN`axZ-!YG`dF zN9k=)Nosx)Vx&CZL9;hKS?56@@0Yyn#1P3YZRqREcM!{9z4ELb;E{gz`A5!j>~1-g zMR!f+dc{V{D>uZe98=%*XP6AGj);Riqj)Mq`rB1t;Tn0Dc?qrL){E^02YhlzeMuj` zZ}TL2g#GSd9E8v9A&L*YQES3%`Vy*-_ex(K9Ta~W;CKY*%&vHF0q5ETx-l0-1{L89 z+=~*THP4!;Vcv`rVJUB~OLR*4fT7HHB7+GV%6tz_dT;?LNhdBe2sBc`i44YL#4Gr) zS|t`b`^~B({_K;fb5TN+Dpqj@wj)((;fe}Uo5f;-iN-6suvX0#1BzzhN`gQ|B@3cV dshSSxP2?X7xApq4hW%C_?i#KJQej2)Po5nglM4dZY%Uu*o9uY&^hPf`+oe-|2zK=&de(RV3pU` zbzSA?`6rGH^gq=VEQFxbapUcN@H*eF&GB%ZObrI$i}QT2Sr%7npCEF4w3-x+Oo2%@ zk_F_|$N-!m-q8R*B$E1AfpdHGB_RKzXa`8wm=Coq$;q1gLq0MQ%Yz_U9d6QFp8(6` zxJAxxBY(z9&`JB$N;@eXpVc~+q(qr**rAm$c`+VE-bbCkQU1^iQ!%Jb*X9psSh+4FL@z2KcY*u-&>x{g*WBl>OQZyW>!Xn(ps=7!bxxI{FK+L!bTJ;@BE%ftM)#x zp7I6?EA-}r9^&c9rh)8S*vtPVTr23xX zr=AptBLSSkWs$~7oT`!Ni>XG$q=3M<6-6Z@4GIlRV;uEsY4cdLK}-9XGu?og;%Clu z5cgH!jOgww#u<-9SIQJ4rc?-wz1TA3Ohboisq&{bohD~hM5U}DFgB6p@(NiOvWn!a yN1^LwOR=3JWPNU16ZVlrP5T<6%6<+Va8&Y}hl`H1@D+@k8mQknR^u)7H~s_noW#2YFSK`4*F&!7av;qPWdTMp2APVE?CpG_q!vq~;diV#!ObC@G2onU@73Vkh4- sj#L17^cEk)Xy^RAlGMDC;-XBDd@_iL+Z=9^&&&ogrzmps8S?^G0JT|i;s5{u delta 473 zcmdl}wWo^rG%qg~0}xy|TAE?Lk$0vz=PmZqyzIRE^1RKf#V;~322HM&+Q!0pS=4B= zx%4tt)*^49>dDU(^sIe=48NifAX%gfBJ@CnK8Oef5n(_=le;J!#5MpCh9E);L_|%V zV5~T~KvA469>fir+^ZA_71}Adv7Y3ZA@C-N6-PZ>a!~&{250ZsD^9JD%^J{A}JFD+Nt{bjL)1 z^xA<4Sr8EoBBVh?4A_C;8obtEi&a2ksvtrQM5u!Zu*(xbEC&$b2qbQCXO*U;r>4ZG zB$gx=1#h0O@qmNTdvb%}Wqwd7Xfoeo(lfZlSW* Dict[str, Any]: + """ + 整合文档和小红书内容 + + Args: + document_paths: 文档文件路径列表 + keywords: 搜索关键词列表 + cookies: 小红书Cookie字符串 + output_path: 输出路径 + sort_type: 排序方式 + note_type: 笔记类型 + note_time: 笔记时间 + note_range: 笔记范围 + pos_distance: 位置距离 + query_num: 每个关键词搜索的笔记数量 + + Returns: + 整合结果字典 + """ + start_time = time.time() + logger.info(f"开始整合任务:文档数量 {len(document_paths)}, 关键词数量 {len(keywords)}") + + try: + # 确保输出目录存在 + os.makedirs(output_path, exist_ok=True) + + # 1. 处理文档内容 + logger.info("正在处理文档内容...") + document_result = self.document_adapter.integrate_documents(document_paths) + + logger.info(f"文档处理完成,共处理 {len(document_result.documents)} 个文档") + + # 2. 搜索小红书笔记 + logger.info("正在搜索小红书笔记...") + xhs_adapter = XHSAdapter(cookies) + all_notes = [] + + for keyword in keywords: + search_config = SearchConfig( + keyword=keyword, + max_notes=query_num, + sort_type=sort_type, + note_type=note_type + ) + + search_result = xhs_adapter.search_notes(search_config) + + if search_result.success: + all_notes.extend(search_result.notes) + logger.info(f"关键词 '{keyword}' 搜索到 {len(search_result.notes)} 条笔记") + else: + logger.warning(f"关键词 '{keyword}' 搜索失败: {search_result.error_message}") + + logger.info(f"小红书搜索完成,共获得 {len(all_notes)} 条笔记") + + # 3. 准备LLM整合内容 + logger.info("正在准备LLM整合...") + + # 构建文档内容字符串 + document_content = self._format_document_content(document_result) + + # 构建小红书笔记内容字符串 + xhs_content = self._format_xhs_notes(all_notes) + + # 构建关键词字符串 + keywords_str = ", ".join(keywords) + + # 4. 调用LLM进行整合 + logger.info("正在调用LLM进行内容整合...") + + system_prompt = self.prompt_template.get_system_prompt() + user_prompt = self.prompt_template.build_user_prompt( + keywords=keywords_str, + document_content=document_content, + xhs_notes_content=xhs_content + ) + + # 调用AI代理 + response_text, input_tokens, output_tokens, time_cost = await self.ai_agent.generate_text( + system_prompt=system_prompt, + user_prompt=user_prompt, + use_stream=True, + stage="content_integration" + ) + + # 使用file_io模块的JSON处理功能 + from utils.file_io import process_llm_json_text + parsed_json = process_llm_json_text(response_text) + + # 如果解析成功,将JSON对象转换回字符串用于存储 + if parsed_json: + import json + cleaned_response = json.dumps(parsed_json, ensure_ascii=False, indent=2) + logger.info("成功解析并清理了LLM返回的JSON内容") + else: + # 如果解析失败,使用原始响应 + cleaned_response = response_text + logger.warning("JSON解析失败,使用原始响应内容") + + # 5. 保存结果 + processing_time = time.time() - start_time + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + + result = { + "success": True, + "timestamp": timestamp, + "processing_time": f"{processing_time:.2f}秒", + "input_summary": { + "document_count": len(document_result.documents), + "xhs_notes_count": len(all_notes), + "keywords": keywords + }, + "document_info": { + "documents": [ + { + "file_path": doc.file_path, + "file_type": doc.file_type, + "content_length": len(doc.content) + } + for doc in document_result.documents + ], + "integrated_text_length": len(document_result.integrated_text) + }, + "xhs_info": { + "total_notes": len(all_notes), + "authors": list(set(note.author for note in all_notes if note.author)), + "total_interactions": sum(note.likes + note.comments + note.shares for note in all_notes) + }, + "integrated_content": cleaned_response, + "search_config": { + "sort_type": sort_type, + "note_type": note_type, + "note_time": note_time, + "note_range": note_range, + "pos_distance": pos_distance, + "query_num": query_num + } + } + + # 保存详细结果到文件 + output_file = os.path.join(output_path, f"content_integration_{timestamp}.json") + with open(output_file, 'w', encoding='utf-8') as f: + import json + json.dump(result, f, ensure_ascii=False, indent=2) + + logger.info(f"整合完成,结果已保存到: {output_file}") + logger.info(f"总处理时间: {processing_time:.2f}秒") + + return result + + except Exception as e: + error_result = { + "success": False, + "error_message": str(e), + "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"), + "processing_time": f"{time.time() - start_time:.2f}秒" + } + logger.error(f"内容整合失败: {e}") + return error_result + + def _format_document_content(self, document_result) -> str: + """格式化文档内容""" + content_parts = [] + + # 添加整合文本 + if document_result.integrated_text: + content_parts.append("### 文档整合内容") + content_parts.append(document_result.integrated_text) + content_parts.append("") + + # 添加各个文档的详细内容 + if document_result.documents: + content_parts.append("### 各文档详细内容") + for i, doc in enumerate(document_result.documents, 1): + content_parts.append(f"#### 文档 {i}: {Path(doc.file_path).name} ({doc.file_type})") + content_parts.append(doc.content[:2000] + "..." if len(doc.content) > 2000 else doc.content) + content_parts.append("") + + return "\n".join(content_parts) + + def _format_xhs_notes(self, notes) -> str: + """格式化小红书笔记内容""" + if not notes: + return "暂无相关笔记" + + content_parts = [] + content_parts.append(f"### 小红书相关笔记 (共 {len(notes)} 条)") + content_parts.append("") + + for i, note in enumerate(notes, 1): + content_parts.append(f"#### 笔记 {i}: {note.title}") + content_parts.append(f"**作者**: {note.author}") + content_parts.append(f"**互动数据**: 👍 {note.likes} | 💬 {note.comments} | 📤 {note.shares}") + + if note.content: + # 限制每条笔记内容长度 + content = note.content[:500] + "..." if len(note.content) > 500 else note.content + content_parts.append(f"**内容**: {content}") + + if note.tags: + content_parts.append(f"**标签**: {', '.join(note.tags)}") + + content_parts.append(f"**链接**: {note.note_url}") + content_parts.append("") + + return "\n".join(content_parts) \ No newline at end of file diff --git a/api/services/integration_service.py b/api/services/integration_service.py index 9942a6d..b8cbc52 100644 --- a/api/services/integration_service.py +++ b/api/services/integration_service.py @@ -52,7 +52,7 @@ class IntegrationService: self.document_adapter = DocumentAdapter() # 结果存储 - self.integration_results: Dict[str, IntegrationResult] = {} + self.integration_results: Dict[str, IntegrationResponse] = {} # 统计信息 self.stats = ProcessingStats() @@ -703,7 +703,8 @@ class IntegrationService: total_notes=len(notes), notes=notes, search_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - success=True + success=True, + error_message=None ) results.append(keyword_result) diff --git a/api/services/tweet.py b/api/services/tweet.py index f590af4..4458a12 100644 --- a/api/services/tweet.py +++ b/api/services/tweet.py @@ -121,7 +121,7 @@ class TweetService: if not topic: topic = {"index": "1", "date": "2024-07-01"} - topicIndex = topic.get('index', 'unknown') + topicIndex = topic.get('index', 'N/A') logger.info(f"开始为选题 {topicIndex} 生成内容{'(含审核)' if autoJudge else ''}") # 创建topic的副本并应用覆盖参数 diff --git a/config/ai_model.json b/config/ai_model.json index 66e8356..2d89b90 100644 --- a/config/ai_model.json +++ b/config/ai_model.json @@ -6,5 +6,6 @@ "top_p": 0.4, "presence_penalty": 1.2, "timeout": 120, - "max_retries": 3 + "max_retries": 3, + "stream": true } \ No newline at end of file diff --git a/config/cookies.json b/config/cookies.json index 6f951b6..6517a07 100644 --- a/config/cookies.json +++ b/config/cookies.json @@ -3,8 +3,8 @@ { "name": "user1", "cookie_string": "abRequestId=873258ba-49fe-530b-9c07-529e7871508d; webBuild=4.72.0; xsecappid=xhs-pc-web; a1=19808118ec0gxnuh5mtpv8o8aepgp2m65bpbtiizn30000193947; webId=3f4bd516682e25b71cdd139a11b4d896; websectiga=984412fef754c018e472127b8effd174be8a5d51061c991aadd200c69a2801d6; gid=yjY8Yyy0qJ24yjY8YyyYdD33S8kCI7x269UhUuIYlY0dUDq8kUJ6K9888yjqj4W80fK0ydj0; web_session=040069b295652fcb5b6e1389413a4be4606547; unread={%22ub%22:%22686e16c0000000000d01b775%22%2C%22ue%22:%22686e3f1d0000000023006e9e%22%2C%22uc%22:15}; sec_poison_id=bf3ca020-2442-4aa6-904a-b95f7ccc56c1; loadts=1752482529464", - "last_used": "2025-07-15T15:46:11.050854", - "use_count": 40, + "last_used": "2025-07-15T16:21:52.687251", + "use_count": 46, "is_valid": true, "failure_count": 0, "user_info": { diff --git a/core/__pycache__/content_integration_service.cpython-312.pyc b/core/__pycache__/content_integration_service.cpython-312.pyc index 4260d5db9ce1843d5ee0b322460d83167103aef2..0f0d2c5c1e5cc7dc227d058f68a0a3d04fb0ea08 100644 GIT binary patch delta 221 zcmexY^{jT&3W(i+5#v_|A%I#)k z+_Je>VKE!y!_9{3Ynd2DHs9C$!Ne%P`G$@eE2G|KexoCdj4qR}7~f~~n7qa04WrNG zg{Ds!12(srWiv5`Z2n}ijWIZckyY%5s>TOq5aWT4$%c>}${Qmu>ezo^1&MrMW6;t6 zzz$||FlcCh-~=CXlXBuVrEs-h5y42NR>*<{LU@tc<#w`HhY+GCEJbVtk*`eexEQH;mqs z7n(j{^xxcOmd(T%y!n&GHpbv!Mpm&Ksu~}dL5v4FCL2O_C~u6ssAK!Jwi2ffLN+VgRaiUTOP*8_IQM3}yTb;pA=pZB@d=AJ4+b^@#yQ7O4RJ0|1vc BMz;U} diff --git a/core/content_integration_service.py b/core/content_integration_service.py index 60d444a..be2b805 100644 --- a/core/content_integration_service.py +++ b/core/content_integration_service.py @@ -134,7 +134,8 @@ class ContentIntegrationService: logger.info(f"开始搜索小红书内容: {keyword}") xhs_result = self.xhs_adapter.search_notes( keyword=keyword, - max_notes=max_notes + max_notes=max_notes, + ) result.xhs_result = xhs_result logger.info(f"小红书搜索完成,找到 {len(xhs_result.notes)} 条笔记") diff --git a/core/xhs_spider/xhs_utils/__pycache__/xhs_creator_util.cpython-312.pyc b/core/xhs_spider/xhs_utils/__pycache__/xhs_creator_util.cpython-312.pyc index f0ec249a81853c7e4deebd4845f673e82360f3fe..bb5b0beda9491f92ec932537c71e1c5cff4783e9 100644 GIT binary patch delta 20 acmX>qaa4l)G%qg~0}$*KEZfL!#RC93Mg;c& delta 20 acmX>qaa4l)G%qg~0}#ynSh|thiU$Bb#09$m diff --git a/core/xhs_spider/xhs_utils/__pycache__/xhs_util.cpython-312.pyc b/core/xhs_spider/xhs_utils/__pycache__/xhs_util.cpython-312.pyc index 7ee57ac01c47507b5a2f9b414f7d8de0d9a994a2..859f4e2726871a56ccd12ff8957abbf14d87c9cf 100644 GIT binary patch delta 20 acmX?Sbk2zTG%qg~0}yN!EZfNKEC~QVD+Nmc delta 20 acmX?Sbk2zTG%qg~0}%9mEZxZMEC~QZaRvtf diff --git a/resource/prompt/integration/system.txt b/resource/prompt/integration/system.txt new file mode 100644 index 0000000..a4cb144 --- /dev/null +++ b/resource/prompt/integration/system.txt @@ -0,0 +1,44 @@ +你是资料整理大师。当我给你各种杂乱的资料时,你会迅速进行分类整理,使其阅读变得清晰有序。 + +你的核心原则: +- 你最大程度的保留我给你的资料里面的每一个字,每一种形容,不要自己为了极简就删改 +- 你只需要把相同类别的内容整理到一起,清晰明确就好,不要对原文的文字内容做改变,要改变的只是让文本更清晰 +- 你需要根据不同景区/酒店主体分类,每一个项目主体按<景区/酒店名称><产品套餐名称><交通攻略/地址><游玩攻略/空泛补充信息>分类 + +整理任务: +你正在根据已有宣传文档和小红书笔记整理产品信息。你需要阅读所有材料,根据文档中提供的信息,整理得到: + +对于每个产品,需要包含以下信息: +- 产品名称 +- 产品使用规则 (使用规则、加价说明、预约规则、退改政策、优惠内容) +- 交通与地址(注:这里分上下部分:分别为交通指南和地址,需要给出准确无误的信息,按格式填写) +- 产品价格 +- 产品最突出优势(注:写出该商品最重要的优点,该优点是必须放在宣传中的) +- 空泛产品描述 (注:这里填写所有想要添加的正确的空泛信息:包括但不限于:商品周边景点;景点风景描述;商品描述;小贴士;游玩路线规划;游玩时长;营业时长等,需要按序号排列填写) + +输出格式要求: +必须输出严格的JSON格式,不要添加markdown代码块标记,直接输出纯JSON内容,结构如下: +{ + "attractions": [ + { + "name": "景区/酒店名称", + "products": [ + { + "product_name": "产品套餐名称", + "usage_rules": "产品使用规则详细说明", + "transportation": { + "guide": "交通指南", + "address": "详细地址" + }, + "price": "产品价格信息", + "key_advantages": "产品最突出优势", + "detailed_description": [ + "1. 详细描述项目1", + "2. 详细描述项目2", + "..." + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/resource/prompt/integration/user.txt b/resource/prompt/integration/user.txt new file mode 100644 index 0000000..e1f7935 --- /dev/null +++ b/resource/prompt/integration/user.txt @@ -0,0 +1,39 @@ +## 资料整理任务 +请根据以下材料,整理产品信息。搜索关键词:"{keywords}" + +## 原始文档资料 +以下是从用户上传的文档中提取的内容: + +{document_content} + +## 小红书相关笔记 +以下是搜索关键词"{keywords}"从小红书获取的相关笔记内容: + +{xhs_notes_content} + +## 整理要求 +请根据以上所有资料,按照系统提示词中的要求整理产品信息: + +1. **保留原文完整性** + - 最大程度保留资料中的每一个字、每一种形容 + - 不要为了极简而删改原文内容 + - 只整理分类,不改变文字内容 + +2. **分类整理** + - 按不同景区/酒店主体分类 + - 每个主体按:景区/酒店名称 → 产品套餐名称 → 交通攻略/地址 → 游玩攻略/空泛补充信息 + +3. **信息完整性** + - 产品名称 + - 产品使用规则(使用规则、加价说明、预约规则、退改政策、优惠内容) + - 交通与地址(分为交通指南和地址两部分) + - 产品价格 + - 产品最突出优势 + - 空泛产品描述(包含周边景点、风景描述、小贴士、游玩路线、时长等) + +4. **输出格式** + - 必须严格按照JSON格式输出 + - 按景区对象和产品分类 + - 确保JSON格式正确,可以被程序解析 + +请开始整理: \ No newline at end of file diff --git a/tests/test_final_integration.py b/tests/test_final_integration.py new file mode 100644 index 0000000..2ba9db8 --- /dev/null +++ b/tests/test_final_integration.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import requests +import json +from utils.file_io import process_llm_json_text + +def test_final_integration(): + """最终整合测试 - 展示完整的纯搜索整合功能""" + + url = "http://localhost:2714/api/v1/content-integration/integrate" + + # 测试数据 - 使用平级字段结构 + test_data = { + "keywords": ["馥桂萌宠园攻略"], + "cookies": "abRequestId=873258ba-49fe-530b-9c07-529e7871508d; webBuild=4.72.0; xsecappid=xhs-pc-web; a1=19808118ec0gxnuh5mtpv8o8aepgp2m65bpbtiizn30000193947; webId=3f4bd516682e25b71cdd139a11b4d896; web_session=040069b295652fcb5b6e1389413a4be4606547", + # 搜索配置参数(平级) + "sort_type": 2, # 最多点赞 + "note_type": 2, # 普通笔记 + "note_time": 3, # 半年内 + "note_range": 0, # 不限范围 + "pos_distance": 0, # 不限位置 + "query_num": 20 # 20条笔记快速测试 + } + + print("🎯 旅游内容整合系统 - 最终测试") + print("=" * 50) + print(f"📍 目标关键词: {', '.join(test_data['keywords'])}") + print(f"📊 搜索配置: 最多点赞排序,{test_data['query_num']}条笔记") + print(f"🔄 模式: 纯搜索整合(无文档上传)") + print() + + try: + print("⏳ 开始内容整合...") + response = requests.post( + url, + json=test_data, + headers={"Content-Type": "application/json"}, + timeout=120 + ) + + print(f"📊 响应状态: {response.status_code}") + + if response.status_code == 200: + result = response.json() + print("✅ API调用成功!") + + if result.get('success'): + print("🎉 内容整合成功!") + + # 显示处理统计 + processing_time = result.get('processing_time', 'N/A') + timestamp = result.get('timestamp', 'N/A') + print(f"⏱️ 处理时间: {processing_time}") + print(f"🕒 完成时间: {timestamp}") + + # 显示输入统计 + input_summary = result.get('input_summary', {}) + print(f"📄 处理文档: {input_summary.get('document_count', 0)} 个") + print(f"📝 XHS笔记: {input_summary.get('xhs_notes_count', 0)} 条") + + # 显示XHS信息 + xhs_info = result.get('xhs_info', {}) + total_interactions = xhs_info.get('total_interactions', 0) + authors = xhs_info.get('authors', []) + print(f"👥 涉及作者: {len(authors)} 位") + print(f"💬 总互动数: {total_interactions} (点赞+评论+分享)") + + # 解析整合内容 + content = result.get('integrated_content', '') + if content: + print("\n🔍 内容解析结果:") + try: + # 使用file_io模块解析JSON + parsed_content = process_llm_json_text(content) + + if parsed_content and isinstance(parsed_content, dict): + attractions = parsed_content.get('attractions', []) + print(f"✅ 成功解析JSON格式内容") + print(f"🏞️ 识别景区数量: {len(attractions)}") + + # 显示详细信息 + for i, attraction in enumerate(attractions, 1): + name = attraction.get('name', 'N/A') + products = attraction.get('products', []) + print(f"\n📍 景区 {i}: {name}") + print(f" 📦 产品数量: {len(products)}") + + for j, product in enumerate(products, 1): + print(f"\n 🎫 产品 {j}: {product.get('product_name', 'N/A')}") + print(f" 💰 价格: {product.get('price', 'N/A')}") + + # 显示优势 + advantages = product.get('key_advantages', '') + if advantages: + preview = advantages[:80] + "..." if len(advantages) > 80 else advantages + print(f" 🎯 核心优势: {preview}") + + # 显示交通信息 + transport = product.get('transportation', {}) + if isinstance(transport, dict): + address = transport.get('address', 'N/A') + guide = transport.get('guide', 'N/A') + print(f" 📍 地址: {address}") + if guide and guide != 'N/A': + guide_preview = guide[:60] + "..." if len(guide) > 60 else guide + print(f" 🚗 交通: {guide_preview}") + + # 显示详细描述数量 + descriptions = product.get('detailed_description', []) + if descriptions: + print(f" 📝 详细说明: {len(descriptions)} 条") + else: + print("❌ 内容解析失败或格式不正确") + print(f"📄 原始内容预览: {content[:200]}...") + + except Exception as e: + print(f"❌ 内容解析异常: {e}") + print(f"📄 原始内容预览: {content[:200]}...") + + # 显示输出文件 + output_file = result.get('output_file') + if output_file: + print(f"\n💾 结果已保存至: {output_file}") + + else: + error_msg = result.get('error_message', 'Unknown error') + print(f"❌ 内容整合失败: {error_msg}") + + else: + print("❌ API调用失败") + try: + error_info = response.json() + print(f"📄 错误详情: {json.dumps(error_info, ensure_ascii=False, indent=2)}") + except: + print(f"📄 错误响应: {response.text}") + + except requests.exceptions.Timeout: + print("⏰ 请求超时 - 内容整合需要较长时间,请稍候") + + except Exception as e: + print(f"❌ 测试异常: {e}") + + print("\n" + "=" * 50) + print("🏁 测试完成") + +if __name__ == "__main__": + test_final_integration() \ No newline at end of file