From 674082e7d71750500cb5b0c1a77ab10494947897 Mon Sep 17 00:00:00 2001 From: jinye_huang Date: Wed, 21 May 2025 09:49:21 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=BA=86=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E7=9A=84=E8=AF=BB=E5=8F=96=E6=A8=A1=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../content_judger.cpython-312.pyc | Bin 24458 -> 36022 bytes .../output_handler.cpython-312.pyc | Bin 26945 -> 28661 bytes .../tweet_generator.cpython-312.pyc | Bin 39775 -> 41899 bytes utils/content_judger.py | 459 ++++++++++++++---- utils/output_handler.py | 79 ++- utils/tweet_generator.py | 134 +++-- 6 files changed, 523 insertions(+), 149 deletions(-) diff --git a/utils/__pycache__/content_judger.cpython-312.pyc b/utils/__pycache__/content_judger.cpython-312.pyc index a00ffc680d3b5f4ddfa6d7c37d1c8cf8881aa896..4e7640eaa2308114969cfc1066108973ae3262d4 100644 GIT binary patch delta 14519 zcmcJ0c~~1qwr{ug6$ua^kPt#FlE4OoSMUmW1KW53Vq;?~n3gdZ1ZhcLuoO9oli(zd z@hFbL%NQq_7*Cu;8P80V#EHqwy*Fg;yWeX?@+U>Zd(V>(u>HOJzF;TwW*+(DzEj=W zyd`tLFSoI(x~oo|I(6!F^{HPSzjPh{{SYhZU!_tJ23KbDti6KQPbMkxw@>P8vk9DF zbqTi%&Ad(ps=n`X<8^5#&m?mt-O}ySlb$p@>EyqBsMrlOr!h0O9Vf8uECS!oCgkpu zxPEprAv%h07ZKv4*mf}?fmA}I5K<_U5=rosM5Gcjc#<7r85LK9_?-$e_-nM7pE@v>nh#9$tK(%l|FOzyk-{nY9Q~86!uP0 z#npogN?3Th`_iIo;uUxvr+;NYlJkA_RZB$$8}GY1vScj>f5Cls#b3lykedy^?<`#R z8%??*E6ag5U_uO307@X(1c%@fJc3XE2?zlpJSTCo9IQ^Rdqr(l%1Uf!28Ok;FwDtz zavXRk-+gh5VvmFK4iurwiNBJ`!3e&SYZEdh80Ns=!I^t#zBbaF=VaT&F$#8+f)MQk zPUIX1n-K5ENS=f1VE2L0PVOrjw|1*$F(GmA&SHf0QH`OTcb*$%g=4R*OlV%maWCDf zR3$~LNdX~qa0xk)oQ3rv$+_-jTQgiKkFO_E9bAwp#lh|5Mhv*0IfOROh{z>$L>`e3qY^np4;zx$ zDM4(|m~zDqNxUI#Nd-pgh7n`LF$cE+ z%foExQB`$jMepKyK1ObHCLz<)5k^N6m{`&In6={Q%|Wx5O008e^$_9yGu6#?i^Q zj@}x00j*Bwu6%p#73Q(7VPpNRPux?#JUZch_tu%CcTT@|=Z(S1cTeBC^7ag_o5A%n z_^cVcat1fd;KndEgKLcv5^o7Dvd|<%2Br2Q{uBczNb$xC9sueSW z6kAz2?kxkY>H{?o`tb(LZsRItWmX@7GhL)C+$gQ!+sB4S7CqRT@ z9Qa97A^FY*$!W}ix$jnKcupL?0c>|@yM_f>U7g0|WDz_Ei(ZM@4VaN%e+RK4m!f11 zN|vDnz5>i#8(Cp)JK1CJ4f9EJZ&z!Z*~kuayIXB-d&3f|jqI?roA$SM+09|`o{ocN zf_bo_DTO&E)sTnNp~gg-yIVWJMu6U8KG4-+FG1gB{z~9VA02#C4!&Uo&yiD zcV}|oRrJ10d~$s1+%e`&`S#iwy9#K~3*|X~`~6c>=Z*jwSk}b#_a@&uUQ$xB+K7jF zNIz{hJ6aPE3&hezzmE(t^H$zDb$#;Vw=1=FBtn#orn-Io#akmU!z()Z;>UMh8>-YA zQ^TSTYlp>ZYqhkQ!))Lm=62Z3-BxlvG!6^R7JIjuY_&mM4@r<{;S6&L^KN^4SOkqo zn^k(C!?rifB2jDmZnt)|#syU!-Mw9Aq;8@Kr~EMj;bY%}-TI(jU|WD>2v zVUEpw&=wZ7?QJDZ-L1W04!R4ABj(W!^BI=3b(vc&U=1`W-a$5_hRN2}J!UWj+KdpL zHO%jAb=hoY(h^pX=KW^U`anIeyOrE$Cc`4LwXL<+Y_gi4A~yjqayCkKA+93E?vV7> zWGF~nTIhSm2o;aAu0CB&kL#yQa0r-M=YvHb!6syxgZXc<1G4;3YDWLMY0g1hsGR0u z>6xK4<;^r@C?hNWFH0SY{L2nyW#3gLDUbnJexf=)WXI<(>AJkr@v_z)o7wjPf~@g z0p;$HQhQUO9bW13DGVpqPbUNKX(c9C2W5pmS>dofAe$Y^(G9nFay>e#V7@PBKD<>j zRixS$kj;8TwPH}>1Lfot2Xp57a^`wgdFT3bmZOHL0}h`oFJ80Ko9EA2HX!-V#lxiO zFwo5WkWv>?=G^6Ta+4@Y_H+_vtPC2K`wYwdhAO11I@~AEAK(wFLOBJ4%B|N4+hA5@k4RHYuS$Ld!tPAqZp zyd1x3X<|{d1fnTr5oT4isEBi?XC0Y ztYKtYi)6|u?_YC|!;uzEOJed3>#5?Z(ItMDYE%DuupzY}CZf%+T7t@C(##3D;^@aawv(N@NS$BhT6jaT@bA+sR&wTF6v~;` zR{k)Z27G6}&%&gN6B~|gpwvsRwT;g5FWX2h-9%+=4oJ3yQgbLt&fh0w%6k}Fs2rE1 z1|``(Nj9Zj=6%ZBMro=8lA4>T8OPWCFwF;o8S9hq61}!YRL94@$Vp$b0Q+J=YHcoi zOqf}_SIr*F)7Q#bUy5)j`cljRqA%sFS_SvZ6mG4W{bdFhm8iL>Hn#xo|HK#8&13&b zo>^DQ{*w_$_oZA&+j!7R^os4Iu7MkH8nfZPeK5@2DEo|&tVHh7Th5KH(LTO>W9_YY^g)y)~ri+|DS51kxy@ak{nV(ysrfo zy;Tml-v|Byxxyg{DC1Lqd&%tlUZhAb+hQ)WY-G5YG%qB?23P889GXPsyhxTGTM zc_H=~kz@NYXR=KnW#~wbb_2T~XJq%2;!Jg2FENo2Muu_%#Ijvgmh;b zk>X5;?Mhl_3DV2LbcX=$=ftIq-o-X=8IBBcr6U6dyBusqnINVzCQ9Z*VrItbB4TzD zu~Ff$uV4flz{y(M+-R2$kq!CT^c6J~pjZU^RS_80|0UAt{2uH@GaMo!4U7l|N2H_N zwvcIuVV#T6z_v!}I~PaG<2ex%JSt=-@}eHN;#3dvc18O9Id~J9=XlO6B8whb6^tf} z9u<@<0eKR8j*I7hXN#~YLiuyZ;K(9WbZ_)B;Ud%t?|np#QAL4;W#3mzD?g!DlZbIR zvKnCRi3;9{yz2)pA}87w#|W)c46i^=)dMu}m|$MjXm(wqXfub{!H=3MSk)5_F`4-j zUFJH(_irdIX+P0I7wrwlT=Y@|u05ISimpX!SK?Y!g#^r$Im8EXD+@-Imna~!_PB`r zI7gxY?75IAeDqvLbwL#V95Ws1!v$PB0V8?v`guB`O=QKdT-+9^S+K+$aA%r`(3N;PlnG z8hzMYPIZE^&@TCbi!d-k#1)u8pZ@?cJOXB=zW+`9(RTq^Epz=mcKV-v;pqP3Uxnx> zfBf4)6frvaiOwQaKWh24*gx21@i_sFkkPi=5V7I7o@Fo+2x?js@h5S@yU;0 zi*3Xac&ue`p+3QqQrq4R%f4k~9hBbr54gw`a7R|71R+9p0Nk;u*FT!Nda4W#0w!O& zaQo7mQ@&g{7ayf5beC0>D zl|Rg_udwCtj$MbHJBxI-4qKO57kOx7FqX(;i!(u9^V}clV(!CTgt;{5Fh9bxkwnmj zkr$SBA_T`|wYMSgCd{KB335Nwk_X&d>vLUUL93;;>yWj>8kR-xOuJjH<^}U;D1{MZ z>cqh0iPtAzcz*J|k1DkQ90Dw9peKo$4&y8>Q)#fHNs1KO%aOilr2_cW{q{3JH1(eAcY0cM{Z#+!`iS&;djYh2i1d+^DL{!BGrdG7&<@`~Z0gu&6W-&K z{T_ga09A}iK*OtlMF=Env+0)uOd~sTGMF(;zISnI*vo)CQ|Eqt`?*giKE8xblNdgT z!8t&fZeM?G^6W2fUH+J9wVJF14u5rbZBx2mtyk}|C!sFhfBV)aF91e#`~CL;NkTBq z!+k_iq{{;GAP|s;P{K?Rx97!CE{QVI0wv@Ql(e7(p-ALblr*7a8zfBH|DLo`re4-Agv+y3mUhmNRmgNiu>Ck|tCvw2^>I?rv?{2VhVS z3E-s3e6Wpdg*sz4gIP%cS0wChHdBWMC=NgkNutDx5`>tNy^tUfiy5M+g9!8B!QN#H z5PKchFK}%?s zy{!!V#it=vvz6R{n#tk~7^oEq2C|rFl}OCpy|zPrwoX468>E|xIK-MmS|*N4(oq1-a}tDEw-!$$tZ`eW-URq>S?5AU6IBTKz} zDsx3ZQUw5oQr*8c1WD~QPa!qltlvhp?Do~~9$s*1$;Blek+;g9x6DiW@~VS*YkhfZ z19^4SUJE7HLQ_qN`}mqUuA4cfK(~m>Svjy4O11gHoJGEzMP8*pr+Q%R_o(DT-%#I( z!JjjCVC}d7s|;)T+TWMIU!Zfh2=xbN9NuD=x3$t!wny77|pveZ}l`wkX>-IY<{q; z&R149HqT$SX*g-Txb*$X%atCBzj)QKU|g%eB)KRVae6m7k zEtpP*-tOkpBBdsm{aKNHGjILC`jEyjxcfrqQ0Iu+v)@ZzKlITdzovFz&0jJ!p@LcW zT$muUU~ttmi=SN(Dqj#RU*{`dH@3=OzGYYz?YzfdygJr7HG6eHy9V_qhL-mj#PLBzTy?2&Z3f#t}v8q9MSu7%R_m^BelM~1>=U2pka~E z07@Qp>3tP-!HUhkip|uPW`D(YzhOtvu-j+YO|^CU4g1E6<^+o>eMObtMWeO8#r46( zO}@oV)Dt`Xi+A~pOu?c(zM?%;yTxDBb63jGOCMM_t-zGJ;cdQ**`e}EZ>6t%^`LmT z&ZjILuUHBZZ zbhWJSF)Xc)6;c&aibAlZH1+9?CpS|0+X9N_kao>L?M;_L8B`d33geah_lqtUdA9kB zmW>L>7T-{8z01QC*~s8flfp&bOfSf^@`hp+l8I(h9Z=MOk`O1~tO zV9%i_^AUrm_N#dtM>W*umVja>NCGe#dZ{v9TLY49H**U^S(;F$Dx}ID*A@n~b9~x4 zo|F$WKgjeh^q1B6wJU?#bw2I7u{C~eNfav z8^+rGx+h>J)Jgs8@1|h#%+TzrQRAqJD&8<48QkuZ8^+5Q1dU21HQZF75Mxe*Rj-cG|mE7>dG#3bdn9gTq7ks15|6x2s3+P~S+1IN4P$5Dm zT~;c;W>7fJpwq)00p*u4~H?|!(_sw35}jP?eS~o^>0jo zBN<^eivyA+;}TaYf-?u}N0w7Xi>a(70m)LBQJwy0osW=akvG}PrqoNu^Nhi~xxT!) z)Phaa<}K7FR|7S#F*t9BZ{7~7rOiK&@aLI>d0oCddM@(pcaw#Rq`zW9Y0?kVQW({Y z^?$)i{yDEo-=xLNTUl@uT zXJccE#f=55FJ)}F|1xEz)CD#Frd_3k?5hf4gP!eYWi{y7el-tD{5mcw(X-Kg5v!q; z>z~B~hW>J4BhV~XH)gW~8Hh5F&4rSH4p9aQSdB&8fSw0k1ZE4HfM$WZNy)yEf+%k& zxlnQ=8&TfS3e%cZ>>I`!eRDedYdH=NU#D}MRaeO^2XXwe`+qm|les^8LXCjhCDHANgYCcx-Z?JB(V#;o z`f_jp8safu?TnH0M_&+GpmXvetfH$fyCvDdzkizv`+Av^ z|1|#)^o;U^y-|wY01PIspPzhpknt%aPA%haPk4Vb@yV|yK0ABc^BkOg_RV*D8j4-W zKcYPmYa@9XnumoZQ(IRn9OatA{3gpjOV0s|ksFpq+$Y#3w6>e!T#xpqY=?Ty;Kp@y zo8gqQyO%tL7>H>Hi8*8L_`$_+XB5yrDR~~1q5@{P5zTbA)T=6^51bOpmrZ)S~GR*e>nsRETvgXRl8Lp?#wg0D0Ss2#+3 zw)QQ>rOb<&Z>s#+a5h*J%F*8y^Rklq>%LEMVJYc@*@Lp-10#n=y1nzM^s3Rh)aGr0 z^~+eYf9LEEaBG7y?U(aO%{Z;fws{m z3%0Hcx6Ru{BG7|80*O&X?@Sm&i0^RF_LF=CCC5?nDoReEWB?K)kC|7LW9$NzG5Tk`}<8Izz-ED}Yi0WLV;ZftSp)1ewh4kHQOE8Rab zPAElhKlz&byOvq_Gj8q90v674ui0rYY>TaZKp=r1y^s#WDtQ`W3n8|R521_z(u_5l|-GvgCDeHt}CVaeq{DX$NhBbxY`most> zQbGtELRck*5JgC{fqMu&BgAJp=XvoCEx8U(w22Pl3gg7m2yvoxnETr;=`IJ$Awo+m zEU%F-008P@2-g#L3PF|!E?9Qu(Pfu>_c1iFvo7-7V<_S=iPD#2`p9O~se!0OSb7Ns zJ>s;)qXC4uqeDp-kscp(Lfqt|MNTF%q9R9IIVC^YD&-Naq!DP+{dPr$i%>cwAZDgR z3TalNm<}zC1YZ+23W5c7&@0e!Nv`V)Cu|*JkpSV z$|@e+>H({uy&ziuv{f46NcfR2P1NpU>W*3sD1mvyK;Z(2E-SMC4hlrkFV>{@?K>|X zr{kqQhR{3kDiTpD1_MPgRiC=2-aMk2`rXU$eU1TNQ>TA_`^8^RzBIzHa6h$Ui~EZm z`A>GYqTn)!v@%-(GK4^^w=P9GxpHyplqc3X5&}Y{dQp57cnGt!ww_*7?<_3@omtHg zPG#yf143y#RH?11Wn#J~Pdp0@-2Zb&@m_?xRB9Q-j*gr}$lT=H&jLg@b?gWV45P<& z=ebWAm=D$V4>8ak6C5_-Ju-FWA{sL7S};sv!qG@_y}i`kxo%$`!)D__hp_>p8$u241W7h)rZ%$mjpUc~?!+@ChH?5(pl=h<#tBFsq-+FE2 z&L@Mn-+POZ5+c{w58`bIf?OSkA8E|{n{&@=r}|w8L;v>LGZ8Qx;fK%>BQylg-G22$ zz=#=;lrf)guk}wp{{bV39R+m&`M1}O*-MbrG|LFbI|(9#H~26o`m^t{BPVY9g8QZW zCj8bX7jC}?e9thjG;><8)SeEsj9-kVhDNTVRY(-3%8vF3-<*FX!Z4yVT4UOn!9+21 z7Y(8>rD1R2d4G3uVDw-|B=UXfV}x%`eDJOJg!b0E$0k2}9;U8rk6i|Jw=VqxDen4N zI8;FA3n@?*n>)nq*yV{&hiFnLb}`~jo%?tK;vA8K`pwzbv`Al{eMZCF558iELrnjw z2=DU{as2CuhCjJDb@T*d@icD#&6!~&roB2*4v>(JvHZ2`;dwzlcwTtK>qLX(^mYa2 z>nM8tZ++6w&_!+IOjNIoTET}yQ%3vbVRmrIbeDSrJm<&V)WH z84X0;PK3MDE9RlT!kYWG4Gf)IzKQQL#i_DjQ*C_~{6*M2WMLV~esL&0t6w}Jke!em zlTaC}M-Pv7Qw@O6?WZ~qP-zDPf(qZxq(wPSn!d*_#(RP*jYecR1c8Xk-y*Le#(hdtd?O*dt6Qr%C3$S!;pjaIHk zlBca<#URhk0YwAsgR-^5yi1~sBG<@5PqkmW(63$uIGZMSxbRZZ#iEgBFUzl6?AI(A z5XV%udi2ofK5BD2wbx3u+bCNfm3=rMciha#9*`z*{D&CIPRc0VT+c?zHFxD`6|mjX z@Ok&BiE0Fd${OrG;OjmBgE)k*ML3}auSH|&W~Qc&wH-a}U=b+W&N8FS%(A0wKdXuj znVnUQvfLUrnu3*VP^wfpSmu+c``01+*!PvpcvG2qZuNx?uAvQQH;%OW)TIC>4{$?j z{YbG-J>QG_)D;Zy2<9^A9BJ~*9a-mbjUJ{N8b5cA_E1frho`7^68f>?)u?k#4L##4 zS!nx-LMi9pkmpW-!O?@=FxEqD*hTF+L}~j1^25^|I0lmJa0QYK-&^3nkH}BRj>!h~ z!^I=57iSOWP;*F1X$=T$pxHF#p!kApNH)Uvr1+Kd{EGSg!twOX!Qu<0L!~1|&lZ2y zB7b^Czj!?Qrhs97A^TiLZ8G^+JV`BYrCOh&$iviA4iM9cHy~*nStgV{*}!^|&RKg< zmfnuWAl%CWHBo{rI$GVxuCK3$?<(@lNpM!NTkxYVBThaIH%87Kw8@JTkbYDuCqIJB z{nwpF7Y%5j(;^aKAY>X!=+irTNqinXB6xuuMaeTL`3jQD7~MYuKcGz(iIPar`da0+@yP( zxB~8ydx|t}<##0-uISzhJcG;mzI+qTRWaS7&H$DPLdKjj(#tuG(maRCegO#tw;!@H zyKy8E`2|V}PzA5G7yLXTEN$x{-8@qo9&)QqO3^sfA=lNq#=Xp>H~blshA{Svf@cIT c%AS!?@`V9x(f`17f8%Tw;;MTXN@-#KKLlO#k^lez delta 3870 zcma)9eQ*=U72lKYWa)IWge{OiWcwr=JHq%ww(%FnjXy##1PpOXz(5pPCya%W$ev`d zkx)oT;v_WKbA*qC&m=8m55^hRRACWM{s1ndI!_Z5I_bV2OPB@aEcFP$@y;0=$F6KKZRb704L}sIM6NQ zh;BKTE*v9D3bY)(pXjDJ^?uN;=5!o`-WX1U-ZX z%=2UlukL1Y<$gIwHX$iTAx?ogzq|_+0enk!l zr2RC!wzN5Sa_*sFI%FFG#{|f|WEXIkqmXq{c8H*;%>gBf`k&2tof^OLZE!-C@C~r{IQYWU^DlaLIh|fxuhZ>>cCQECertc?z5Tb%BQH-L{v>hf;J23! zOr3pq^5jTM$JFIRiMLN;g_mDST=+yN+gDTooxHE#+sHmR`^Krn+1GD>CQP0@F4PZ} z3gs^-f=M5VkvEX}%C62elW&G5e{o?hlp$qPg?C$@dwcU8Tz1Q0DWXrsH7+05>$LGc zhr`M9J_8cYI+7xoJo0hkl~3;+I@ZYgve4XOHW4~8b@9!~gCmK9hw(CZUObl=e)%2| z+;fizKwPnFeEEnzTrct7M^nG~Wg{C0zCx@dEi${HG=(+;tu}S>;;qYPCih)zWGz&j z^7eWAo$<8U3X~#;OOSb7X?NTE2l+nUq7KF>erJE5H%_{Ek2_BFd+Zz^*WwV{oemG@ zd<;SlgbR=v#I+PBxAwWYI0f+l7hy5pk+_`qLW>IKU|x~j`?zi=^x*Ara<*BC3NK>g z>|T3ZDX~r&gu>OHg+X40EK}#tLL>H*Zj(Z@%_cBpY)fLvO1$^5{KvB=kOluNI-}>o zw7x*q@J+$OFk_5qvyZM9wS}=l%h{$gO(8l`xOBWlENl+z*zXk}JMR>Ij1Cu8cSf>m zhC5=W;^B^wUXjUbRMow&th9_)OzGn~I9v@O&f z{@K%E96P#d+&eCV-bM+CnW~3u#DIe)L+AHddW7Z{z z^N9NLSZ>i*`l25uGL18UK-LpF!x7D>Cd@XEKR50O=eI_6ZKAI2{kpvQ5qf~8` z)V1`iwo>X^F@+_i6qZ!VF<;Lh&Ff@VyGefCL?PaxKyCq;EqmP5B*ca!Mr>i8J74(o z+K=SGrNeNH#tb|XKv>(Z&-zgQLDCv*Mn(Z;KowB=fy*Ev_xW`i61`;O7wsID1ZY3) z%9UXEREL^`#a==<@5vJ;n$c0R-ms0Kt=-5|6Mnsce`7anxBUKh4on zOOUL!4_K#GvY_6X>b0C2>8IfWL8B6nUK1kH<1*t|RkQQ1g;JDdT`D>Sgszg5h(agNOK(=Kr@(znarZ`KzUt_{eNBxW6<981L*I z1OV3}9DJtE&-}R4BzJw8%v+YS^fRf1Kb)7^H>r=w0?b|por*O6rz2T11lN{1=T${pDTaTT0*47Ar zeAcM75OI|g1E*~V-)k`l?e>OJn1R&cJWTZ15TToWz=@G@r^n4Z;YvJHg-N#H9xw_& z*o%S~Uy-NX4(Cp9pT`Yb(Yr;HL|F!*E9Y}~ZGG;o9w}kC23vPxvKEsk5Q(c(3)uQN z*oGy9EzZoQYLh@8tDs(KwfcAcDYH*y~uDCSrkECR}mUkrk>{gmB-HZ(yUw zGSOHTTaa_aHR_6)7Di2VqNz?`xuW2-0vOq7Zn>CSer_O=TRZ#&lIV<4ZGosQIGuKG z{!MKK2087hHf&ygQ@0{!$c-ARMMHJy@ra=*tZTyhhNJzWcG0=Malcr;YEFIqP2GlA z<&qKRs7=(B{kNeKn1kg}lT|cXLlswQBBm9wocvh!!qdHCHbUBxb1E^nYJx3}vXvrR z88VLdiq_>(>uS-u`bKZWx<0}_9c7;p*=NGGtr50&!u&|oTqT;TLQPjxV)e>s^*XV7 zUHHk3k?QV7)rK=GBAF|{ zVpiUtrm)>Tela@4tBTf=$1A|K@+w5Ho7yyJ_@Y9!qE`Mzef0{f{6;B(_#0MgMeSJB zwVNO;!rJ~}qDT0;zmn({=p81478dWgK9<3sk*TFJt4D5OqwsrQP0)uw>SPHfE}WS> zJ)XL=Sd>x)!F5=7t~eoXJFp8ATt*Q8n_{8kbRXa6=Dl{e!zq32a2cL##srr}oRq%N z&G>x_CeL7ELxi;`q_RjB!q1T;u1DnT;2mR;x^T1>1L{6h<6HH7f)kgvfaU$bV7 zMGn>20Jptf^phS7;bRc<;H}M)9#`6IZu<_W%_b;cw*;|sn1>0zHA37ylen-IbKRKW z4B&c1-Ukq07^SNcq%em`2Kw=<5!nyECOf|)dkJdE>8xph$8kP6hP$`il>{?S%4zB` zVkT=Il|55jMLkX5-A2?=tB4s$D5<8q`h1GM`zT?c$mCLZ0ZTd@LPPRkq-!BYX1D>7 zxXKId4rhu8;^eA;Az#=}cz|gf!D6GwEBcWXr{Ar>mK_ z-}}Db_xrB5((2N6@Yx-pzD5vA3|(3E*4{zpocfSO!fIi25)I$6d{N89aq_)@B^UzT zF;>CJ8Nz_j=pI*v?hZ}J0@%0^5@Y3pC5#}dU`KW`ki&CX28H^xL(C#<^NksX(iI67B*R2j|;%3?0@o-6Yxi+4Ojqoy> zA4}XCcqqH&%^U^n$uSWLr-7Izr(qHyk+XI_gAN{<-#|jrI5xho;b^z?`#Q2$o_vK;Oj&$@>$+$WD)xD&G{_=q5AdD_;T@) z-rzZ0Jgh9J#;qJF3@k5iW?l(QH+nzsS0)7P{1ieor|`pSW>LVut?|= znq!o;KZ#u-TGI;q^{j*`asZ|np-Q4r=p^KbD6*6j&q5SOn7xc6{*xWm9F9%excFzR zp-#oyqpHyC%!X^)Oz`$1`2vGG*=3!143b^B5joN=b_t4%reQT8=5kj-#Lmq;aY$cr zA|{Go!0TXZp(-Ppkua6XSj2{qRnez)aI?s*6}T6;if6guxx&?WHAjUeR~yO2P0yJe zJ^$p2TJXgj4gXw}1`^whY83WR?4kSxE*D&+wE2V+dWt z8Rx0N_=<4|3@%1!B%vBi@HZutSatCdj{a}i9*~GJ_JMtvm}mA55M>8=~oXxOR2lBE(JC9*3*j;ku;^+d6xkaG=~0^FtH}o-I$I6)!m3LpJ7?CL*>w`?|Wi zolbbI+>&5INb{Tvo?Au2#ENpsS8~`~G0`7Kdn){@Jie&Tm*X$mFkvm_%bNVwl><$@ z-f}%IYl2AgCi{tE-rnGA^4m8}6qNH7P5y!q!0M07eH4o&XO5%{r|?(Net?)rG9Psuad*{`?E1YQ)RqO;HRxIKG#bJ-2bhSDXI zLWRz8{@=bq%Jw)G(^_@n-TKd4fJ|uj({%uztjS9>D3*iUXc#*^ z8KbhG*8T`1;dD)73+iaoNt{$zui27I`q#EwRf?5NZf{nFBS*pR#wj}O_+66fEXMB^O9i=7x@Yt3o;NaMy zt(#-O<*`p%hho5=plin`vF`}xaoF0m4SWU9clD|+3W|loas}ox+28}XiRp@ch!igu z!9~jraInh&u8uunXi4(B0_>8o)CtQfVQCSThcK(>=k`8{bRF=ne}sjEac5ZI*KdkQ t&+m*#2e?l~*b|2o#`c~CKKOQT8F&o;-n(4e7YnFkyFbH(J1e|^{Ts3jCIJ8d delta 2352 zcmaJ?eQXow8Nc`5ob9u7=dbfuY!aK;P8=tIZ4xIyOc8}7)WG5l^3h7H;sU{Z*7l%C zXR4{%M72mw_xu4Vu%v&CWex4pSsmFHZJpW)w$a5cN#jIF8yfBY+LjcmO4~ns&*sPn z+wP?2e(&@Ap6B_!kM8dM>d)x6pCbN`9H*tomAq9qVCj30|9%a9frEWk7u>A*7d))B zVYOHe4FL~k$@8Uvhbt4y$7wl*qOv-8sm_Ku5}6NpU`wq|&M@+#N@0llt8|uw;ky0E z082q1EC$&!i4#rW40f(|nPFtDOMvS7H7pDKc&*C{!G;wqft0sxBkDv>(wwDIF-aqG z@Y7%o;$XVYvD{Qmh;~ARUx(f69pk9DB??iZvzVtLc}@p$5}H${{UhAe;8Hf0)w5Kl zs+2+~dU2g-xKTNdOP)q5Ysh%u{_`$g;v|Ep601b_4bM0O*`_F7FX`cacxTA>q?mD4 zG2_$4BsL{T2Kde<0jk1`*?)~*)+=dKMkN-~;}Xwe^Gmub)GA*2;~F+p4sV59ajj&O zbu`pH(P3Jp!}MQt$Qmq`pu}FGME^CsLPas+sF$=)MRX}kQJ=-pAZuEaS6+;EMr`Kt zr((sL+{P7MICyV!jnAAFGQlUq7c%t%k57W^qTp{tW6g6}F1%8Mg{F_K;2?ArY6VroNU}Lid`?EZBMymqV zlAeh5N<4W1R_w5sD&>&uiB;K+B-{45OJG%(wkNzsWGSj>k}O3PmJ604H24gO6GT+3 zgX_)qQjJK9Wc`2r&(o!<=g>LoW!y`hLxC+|Z25sDN`g7dBORM=FOa$J7(1yN=Bl`GV6o5*v<<1tt=Aoy~cT zyZTv$n_t&C{B1i{#rZ_TX*@)M)p~BkBCT)R!^vBjLnM_ z2kBiMXdZOC9jE}l-GeG4$8CK?e&)N6(>rPKwheG{p{1hSe?O{(H`{7;gt9~tBdQXQY?G zGY&Ud%vSJyJT+Tu>O**Ti<{ujoNTumf9AD!(|C?%2%gh2-GX{fuOU_EY;2DT&$*pF z2p163^MVQ&SfLq%4d#N91KY}gq&tr{Kf|7 zdZmDuN;PHq<-;TCEpp}pGZR>(N4a_R3d<; z;A+B+R4|!1>-vpM`amY>&!kQt?oX%t#Y36?mow?q&tXs32~{;Y;L`hZQ(d#jH-J9y z+%pK5*f-dGMR2Th^1$s-K5=j^apaT4ky*phuQB2V;GF|b^#EF=WITOfd(ceXLcVr2 zzU6nfo2lDsrrn^vU9Og~ne8y)+phW!4*#BGI!w8igB+{+NY?Z_h#&eA_2jmHtGf(c z&Rr6R%Fr*M_Sj!lKa;~bFeaZz({M65p!=2V@XO{y_)9W?J_YUZ!{`B=Ivzz=bJvf% z=!%YDrmJr zpH>{P(y>ly?ZQlYrl=iDr#(lk_RN<;4>aRBgEI#4oK8!tT}J=tyqkca(|(xWeed4; z?z_v|{O4cTbC+4)t3E#74E+j&61NY0-0d5LzUYqm#TJd6{()i;%zQqS3h3`ns)%S5 zS)@YIlBg3|#JU9*3H|xdpD!ooycdlEyGCDexqJ)#`d`^1kd`~SnhukwG#sqV_zIxx;)*R)e^ECZt z`P>3Jy>xaao$MUzmRMw_8M_Qq@8S`Dp;YC^$xbq-*f~p0EVsZSh^;= z%&u9?cA-PVkAzV;XVcZv#k*E%!b2I5;Ak&qUWp{XU7{pfB6q%Y{`4x@ZR1S(HQ;AHFoo3`NgdEDPuFkY5QW1S%G=$W^hJ`TjNq!xGJjbwv_4 zU$U8@n^?J9dFVqoZAGojF2QD|6+Plxq}#WoL{9neu6bbL^+zqlrI62*_pKPVTnUB~ z=QcFW3JT8s`<(BdzjgHVWM|jyj>A*$9n-|#Ln&s^iu?A%L$}^J;Jmc2U67nsW0q3h ztv6nFcD>{rI7Ky+lap(Ie*(VqIklf(gYWAcb0F248hC#4*!z=%Z#pmh(Rt*#$zuaI z2R$!jX>b}9lF`5nwmp!nEeIsP3ktE~C6t+uAESsDQ!v9mclP$*di!m2G;L14{hHe| zIx+RqdGPeqOCLCoyfWE)VDea}I{^~RaWSG%0A|7q&|vy+{#xS2Nx4?EvI zM16vgE|SStd$TlV86A_Tu}=1N-2Ow)?PDKNXCH9g!)BT7u`4ld#u}X&*EQ8@s+*fk z`X-a6R^Oz@T2ozflSXSY=^J;LH0?r74OP5{vdj{2X<)xQR}c&^9t1;^XNCh~%6jRN zvvXjwcaQV+H{5Yj9h$LNGb~A{!Q}D2N!u}ZjUEab?gW}BbB^1vdnTHtn->pH_P+K1 z1E{U3kB&M|z6!HZwZT1fsvLJd+QZx-?!e8}dZQ7-G-!<`4b~gY22+;i={S$M61hr- zbHP6u*&7s`=MZ6SldicDmrx+c7lGH@aQY-op|R&T=j(9MRfmws2Z zeg|YQE|81{E!H~3joMw6m@3ujjSew1+A6D?%}pkU82q6k@MorDYHrXs86CbZiK(tp zPgRX3hft?CXxkhLt*+i|H0gDfP#dU}8SBY{;KT*^NjkYPkW6%hkkn2Q(FOVT5O);sE#?&k*960bBda#? z1)F*QEu$Xg(;`N$*i$&}ubhQ|;5{W1-T^(T5%1_}Hr-2)CZr)f>ke8cf`WVVPZysk zKDoAEYY$Da2c>iguge0DOS&a|_~L=Wf$Sk<9ZDEd57+aXxAB!#{HAKY8uM!7sLXU- zra121?cK9vR2Fep#Kb1_KQWjvEU;%4+7pU~t4Cr=d0F%WYw3JOA)j8vtBb#sl}yAu zMrG8~xU~0t-t-xlse7L7&*f9|hV*>#);j_=&etwk*qPsz-?Q2}?a8PjIt#}`G!)c} zI!h*`0iC6$rb|1V~c=<6tj03#>ZO1(&*H2 z{mKE=V9ih?zoBX*w7Ro+TopT}O1G=hM^u@eMKl(LW=tMsmq+y#^luoIFCLVPWv;Sk zt{M&;&0NQqRgIO^*vo48+8v{1J168}eSRtSSpP-&JyCk%;aC$3Z#L6DksCV>Tdb0{8 z9c%Zl?W*oc=uz{F^t_^G)T8!05#uSJW`$mP=z3^uziOaiD1A7a-%>jgy4{^)>s*q@ z<>7rQUY^KH5~sZxWl&%4fMQr^7nkr)mGk0q-lP2IyGe{J_ZKE zRl_QNL)jJeu$13e!EdeSD;oF)1K-%r8=mE#<@hkmsATuJByh%Y*d7gENaI|K*V--V z;ra{vvj-)-dgYh0d??Dy%1anj^Z8YLSoNq(_sh@IQkeGs-DsB!2tt~3VcFo$!D>D% zcTBRvE?IGZS^)BUMmPmNUbMnv^%CaOgj^*hm)Ygflw9$OER1KaEb)pcj1gQ}wmiHr zQgAg=NIO^Ky@~<^R~H8r$pqJA!lD4%a@A#4h;`Hy5r?fuHKa03g<8n7VZB}k%5v-yJ4#cZ$m=dNZV)rr4fi3M>@j()i9D*9j?~>1Z-J*Q@P2||_CAXtQTu(a~ZeoW(YcM#x%d`fw-gQM_ zm>O#V;0(tNq%SoSb&#)8Rp>bRAvFaZB@5Hu5q?00skX7SQUS8s;+MQH72;URBH!ng zBONJO`6N0+j;y@l?eNq#X$@`0I-{*4e+mWasZyK@F9X{f09>w|Ap2Kkpijvqif!b_ zRbk@oAgcvvA>pgzQ3F}CIyTvrwn5;=1DprA0AK~!0#FW60dSG@t+t9jf##=V;hNO- zp)*%|WxZKftH+hbHhOoLMwB+oXNOYA9 zuh}O2n##Iu357e^$i%F%gc5r~$yh?EJ)x9eUpJCa&r4#jtK<7K`4yF;>St`1ilbR+ z(tz3?wQPJPRoQA^xy|<7+HVlz$i%u1bi%g1G%r&2SMWyxa0%ce@}cSTxNEfe(1i_m zH3qIjY|_>mE333defs10GuxldSCQDY!u|s)2T5n^=jiXYvRx5~#nq&(ZHN^J$P?}T ztWscewx2?3ME?_qqd=LZEh9W7H5v?$6)Z&LG?#`lh@D%+iV*oP_qccn1b+kg#FlCC zMd(Fa!R}oM>1;#RpAb^o2KV}+2v^ZF9xZws46_4#L2f;_ETkQp@Y->&Ja{>Nxt;PYpL`)Si5G&HyBF@1+&hV~sE)y-IsJz-oN$i1MI4Xq1gZ%<|}yq_?fakzkj zLr~Xba-S4%4qylHgf%pDlf$E`x!HhWGvExcEHs&S81%TF-0x9)@1h*~k1;MKktZ5` V#$#gd2sa^Afu>tnThEEh=zp4YP;LMK delta 2517 zcmaKt3s6+o8Gz5Z%U$=$1$X8B6j7jvC?ZOOAQ818YXuY^P}g<$0*m{=vzKD*T|tcm z5+CTvOpL|`+DXdPX=8gc5gXGoQ;jwzGj|Ie@#;*?6pgld&}yd{?P$-xI1`)c^uT`i zKmX%A?)lGuPhaBx{2mv3#cDMn)_iXlwCUbI8k>aGqw^jsdH63gQ=lw|b_-}^t%wT^OkJS_V zMvTn0a1GJbtQMZ~kCQE)_>(c7nX>sLI%W7KRS~`(hmb89VehgW}x}*e93aDnFLR!gpcF(CH zBuKJa#ehg$k~h$r6=Lv-S?*v!5(83J3$s4J<@vGnQF?MYX=ITaAM%M}ho&d4fG6lD z8)3*oKrO%ls9?~H;x4z?A$fy={CL`wQ9{4UD70(l4ifZtNKTJSa*-$t%=X|NnI^_yN@@><2w3R*eX5V7Tr8}g_0D0lwP z97B!?OY-sW4s*k`BeT@1b@y~!t|?;3?$ZzGj~k~9$TG8U`Sfhv4mGXxjQ@;tbj7HD z)TyrbswwT`mK_u6nZwm1c6Hr`h^ zYKvEGYFFF4rx04ft>)OHidzGZgzs_p5i&l<-8Z1*+`-1-rjfa$^VPboV@b|F>r{6N zO3J;OxG0jiXe@DYpJgH;=W0SxB%x?5p}5a{Q%F7DqzZYeA@A!+v;7~4WAEyQLAYv2 zju?{Fw5rkcQQ`7VHk!K5sXDi*DXwwD_KD=wVeZWO%QX?9US0201?Qg(&WBSx1V0EB zup>B?yfO)$tv0Vr%0$1{t+2D+g(PE5obE#QoEo$4B9EDT(ahJxDR1Un;&c!20=i>v z0~YD;=B8ply*~F1qnC}N;datad*+q#dsy$Ra(>ebGTQfrvXh#gOEbynZp*Dc#Kg=NIX#OV07c&19vqY&R~{ zbggWCP-Qfo%jeTfjV@oO7`-zDCMPZcBs*!N|6aN@9Tu$3l7byxw=?uY$oVAKJ6uv5 z8K8B`7T`m4@3LR;XPBQ}DJiel;Vz}O;$6Y6&1C+P=zltdRyvw+ke+l@VK=?u_%yL| zCX1UKIdrH0om;&JZvFDyt-jvdKORsnt-XU2J9HSquE5jeqM~pk^ zlDZ6Y3#eNGe(J0%z-{#9y1b(3tNs;e;pEA$0ScfCum#WrXa>AXZ`O6|&x7~@73+(O zvY*roXM3loRV1+YZ|#MI!MhxAkp8wl*|-Bl9|OHpKO0}9=7z2OB+frg#j?1hzA ziyI=v4OfddMv6D8n*w9SLDe$if(otrc)Vi z8xiSGUU@5d2po7{uuCMGj$KfV_p*Zwk&o%NgBy6J@&@Q{4*I&InIb{l|4NsqUKhCB z2EfOO+yUGL{1xywz%jt*fG+@F0=@#|0a$IEzVn*R7j$_ z6W{?io}6OH*L_wJZDWRhK!TuunHvVRmRWec9tVT|t?5H?*$ c=Ih9M9SO8&Xru9-t_W|#Q`@mJIdlpC2Q~Wb1^@s6 diff --git a/utils/content_judger.py b/utils/content_judger.py index feab470..813c6d7 100644 --- a/utils/content_judger.py +++ b/utils/content_judger.py @@ -12,6 +12,7 @@ import traceback import sys import base64 import re +import random sys.path.append('/root/autodl-tmp/TravelContentCreator') # 添加项目根目录 from core.ai_agent import AI_Agent @@ -66,8 +67,8 @@ class ContentJudger: 6. 特征语句保留:请保留文案中原本的引流语句,不要修改或删除。请保留文案中的换行符 \\n,不要修改或删除换行符。 7. 面向人群保留:请尽量保留文案原本的面向人群和风格,这是同一产品面向多种人群营销的策略。例如产品资料中写明亲子游时,文案写"为情侣定制的山水秘境"是可以接受的。 8. 案例如下,请参考案例评判真假信息的尺度,逐行逐句仔细分析不符点和修改思路,并按照分析思路落实对每一处不符的修改措施,严格审查每一篇文案: -{ -"产品资料": +[ + "产品资料": "周末不加收【南沙越秀喜来登】1088元/套,豪华客房1间1晚+双人自助早餐+自助晚餐+2大1小水鸟世界门票,免费儿童乐园,户外泳池+健身房~ 不想待在家,又想带娃出去玩?更不想开长途车、人挤人?为你推荐路程短、不塞车、景点多、坐地铁就能直达的溜娃地! 南沙越秀喜来登是广州南沙区首家国际品牌酒店,坐拥广州南大门,拥有得天独厚的中心位置,可俯瞰蕉门河美景,车程短,不出广州也能玩! @@ -97,15 +98,10 @@ class ContentJudger: 酒店地址:广东省广州市南沙区海熙大街79-80号 导航关键词:广州南沙越秀喜来登酒店" - - -"生成文案":"[ + + "生成文案": "title": "五一遛娃👶必囤!南沙喜来登1088元住景观房+双早+门票", - - "content": " - 五一不想挤人潮?南沙这家酒店直接承包遛娃+度假双重快乐‼️\n地铁直达!2大1小1088元住景观房,含双早+自助晚餐+水鸟世界门票,儿童乐园/泳池/健身房全开放!\n🌟【遛娃刚需全配齐】\n✅ 儿童乐园:10:00-20:00全程开放,滑梯/积木/绘本一应俱全\n✅ 户外泳池:9:00-18:00恒温开放(五一期间每日消毒3次)\n✅ 健身房:8:00-22:00配备亲子瑜伽课程(需提前预约)\n\n📍【1小时玩转南沙】\n① 南沙天后宫(车程20分钟):穿汉服拍大片,听妈祖传说涨知识\n② 南沙湿地公园(40分钟):5月芦苇摇曳,带娃认鸟类+乘船探秘\n③ 十九涌海鲜街(45分钟):现捞现煮生猛海鲜,人均50元吃到撑\n\n🍽️【家长友好细节】\n• 自助晚餐隐藏彩蛋:儿童餐区设独立洗手台+热食保温柜\n• 房内配置:加厚床垫/卡通洗漱杯/尿布台(无需额外购买)\n• 安全保障:全区域监控+24小时安保巡逻\n\n🎁【五一专属加码】\n5月1-5日期间入住,凭房卡可免费领取儿童防晒冰袖+湿巾礼包\n\n📌Tips:\n1. 周一至周四仅限双床房型,周五起可选大床房\n2. 水鸟世界门票需提前1小时至前台领取纸质票\n3. 地铁四号线金洲站下车,打车15分钟直达酒店\n\n这个五一,南沙喜来登让你躺着遛娃!不用长途跋涉,家门口就能玩出仪式感~" " -]" -} + "content": "五一不想挤人潮?南沙这家酒店直接承包遛娃+度假双重快乐‼️\n地铁直达!2大1小1088元住景观房,含双早+自助晚餐+水鸟世界门票,儿童乐园/泳池/健身房全开放!\n🌟【遛娃刚需全配齐】\n✅ 儿童乐园:10:00-20:00全程开放,滑梯/积木/绘本一应俱全\n✅ 户外泳池:9:00-18:00恒温开放(五一期间每日消毒3次)\n✅ 健身房:8:00-22:00配备亲子瑜伽课程(需提前预约)\n\n📍【1小时玩转南沙】\n① 南沙天后宫(车程20分钟):穿汉服拍大片,听妈祖传说涨知识\n② 南沙湿地公园(40分钟):5月芦苇摇曳,带娃认鸟类+乘船探秘\n③ 十九涌海鲜街(45分钟):现捞现煮生猛海鲜,人均50元吃到撑\n\n🍽️【家长友好细节】\n• 自助晚餐隐藏彩蛋:儿童餐区设独立洗手台+热食保温柜\n• 房内配置:加厚床垫/卡通洗漱杯/尿布台(无需额外购买)\n• 安全保障:全区域监控+24小时安保巡逻\n\n🎁【五一专属加码】\n5月1-5日期间入住,凭房卡可免费领取儿童防晒冰袖+湿巾礼包\n\n📌Tips:\n1. 周一至周四仅限双床房型,周五起可选大床房\n2. 水鸟世界门票需提前1小时至前台领取纸质票\n3. 地铁四号线金洲站下车,打车15分钟直达酒店\n\n这个五一,南沙喜来登让你躺着遛娃!不用长途跋涉,家门口就能玩出仪式感~" 输出结果: { @@ -113,8 +109,8 @@ class ContentJudger: "title": "五一遛娃👶必囤!喜来登1088景观房", "content": "五一不想挤人潮?南沙这家酒店直接承包遛娃+度假双重快乐‼️\n地铁直达!2大1小1088r住景观房,含双早+自助晚餐+水鸟世界门票,儿童乐园/泳池/健身房全开放!\n🌟【遛娃刚需全配齐】\n✅ 儿童乐园:酒店设有免费儿童乐园,提供丰富的游乐设施,让孩子们尽情玩耍\n✅ 户外泳池:酒店配有户外无边泳池,供大人小孩一同享受清凉时光 \n✅ 健身房:酒店提供免费健身中心,适合家庭成员共同锻炼。\n\n📍【1小时玩转南沙】\n① 南沙天后宫(车程20分钟):穿汉服拍大片,听妈祖传说涨知识\n② 南沙湿地公园(40分钟):5月芦苇摇曳,带娃认鸟类+乘船探秘\n③ 十九涌海鲜街(45分钟):现捞现煮生猛海鲜,人均50r吃到撑 \n\n🍽️【家长友好细节】 \n• 自助餐厅:供应鲜美海鲜、精美甜品等任君选择,大人小孩都爱吃 \n• 房内配置:55英寸超大纯平电视+独立的浴缸+超大的落地玻璃窗,尽览蕉门河风景,尽享亲子度假时光 \n• 安全保障:酒店设有完善的监控系统和安保措施,全力保障您与家人的安全 \n\n🎁【套餐专属福利】\n1、豪华客房一间一晚(周一至四只开放双床房) \n2、2大1小自助早晚餐 \n3、赠送2大1小水鸟世界门票(酒店前台领取),无需额外购买 \n\n📌Tips: \n1. 周一至周四仅限双床房型,周五起可选大床房 \n2. 酒店前台领取水鸟世界纸质门票 \n3. 地铁四号线金洲站下车,打车15分钟直达酒店 \n\n这个五一,南沙喜来登让你躺着遛娃!不用长途跋涉,家门口就能玩出仪式感~\n" } - -8. 必须按照以下格式输出修改后内容,不需要输出无关内容 +] +9. 必须按照以下格式输出修改后内容,不需要输出无关内容 { "analysis" : "分析过程", "title": "修改后的标题", @@ -154,58 +150,133 @@ class ContentJudger: logging.error(f"从PromptManager获取系统提示词失败: {e}") return False - def _split_content(self, result): - """ - 参考tweet_generator的处理方式,解析AI返回的内容 + def _preprocess_for_json(self, text): + """预处理文本,处理JSON结构中的问题字符""" + if not isinstance(text, str): + return text - Args: - result: AI返回的原始结果 - - Returns: - dict: 解析后的JSON数据 - """ try: - # 处理AI可能返回的思考部分 - processed_result = result - if "" in result: - processed_result = result.split("")[1] # 取标签后的内容 + # 1. 处理特殊Unicode字符和标点符号 + char_map = { + '"': '"', # 特殊Unicode引号替换为标准双引号 + '"': '"', # 特殊Unicode引号替换为标准双引号 + ''': "'", # 特殊Unicode单引号替换为标准单引号 + ''': "'", # 特殊Unicode单引号替换为标准单引号 + ',': ',', # 中文逗号替换为英文逗号 + ':': ':', # 中文冒号替换为英文冒号 + '(': '(', # 中文括号替换为英文括号 + ')': ')', # 中文括号替换为英文括号 + '\u200b': '', # 零宽空格直接移除 + '\u200c': '', # 零宽不连字直接移除 + '\u200d': '', # 零宽连字直接移除 + '\u2028': ' ', # 行分隔符替换为空格 + '\u2029': ' ' # 段落分隔符替换为空格 + } - # 直接尝试解析JSON - json_data = json.loads(processed_result) - json_data["error"] = False - json_data["judge_success"] = True - return json_data + # 应用字符替换 + for char, replacement in char_map.items(): + text = text.replace(char, replacement) - except json.JSONDecodeError as json_err: - # JSON解析失败,记录错误并尝试更基本的处理方法 - logging.warning(f"解析内容时出错: {json_err}, 尝试提取JSON部分") + # 2. 处理控制字符 (ASCII < 32) + cleaned_text = "" + for i, char in enumerate(text): + if ord(char) < 32: # ASCII 32以下是控制字符 + if char in ['\n', '\r', '\t']: # 保留这些常用控制字符 + cleaned_text += char + else: # 删除其他控制字符 + logging.debug(f"移除位置{i}的无效控制字符(ASCII: {ord(char)})") + continue + else: + cleaned_text += char - try: - # 尝试找到JSON部分(从第一个{到最后一个}) - json_start = processed_result.find('{') - json_end = processed_result.rfind('}') + 1 + # 3. 处理JSON结构特定问题 + # 处理大括号附近的换行符和空白 + if cleaned_text.startswith('{\n'): + cleaned_text = '{' + cleaned_text[2:] + + if cleaned_text.startswith('{ '): + cleaned_text = '{' + cleaned_text[2:] - if json_start >= 0 and json_end > json_start: - json_str = processed_result[json_start:json_end] - json_data = json.loads(json_str) - json_data["error"] = False - json_data["judge_success"] = True - return json_data - except Exception as e: - logging.error(f"尝试提取JSON部分失败: {e}") - + if '\n}' in cleaned_text: + cleaned_text = cleaned_text.replace('\n}', '}') + + if ' }' in cleaned_text: + cleaned_text = cleaned_text.replace(' }', '}') + + # 4. 处理转义序列 - 保留\n、\r、\t的转义,移除其他转义 + import re + + # 第一步:将要保留的转义序列临时替换为安全标记 + safe_replacements = { + r'\\n': '@NEWLINE@', # 保留换行转义 + r'\\r': '@RETURN@', # 保留回车转义 + r'\\t': '@TAB@', # 保留制表符转义 + } + + # 应用安全替换 + for pattern, replacement in safe_replacements.items(): + cleaned_text = re.sub(pattern, replacement, cleaned_text) + + # 第二步:移除除JSON必要转义外的所有反斜杠转义 + # 处理常见的多余转义情况 + cleaned_text = re.sub(r'\\([^\\/"bfnrtu])', r'\1', cleaned_text) # 移除非特殊字符前的反斜杠 + cleaned_text = cleaned_text.replace('\\"', '"') # 将转义的双引号还原为普通双引号 + cleaned_text = cleaned_text.replace('\\\'', '\'') # 将转义的单引号还原为普通单引号 + cleaned_text = cleaned_text.replace('\\\\', '\\') # 将双反斜杠替换为单反斜杠 + + # 第三步:将安全标记替换回原始转义序列 + reverse_replacements = { + '@NEWLINE@': '\\n', # 还原换行转义 + '@RETURN@': '\\r', # 还原回车转义 + '@TAB@': '\\t', # 还原制表符转义 + } + + # 应用反向替换 + for marker, escape_seq in reverse_replacements.items(): + cleaned_text = cleaned_text.replace(marker, escape_seq) + + # 第四步:再次检查并修复字符串内的换行符(确保100%处理) + # 这个额外的步骤确保没有任何字符串值中包含实际的换行符 + pattern = r'"([^"\\]*(\\.[^"\\]*)*)"' # 匹配所有JSON字符串(包括已经有转义字符的) + + def fix_remaining_newlines(match): + string_value = match.group(1) + # 确保所有实际换行符都被转义 + fixed_value = string_value.replace('\n', '\\n').replace('\r', '\\r') + return f'"{fixed_value}"' + + cleaned_text = re.sub(pattern, fix_remaining_newlines, cleaned_text) + + # 5. 确保逗号后换行不会导致问题 + cleaned_text = cleaned_text.replace(',\n', ', ') # 替换逗号后的换行为空格 + + # 6. 尝试解析检验 + try: + # 尝试进行轻度解析验证 + json.loads(cleaned_text) + # 如果能成功解析,直接返回 + return cleaned_text + except json.JSONDecodeError as e: + logging.debug(f"预处理后JSON仍有问题:{e},尝试最后的修复...") + # 最后的处理:使用simplejson替代内置json库尝试修复 + try: + import simplejson + # 加载后再保存,让simplejson自己处理一些小问题 + fixed_json = simplejson.loads(cleaned_text, strict=False) + return simplejson.dumps(fixed_json) + except: + # simplejson也失败了,继续后续流程 + pass + + # 7. 记录处理后的文本,以便调试 + logging.debug(f"JSON预处理后的文本长度: {len(cleaned_text)}") + return cleaned_text + except Exception as e: - logging.error(f"解析内容时出错: {e}") - - # 所有解析方法都失败,返回一个默认结果 - return { - "title": "", - "content": "", - "error": True, - "judge_success": False, - "analysis": f"内容解析失败,错误信息: {str(e)}" - } - + logging.exception(f"JSON预处理过程中出错: {e}") + # 发生异常时,返回原始文本,不做修改 + return text + def judge_content(self, product_info, content, temperature=0.2, top_p=0.5, presence_penalty=0.0): """审核内容""" logging.info("开始内容审核流程") @@ -220,48 +291,159 @@ class ContentJudger: system_prompt=self._system_prompt, user_prompt=user_prompt, file_folder=None, - temperature=self._temperature, - top_p=self._topp, - presence_penalty=self._presence_penatly, + temperature=temperature, # 使用传入的参数 + top_p=top_p, # 使用传入的参数 + presence_penalty=presence_penalty, # 使用传入的参数 ) # 保存原始响应以便调试 self._save_response(result, response_id) + logging.info(f"AI响应长度: {len(result)} 字符") - # 使用简化的解析方法处理响应 - content_json = self._split_content(result) + # 尝试多种方法提取JSON + json_obj = None + error_msg = None - # 检查解析结果是否有错误 - if content_json.get("error", False): - logging.warning(f"内容解析失败,使用原内容") - return self._create_fallback_result(content) + # 方法1: 提取{...}的JSON部分 + try: + # 移除思考部分 + processed_result = result.split("", 1)[-1].strip() if "" in result else result + + # 找到最外层的大括号 + json_start = processed_result.find('{') + json_end = processed_result.rfind('}') + 1 + + if json_start >= 0 and json_end > json_start: + # 提取JSON字符串 + json_str = processed_result[json_start:json_end] + + # 预处理JSON字符串 + json_str = self._preprocess_for_json(json_str) + + # 尝试解析JSON + json_obj = json.loads(json_str) + logging.info("方法1成功解析JSON") + except Exception as e: + error_msg = f"方法1解析JSON失败: {e}" + logging.debug(error_msg) + # 继续尝试其他方法 - # 检查必要字段是否存在 - if "title" not in content_json or "content" not in content_json: - logging.warning(f"解析结果缺少必要字段 'title' 或 'content'") - content_json["judge_success"] = False - return self._create_fallback_result(content) + # 方法2: 尝试多行解析,逐行检查是否有合法JSON + if not json_obj: + try: + lines = result.split('\n') + for i, line in enumerate(lines): + line = line.strip() + if line.startswith('{') and line.endswith('}'): + try: + # 尝试处理和解析这一行 + processed_line = self._preprocess_for_json(line) + json_obj = json.loads(processed_line) + logging.info(f"方法2在第{i+1}行成功解析JSON") + break + except: + # 继续尝试下一行 + pass + except Exception as e: + if not error_msg: + error_msg = f"方法2解析JSON失败: {e}" + logging.debug(error_msg) - # 添加Base64编码内容 - result_dict = { - "judge_success": content_json.get("judge_success", True), + # 方法3: 尝试使用正则表达式匹配最可能的JSON部分 + if not json_obj: + try: + import re + # 尝试匹配 {..."title":...,"content":...} + json_pattern = r'\{[^{}]*"title"[^{}]*"content"[^{}]*\}' + matches = re.findall(json_pattern, result, re.DOTALL) + + if matches: + for match in matches: + try: + processed_match = self._preprocess_for_json(match) + json_obj = json.loads(processed_match) + logging.info("方法3成功解析JSON") + break + except: + # 继续尝试下一个匹配 + pass + except Exception as e: + if not error_msg: + error_msg = f"方法3解析JSON失败: {e}" + logging.debug(error_msg) + + # 处理解析结果 + if json_obj and isinstance(json_obj, dict): + # 验证关键字段 + if "title" in json_obj and "content" in json_obj: + # 构建结果字典 + result_dict = { + "judge_success": True, + "judged": True, + "title": json_obj["title"], + "content": json_obj["content"], + "title_base64": base64.b64encode(json_obj["title"].encode('utf-8')).decode('utf-8'), + "content_base64": base64.b64encode(json_obj["content"].encode('utf-8')).decode('utf-8') + } + + # 添加分析字段(如果存在) + if "analysis" in json_obj: + result_dict["analysis"] = json_obj["analysis"] + result_dict["analysis_base64"] = base64.b64encode(json_obj["analysis"].encode('utf-8')).decode('utf-8') + + logging.info(f"成功提取内容: 标题({len(json_obj['title'])}字符), 内容({len(json_obj['content'])}字符)") + return result_dict + else: + # JSON对象缺少必要字段 + logging.warning("解析的JSON缺少必要字段'title'或'content'") + error_msg = "缺少必要字段'title'或'content'" + # 保存错误日志 + self._save_error_json(json.dumps(json_obj), error_msg, response_id) + else: + # 未找到有效的JSON + if error_msg: + logging.warning(f"JSON解析失败: {error_msg}") + else: + logging.warning("找不到有效的JSON结构") + + # 保存可能的JSON字符串以供调试 + if json_start >= 0 and json_end > json_start: + json_str = processed_result[json_start:json_end] + self._save_error_json(json_str, error_msg or "解析失败", response_id) + + # 所有方法都失败,返回空内容 + logging.info("内容审核过程未能产生有效结果,返回空内容") + empty_result = { + "judge_success": False, "judged": True, - "title": content_json["title"], - "content": content_json["content"], - "title_base64": base64.b64encode(content_json["title"].encode('utf-8')).decode('utf-8'), - "content_base64": base64.b64encode(content_json["content"].encode('utf-8')).decode('utf-8') + "title": "", + "content": "", + "title_base64": base64.b64encode("".encode('utf-8')).decode('utf-8'), + "content_base64": base64.b64encode("".encode('utf-8')).decode('utf-8') } - # 如果有analysis字段,也包含 - if "analysis" in content_json: - result_dict["analysis"] = content_json["analysis"] - result_dict["analysis_base64"] = base64.b64encode(content_json["analysis"].encode('utf-8')).decode('utf-8') - - return result_dict + if error_msg: + empty_result["analysis"] = f"内容审核失败: {error_msg}" + empty_result["analysis_base64"] = base64.b64encode(f"内容审核失败: {error_msg}".encode('utf-8')).decode('utf-8') + return empty_result + except Exception as e: + # 捕获所有异常 + error_traceback = traceback.format_exc() logging.exception(f"审核过程中出错: {e}") - return self._create_fallback_result(content, error_msg=str(e)) + logging.debug(f"详细错误: {error_traceback}") + + return { + "judge_success": False, + "judged": True, + "title": "", + "content": "", + "title_base64": base64.b64encode("".encode('utf-8')).decode('utf-8'), + "content_base64": base64.b64encode("".encode('utf-8')).decode('utf-8'), + "analysis": f"内容审核过程出错: {e}", + "analysis_base64": base64.b64encode(f"内容审核过程出错: {e}".encode('utf-8')).decode('utf-8') + } def _save_response(self, response, response_id): """保存原始响应""" @@ -273,6 +455,29 @@ class ContentJudger: except Exception as e: logging.error(f"保存原始响应失败: {e}") + def _save_error_json(self, json_str, error, response_id): + """保存错误的JSON字符串以供调试""" + try: + error_log_dir = "/root/autodl-tmp/TravelContentCreator/log/json_errors" + os.makedirs(error_log_dir, exist_ok=True) + + # 创建包含错误信息和原始JSON的日志 + error_info = { + "error_message": str(error), + "error_type": error.__class__.__name__ if hasattr(error, "__class__") else "Unknown", + "timestamp": int(time.time()), + "response_id": response_id, + "json_string": json_str + } + + # 保存到文件 + with open(f"{error_log_dir}/error_{response_id}.json", "w", encoding="utf-8") as f: + json.dump(error_info, f, ensure_ascii=False, indent=2) + + logging.info(f"已保存错误JSON到 {error_log_dir}/error_{response_id}.json") + except Exception as e: + logging.error(f"保存错误JSON失败: {e}") + def _create_fallback_result(self, content, error_msg="解析失败"): """创建回退结果""" if isinstance(content, str): @@ -328,4 +533,86 @@ class ContentJudger: ## 运营生成的文案(需要审核的内容): {content_str} -""" \ No newline at end of file +""" + + def judge_content_with_retry(self, product_info, content, max_retries=3, temperature=0.2, top_p=0.5, presence_penalty=0.0): + """ + 带重试机制的内容审核方法,当检测到空内容时自动重试 + + Args: + product_info: 产品资料 + content: 需要审核的内容 + max_retries: 最大重试次数 + temperature, top_p, presence_penalty: AI生成参数 + + Returns: + dict: 审核结果,如果所有重试都失败,则返回最后一次的失败结果 + """ + retry_count = 0 + last_result = None + + logging.info(f"开始内容审核流程,最大重试次数: {max_retries},初始温度参数: {temperature}") + + while retry_count <= max_retries: + current_attempt = retry_count + 1 + + if retry_count > 0: + # 每次重试增加温度参数,增加多样性 + adjusted_temperature = min(temperature + (retry_count * 0.1), 0.9) + logging.info(f"🔄 内容审核重试 ({current_attempt}/{max_retries+1}),调整温度参数为: {adjusted_temperature:.2f}") + else: + adjusted_temperature = temperature + logging.info(f"⏳ 内容审核首次尝试 (1/{max_retries+1}),使用默认温度: {adjusted_temperature:.2f}") + + # 调用基本的审核方法 + result = self.judge_content( + product_info, + content, + temperature=adjusted_temperature, + top_p=top_p, + presence_penalty=presence_penalty + ) + + last_result = result + + # 检查结果是否为空内容 + if result.get("judge_success", False) and result.get("title") and result.get("content"): + # 成功获取有效内容,返回结果 + if retry_count > 0: + logging.info(f"✅ 成功!在第{retry_count}次重试后获取有效内容(共尝试{current_attempt}次)") + else: + logging.info(f"✅ 成功!首次尝试已获取有效内容") + + # 添加审核内容长度统计 + title_len = len(result.get("title", "")) + content_len = len(result.get("content", "")) + logging.info(f"📊 审核结果统计:标题长度={title_len}字符,内容长度={content_len}字符") + + return result + else: + # 记录当前尝试的结果状态 + title_len = len(result.get("title", "")) + content_len = len(result.get("content", "")) + logging.warning(f"❌ 审核尝试 {current_attempt}/{max_retries+1} 失败,judge_success={result.get('judge_success')},标题长度={title_len},内容长度={content_len}") + + # 重试次数增加 + retry_count += 1 + + if retry_count <= max_retries: + # 在重试前稍微等待,避免过快请求 + delay = 1 + random.random() * 2 # 1-3秒随机延迟 + remaining = max_retries - retry_count + 1 + logging.info(f"⏱️ 等待{delay:.1f}秒后进行第{retry_count+1}次尝试,剩余{remaining}次尝试机会") + time.sleep(delay) + else: + logging.warning(f"⛔ 已达到最大重试次数,共尝试{current_attempt}次均未获取满意结果") + + # 所有重试都失败,返回最后一次结果 + logging.warning(f"⚠️ {max_retries+1}次尝试后仍未获取有效内容,将返回最后一次结果") + + # 记录最后返回内容的基本信息 + title_len = len(last_result.get("title", "")) + content_len = len(last_result.get("content", "")) + logging.info(f"📄 最终返回内容:judge_success={last_result.get('judge_success')},标题长度={title_len}字符,内容长度={content_len}字符") + + return last_result \ No newline at end of file diff --git a/utils/output_handler.py b/utils/output_handler.py index b6d9b14..784d5e1 100644 --- a/utils/output_handler.py +++ b/utils/output_handler.py @@ -164,10 +164,19 @@ class FileSystemOutputHandler(OutputHandler): if "tags" in input_data and "original_tags" not in input_data: input_data["original_tags"] = input_data["tags"] + # 统一审核分析字段,优先使用judge_analysis,其次使用不良内容分析 + if "judge_analysis" not in input_data and "不良内容分析" in input_data: + input_data["judge_analysis"] = input_data["不良内容分析"] + elif "不良内容分析" not in input_data and "judge_analysis" in input_data: + input_data["不良内容分析"] = input_data["judge_analysis"] + # 保存原始值用于txt文件生成和调试 - original_title = input_data.get("title", "") - original_content = input_data.get("content", "") - original_tags = input_data.get("tags", "") + original_title = input_data.get("original_title", input_data.get("title", "")) + original_content = input_data.get("original_content", input_data.get("content", "")) + original_tags = input_data.get("original_tags", input_data.get("tags", "")) + judge_title = input_data.get("title", "") + judge_content = input_data.get("content", "") + judge_tags = input_data.get("tags", "") original_judge_analysis = input_data.get("judge_analysis", "") # 创建一个只包含元数据和base64编码的输出数据对象 @@ -201,9 +210,10 @@ class FileSystemOutputHandler(OutputHandler): if "original_tags" in input_data and input_data["original_tags"]: output_data["original_tags_base64"] = base64.b64encode(input_data["original_tags"].encode('utf-8')).decode('ascii') - # 5. 审核分析 - if "judge_analysis" in input_data and input_data["judge_analysis"]: - output_data["judge_analysis_base64"] = base64.b64encode(input_data["judge_analysis"].encode('utf-8')).decode('ascii') + # 5. 审核分析 - 检查judge_analysis和不良内容分析两个字段 + judge_analysis = input_data.get("judge_analysis", input_data.get("不良内容分析", "")) + if judge_analysis: + output_data["judge_analysis_base64"] = base64.b64encode(judge_analysis.encode('utf-8')).decode('ascii') logging.info("成功添加Base64编码内容") except Exception as e: @@ -226,20 +236,47 @@ class FileSystemOutputHandler(OutputHandler): # 创建一份article.txt文件以便直接查看 txt_path = os.path.join(variant_dir, "article.txt") try: - # 使用原始内容,保留所有换行符 + # 重新组织内容显示,明确区分原始内容和审核后内容 with open(txt_path, "w", encoding="utf-8") as f: - if original_title: + # 根据审核状态决定显示哪些内容 + is_judged = input_data.get("judged", False) + is_judge_success = input_data.get("judge_success", False) + + if is_judged and is_judge_success: + # 显示审核后的内容 + f.write(f"{judge_title}\n\n") + if judge_content: + f.write(judge_content) + if judge_tags: + f.write(f"\n\n{judge_tags}") + + # 在最后添加原始内容作为参考 + if original_title != judge_title or original_content != judge_content: + f.write("\n\n=== 原始内容 ===\n") + f.write(f"{original_title}\n\n") + if original_content: + f.write(original_content) + if original_tags and original_tags != judge_tags: + f.write(f"\n\n{original_tags}") + elif is_judged and not is_judge_success: + # 审核失败,显示审核失败信息和原始内容 + f.write("审核失败\n\n") f.write(f"{original_title}\n\n") + if original_content: + f.write(original_content) + if original_tags: + f.write(f"\n\n{original_tags}") + else: + # 未审核,直接显示原始内容 + f.write(f"{original_title}\n\n") + if original_content: + f.write(original_content) + if original_tags: + f.write(f"\n\n{original_tags}") - # 保持原始内容的所有换行符 - if original_content: - f.write(original_content) - - if original_tags: - f.write(f"\n\n{original_tags}") - + # 添加审核分析信息(如果有) if original_judge_analysis: - f.write(f"\n\n审核分析:\n{original_judge_analysis}") + f.write(f"\n\n=== 审核分析 ===\n{original_judge_analysis}") logging.info(f"Article text saved to: {txt_path}") except Exception as e: @@ -253,8 +290,16 @@ class FileSystemOutputHandler(OutputHandler): f.write(f"原始内容: {original_content}\n\n") if original_tags: f.write(f"原始标签: {original_tags}\n\n") + + if is_judged: + f.write(f"审核状态: {'成功' if is_judge_success else '失败'}\n") + if is_judge_success: + f.write(f"审核后标题: {judge_title}\n\n") + f.write(f"审核后内容: {judge_content}\n\n") + if original_judge_analysis: f.write(f"审核分析: {original_judge_analysis}\n\n") + f.write("---处理后---\n\n") for key, value in output_data.items(): if isinstance(value, str): @@ -335,7 +380,7 @@ class FileSystemOutputHandler(OutputHandler): # 保存配置到JSON文件 config_file_path = os.path.join(variant_dir, f"topic_{topic_index}_poster_configs.json") with open(config_file_path, 'w', encoding='utf-8') as f: - json.dump(processed_configs, f, ensure_ascii=False, indent=4, cls=self.SafeJSONEncoder) + json.dump(processed_configs, f, ensure_ascii=False, indent=4) logging.info(f"Successfully saved poster configs to {config_file_path}") except Exception as e: logging.error(f"Error saving poster configs: {e}") diff --git a/utils/tweet_generator.py b/utils/tweet_generator.py index 23b90c8..e03f417 100644 --- a/utils/tweet_generator.py +++ b/utils/tweet_generator.py @@ -133,52 +133,94 @@ def generate_topics(ai_agent, system_prompt, user_prompt, run_id, temperature=0. def generate_single_content(ai_agent, system_prompt, user_prompt, item, run_id, - article_index, variant_index, temperature=0.3, top_p=0.4, presence_penalty=1.5): + article_index, variant_index, temperature=0.3, top_p=0.4, presence_penalty=1.5, + max_retries=3): """Generates single content variant data. Returns (content_json, user_prompt) or (None, None).""" logging.info(f"Generating content for topic {article_index}, variant {variant_index}") - try: - if not system_prompt or not user_prompt: - logging.error("System or User prompt is empty. Cannot generate content.") - return None, None + + if not system_prompt or not user_prompt: + logging.error("System or User prompt is empty. Cannot generate content.") + return None, None + + logging.debug(f"Using pre-constructed prompts. User prompt length: {len(user_prompt)}") + + # 实现重试逻辑 + retry_count = 0 + last_result = None + last_tokens = None + last_time_cost = None + + while retry_count <= max_retries: + try: + # 只有重试时增加延迟和调整参数 + if retry_count > 0: + # 添加随机延迟避免频繁请求 + delay = 1 + random.random() * 2 # 1-3秒随机延迟 + logging.info(f"内容生成重试 ({retry_count}/{max_retries}),等待{delay:.1f}秒后尝试...") + time.sleep(delay) + + # 调整温度参数,增加多样性 + adjusted_temperature = min(temperature + (retry_count * 0.1), 0.9) + logging.info(f"调整温度参数为: {adjusted_temperature}") + else: + adjusted_temperature = temperature - logging.debug(f"Using pre-constructed prompts. User prompt length: {len(user_prompt)}") - - time.sleep(random.random() * 0.5) - - # Generate content (non-streaming work returns result, tokens, time_cost) - result, tokens, time_cost = ai_agent.work( - system_prompt, user_prompt, "", temperature, top_p, presence_penalty - ) - - if result is None: # Check if AI call failed - logging.error(f"AI agent work failed for {article_index}_{variant_index}. No result returned.") - return {"title": "", "content": "", "error": True, "judge_success": False}, user_prompt # 添加judge_success字段 + # Generate content (non-streaming work returns result, tokens, time_cost) + result, tokens, time_cost = ai_agent.work( + system_prompt, user_prompt, "", adjusted_temperature, top_p, presence_penalty + ) - logging.info(f"Content generation for {article_index}_{variant_index} completed in {time_cost:.2f}s. Estimated tokens: {tokens}") + last_result = result + last_tokens = tokens + last_time_cost = time_cost + + if result is None: # Check if AI call failed completely + logging.error(f"AI agent work failed for {article_index}_{variant_index}. No result returned.") + retry_count += 1 + continue + + logging.info(f"Content generation for {article_index}_{variant_index} completed in {time_cost:.2f}s. Estimated tokens: {tokens}") - # --- Create tweetContent object (handles parsing) --- - # Pass user_prompt instead of full prompt? Yes, user_prompt is what we need later. - tweet_content = tweetContent(result, user_prompt, run_id, article_index, variant_index) - - # --- Remove Saving Logic --- - # run_specific_output_dir = os.path.join(output_dir, run_id) # output_dir no longer available - # variant_result_dir = os.path.join(run_specific_output_dir, f"{article_index}_{variant_index}") - # os.makedirs(variant_result_dir, exist_ok=True) - # content_save_path = os.path.join(variant_result_dir, "article.json") - # prompt_save_path = os.path.join(variant_result_dir, "tweet_prompt.txt") - # tweet_content.save_content(content_save_path) # Method removed - # tweet_content.save_prompt(prompt_save_path) # Method removed - # --- End Remove Saving Logic --- - - # Return the data needed by the output handler - content_json = tweet_content.get_json_data() - prompt_data = tweet_content.get_prompt() # Get the stored user prompt - - return content_json, prompt_data # Return data pair - - except Exception as e: - logging.exception(f"Error generating single content for {article_index}_{variant_index}:") - return {"title": "", "content": "", "error": True, "judge_success": False}, user_prompt # 添加judge_success字段 + # --- Create tweetContent object (handles parsing) --- + tweet_content = tweetContent(result, user_prompt, run_id, article_index, variant_index) + content_json = tweet_content.get_json_data() + + # 检查是否成功解析到有效内容 + if not content_json.get("error", False) and content_json.get("title") and content_json.get("content"): + # 成功获取有效内容 + if retry_count > 0: + logging.info(f"在第{retry_count}次重试后成功获取有效内容") + # 返回成功结果 + return content_json, user_prompt + else: + logging.warning(f"内容解析失败或内容不完整,结果: {content_json.get('error')}, 标题长度: {len(content_json.get('title', ''))}, 内容长度: {len(content_json.get('content', ''))}") + + # 如果到这里,说明内容生成或解析有问题,需要重试 + retry_count += 1 + + except Exception as e: + logging.exception(f"Error during content generation attempt {retry_count+1} for {article_index}_{variant_index}:") + retry_count += 1 + + if retry_count <= max_retries: + logging.info(f"将尝试第{retry_count}次重试...") + else: + logging.error(f"达到最大重试次数({max_retries}),无法生成有效内容") + + # 所有重试都失败,返回最后一次的结果(即使不完整) + logging.warning(f"在{max_retries}次尝试后仍未生成有效内容,返回最后一次结果") + + # 如果有最后一次结果,尝试使用它 + if last_result: + try: + tweet_content = tweetContent(last_result, user_prompt, run_id, article_index, variant_index) + content_json = tweet_content.get_json_data() + return content_json, user_prompt + except Exception as e: + logging.exception(f"Error processing last result: {e}") + + # 完全失败的情况,返回空内容 + return {"title": "", "content": "", "error": True, "judge_success": False}, user_prompt def generate_content(ai_agent, system_prompt, topics, output_dir, run_id, prompts_dir, resource_dir, variants=2, temperature=0.3, start_index=0, end_index=None): @@ -457,8 +499,8 @@ def generate_content_for_topic(ai_agent: AI_Agent, logging.info("成功获取产品资料,初始化ContentJudger...") # 从配置中读取系统提示词路径(脚本级别无法直接获取,需要传递) # 使用ai_agent的model_name或api_url判断是否使用主AI模型,避免额外资源占用 - content_judger_system_prompt_path = prompt_manager._system_prompt_cache.get("judger_system_prompt") - content_judger = ContentJudger(ai_agent, system_prompt_path=content_judger_system_prompt_path) + content_judger_system_prompt = prompt_manager._system_prompt_cache.get("judger_system_prompt") + content_judger = ContentJudger(ai_agent, system_prompt=content_judger_system_prompt) else: logging.warning("未能获取产品资料,内容审核功能将被跳过") enable_content_judge = False @@ -521,9 +563,9 @@ def generate_content_for_topic(ai_agent: AI_Agent, content_json["judged"] = True # 添加judge_success状态 content_json["judge_success"] = judged_result.get("judge_success", False) - # 可选:保存审核分析结果 - if "不良内容分析" in judged_result: - content_json["judge_analysis"] = judged_result["不良内容分析"] + # 处理分析结果,优先使用"analysis"字段,兼容"不良内容分析"字段 + if "analysis" in judged_result: + content_json["judge_analysis"] = judged_result["analysis"] else: logging.warning(f" 审核结果缺少title或content字段,保留原内容") content_json["judge_success"] = False