From c8c4031696d5f2c0fcb223ff9ab5e2613f02e318 Mon Sep 17 00:00:00 2001 From: jinye_huang Date: Sun, 18 May 2025 22:29:29 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=BA=86=E5=AD=98=E5=82=A8?= =?UTF-8?q?=E6=96=B9=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../output_handler.cpython-312.pyc | Bin 17085 -> 22584 bytes utils/output_handler.py | 156 +++++++++++++++--- 2 files changed, 130 insertions(+), 26 deletions(-) diff --git a/utils/__pycache__/output_handler.cpython-312.pyc b/utils/__pycache__/output_handler.cpython-312.pyc index df72c069ad3b96600a9e005a8a0548c971b63eb4..6896c913beafba709918f6b76c50f3ddccf184cc 100644 GIT binary patch delta 10168 zcmb6<2~<>9mjBnUom#1#VkvftO%xCcS!7e=8Utv@C5ZJ)1rf}zA_x}2?wADAb_@BP zfE_cE?xa&5O+sR3n(ic%eI{G?R9Qu!E1qflw75*qIYDJ6iO!ib_x@S{gXuHxz^%93 zckg}gzU97q|2vP9<71@abGckfK>F2xTKBD4Go&z*PY-1l@1_ZwY!`16^N7_1O&uj@ zzJu2mi--%;St?ZCCgm^%fGKncqO~zxtq5wxj+khztcSEphK^_V^TNb#B|>JHa1h%_ zn%Ksp$!!$PJ4$Th(*j^lZWHwIt-_$Pp=FO_ZNsLG^&R$3+QD>HK(Fank4-=Bx%$ha z*l_jjXRnU_c>1L0>$i_jpLp%^TQ5z&c=YPv6IYHMxpL~*)z{ye{)e9e>hvqaSI_E$-tX-p*8l=%d^eEqR1+8fmFZ zjvO($sR(~N?+|Z)96>NbItG88!Q`u|1Ysuz^2<4ZzciLOpot61fR27hL;)4G68u9HB z;wHgMDfyE&ajJ^g6N`PC;u!&vo-t}4ZStge8eiLfX8X9#pI7D0T^UHJzJVRS z14(2A@mN+4d7JFJO>Spr%Juq{Ky}`1>2Riw@nBx`tSV33AD#{aOb3C}afaN;D4>g# zVo<0czt!b9z~K6D;_iT9GFtd$fDZBh7$BlMVIhVGcpn)x%IRW)R3}42qqDOk7&Gt4V3bIR&y5CfcDDt^{Z2Slnz0YxzORMhMA!w&3=IvD{4(0-~Y5M#Nd(T`|O8&4WL2z9D4AWw<*_b1oAM+K7W{H8jertUqbPqWr5TYFJ% z0K|rXJo%ErG_YpMXojUCPaot@X$&K!K255Jcg{YRcD6N;RXJYxWme^sHhz@v)20or znGpcuteDUmCpD>nIjPC^X|iiPMPo+4rpzlV12Go1L6gR>DfY^WZ^1ALd-$-&Ja)**4k^umz$2x;9>{x$`j5N(q{yB_ci@Naa) z?2*x?qk?TpY~RD9;{aPln}M3TM`TSr|GqSi%xVDMK~a0>-o33Id*M{>*wbkZPaHPQ zO&ld#LT+FSWlKVmmz>`xladJx?goYry%d`q?wh2^H)|tw(+DQDPwc|eflF2&^>E!3|~?jXlEUsP#_B}0;toY9BJy58ID zZP*&v**>8<@C`-EJJ{#+9#IE5LtuWbfoUxu-WOQv(x~^-Ep-LN2ic1H9O8r0>X`Z@ z>Lbbmg^$t%b$OzXtRiUss6bL5Pkm%F)f=df4Fbr2oFu5v5`CN|0?dzdB&(pl$h0b! z`j=P%tPST40mL1`NHk4F9c zUTtzzjz$qtdE~cEiS>r)Qs|3Wl+rwIKTN4&j*`qB;}+8@JV)Fz$f+?$i`?Qz2&NLs zB0P&2+ui&m;g%ofvn0v~D8+J=#Zk)Qds60FeweamK4lG3c5xd=rj5)QNl4f)j7GJd z7Dkn#a1*eDVB!nK5wt$BTLHr9IpGYPa1@jc6$EXJmW&l)@^Vw8A4$04*z2ZvURx3p zvILl8-)HYuwdEk&7zsQb(%n)ZOtF88Gl$L(O@>?j z!>qERtYR0iioFLbAB@oI=9oPneYnAKhGJk_Hi|%}FV?2anVWF8xGfLfpb1MJ3BQ9h zeOeb9GhA|OU3gz|DIyOupFOAB}wu=NQkywIZ6EOp2J@QfEmM%1TYFys1r%s42Q z63_+gL|jrxsY{F8`h_fdc&UVI;WZ(@`sjK29YX5s3wrr^$?Q#s1>^f*t_t_75Bi&9Qq zK$*KFQ!{@++I3Ny3F(W=n%bwkdp=W_xpj-or|wbRAbFetijafxC#07yDr5Qr8M)CH z7NcJ8J^CJANX|JeS(meC5})o53S6zOb_Wv=OEW04cXqfOU_`cINzmf7x3&gl976N% z7N=u*QBV;^iIjDnOe+{@E$z(<&5nR;Pg|cul)9{>8Cc1)^K#go~$Px zpMLdsT?v5m{pu^j|1|yN@2~&jWSJ!>Z|kDL3U_wd?GC4tiGdD`3;??#$*yk2(tZR7 z05mck?3+n7{V^?!tJU7_$afubFD>>u8lo*?X_n@U8aBg1~`wVUm2Mm7=zx}94aqBdhzvZZ@$Ft zwlr*tXZ8RJJnK6T_5{VW!*P(yQF|S(pfIdH$?oMvp(iTm8VCw$2j^Q1S!d9l=|O-K z%6cscr8}@RXZdzx5iL9B5CS2BUId2`;BCfO5TvtJ$+~{S+%qR^T4UQv1l0h7F^{w` ztt}m{X0Xkfr@5NWF4w^>sKDpdQAB(k!A}t!NAM_u699s;$ih0<;@ZdbVU-wk(Tv5Q z6m?X)mn$$h?F{;IgOZTy!1cxSL)mEnfKRqXQs_lo%YL0)D8eamekx$CDIfO}ap|X% zP9}K_V<~=9@ql8=oN>D9WR=G@w%c#EdBw&Vp+-`5*_=FS&heRZJnLTDbY|1|>h~V- z=dPK|-Q>&NuC2U92nRLFX)_-fpsGapDc4Kw`^dY*O=v# zS)p2G8C~I1=8vuLR&Baac>l$cipi4IzLM2b+9a=~z^7e0sV((sOUDg62yQkPIU*qsnw(Ju32Q zv!;~hk;6V^HkM^xZQf;;mzvfulr)hS)w;p<(b}=hfVyl3yhZSIXt{i^rvo^NI>VT|fR!V5~!zDu{!{tNeqcNVHe$9#j@eEHWsk{s<_Ki}Wy#e)dd~wv0 zDvvNHkLEjm!3=d6!gq^H+L})d5uvK2U1O%Q6i&3BaRtEZb|U2*@gdhu%26 zb!hAGj-ef+Io>5TeqHT=bcQFF)Nuo88%-S(1=OW8;6qq3UX4tUN4_V~n^ii__Zlk# zs>*0>zt?K>W|fVnd5u*8)yiqSHIX!qg(G@TZm6pa>+rjH(evv92IeA!s;xZP{12^ed`jV~5?1RmPs^u+j6i@YhtV_UtJ ziV0)orBL5et}}Bk&C_)@$CEI&+iS5+7|U+N0K0D_FoIcaWGR@7$J)krcuh3{S?#Qp z?M&UlTg_v?NR5|WkX7E9p-4&9o!M-xNRC$A{ziuC0+%AdJK)<>nQjuKq=}WJnfoVY zjTdB%T+b%*_BRTg@*A9P?tp$nn3Wen2OExU@Wz*ptH&vCT=}G|;)1M#>s1NlLBtz4 z(|2a|;Vz(tNT20nJ!AX5aW#{&+6%H;u1_6s0-17e5bUyaTmLOMDn3Z9vu@5L{#9Nb z^MIcC#HMJ7B|a5d8U)m*nHI=@mZfM^6Q3l7 zKMtX-Xi#NoK|wmiyrzGDVtQbB`uMY)1+yT2g5F0o^l#k{AR0XB2MeSRq9!g9YPsYl zBR2fJAQBl4A{jA=)WLU?;jn=P!UjApib64H%P>g*AZEhggAE*eD)@o}T(a4ENWkP#DsBXSnj9x4I zmr7IJ-PO3Lo#Ypuj|?;r3&{E)cTz0%HtXaa?N4g2!|mJ6e!z#s$MA z@D^6|Y38;_TXwi_;v)S-tZnHx3OjU!z!ZAN8<}lv<+2m{#nc7X_+OXVHlen<&lGLb?=jBz=;TfxxdquCB_#r=iJaf_)rWX-ZK z6vh>!oodxCYLap@yAImtrk82P;wg5fFpu*>G7dJiC?k)vGkFIboFVGOMX`wO7z$4? z>%n+TW4DWxLiDaX_2*wJ%HbtD$@d<Gx4f0FF|(T>3GhbN2-EE1iQ z{dRew$cc*t^24ASlRxK}j}9s4ry8Ht?xG@C6$rY+~tX(%kyr2!n_m4BRTmXaTE| z#Yy+zyRNm@QA@I{DBrW3HbR%O~LO%mGmI0d}1;!8$q->RQ5lvW#tp^UaJMgQKkW;6Lt*+7Zzl9P!g8+{1 zu#*P^#hg@kg^_QI%MWzYUG0ua#t$g4KnW*WP4Ijp$*%~>SA^*+Lh)xJ^(#X46(OG$ rnn}wbJ*m&|=`&^tD9n~6lk&mZSpt4$*N_m1hA%@7{1--~LlXZViXSfV delta 5678 zcmaht3vg3ccK7P(NqYKOwj}E>%Rgb+!8SGqgN+@GKg2c!FrVP%6Gu;A8(VgsWE^|s zIPJ1Cq(d9b-CfKk89e)Goq?UYHPi7<+a1Dh3D6y}n8GT|mhL3kZFi^ZENn?gr|mgU zmW_dJuSVyccka38+ z0r;@7SCzHwzIJT}k{=-JTVzf}1j7TuUeLX>LJyRHN;1GBDJTOmGXe_&D}aa!fGo`? z8QE`j>vd_^WkQh7D)om7Whu(1rPy(-a(=921SDZ)Ti03HFZKC4S$a~$bcW&IX}v5% z^Rx_kCN3PR?dt6~7}_s(L^~um5F`(P2(CLhH6Kpy#kBP{2h!(PYJrtX=|Bi7Sgpa! z{w{5c0~LLsSX2~I$TDDbmMPzy4=?jeLq57WU}SG=iZ|gp5?mxIC6ql~@IVtJxUO(C zp%lA=s8J3q@9YST_MtT zPIR#^(>>J9Y#F=Slp&?438_VMNC@eMVQB|cqUDIP-5|OlaFTrj^jiVXgugAMJD?Zs zNBDLl_8(A)4&Y7`okvK!X-FOLBy4QLY@7Z?2Boxw-8Wk^)d~U71jJ_jQFOWKC##I}U}!E-4b=7R{MZO9SEkNX2*t-+*C1u&$|d>w zdK9%;N(R|Qz7q@X|VXNR0!$)r27FpI& z*T(;s=f7P+%CXnnIqYrsART7Z(^ag0p5YS8zWz`cJQe8!C=T@YODjN`RL0)(r>$d5YM;tZmX6L+<>fLmXARf$;MvKUYysJKo`_SjBFCrK6jSYTsp?tlr!O2~$ z@xjZbEWW-#eJM{3-It1l1{-&2rK2H(yPUxTe%Z-4N(F!DrNk(olt!Na}&YDtrx`K@8qAs?lz-^3aVmeVJri&S8&1ZP#&Ua8* zlUKCFwC|Gl6@Ogc3YD0(pwF~KpXphBFxcp^K^tL|@FbRMn6omqgLY za)WTp84-c3Zsi14Ox><`H&yHV&YICkz0ZZ{dEOXVD=QZ-6SN$&Py zVCPzKsbZ!qSowc0*tW1>{gMUipR-_1cQ$*y-dR>p_8ul=5A4wq!-z4On^GP#M)Oj5 zO02Yg-cIH|15C*t4Wu|?#?-`s?s)%+Y2t_!*Wxo;l)__1Q6V~=VDlse#=D;M>#1tq&7;ZPFhl9o6iK^T)fl?;LNn7 zGiyX|ENv;X?98+`)502XCOik}AW6zkx%oIfc7MUqp8UUjo}Z=sXT4mWjZyZjms4_w z6iKOz8Us1(M$rjYv^<#E2qux55Nt%y3}Cy|!cH%*%SabPuMHgBAM6W9L*Zy~^o=Nc zqu5)$6gf)7bjpY-%ev<>^)5Nt=VQ|9c7hI%7Vmqfb4k!VLa7?Loeq+Qs_^Ig^i=#&n~l)iyz{{RrMQR=@odhwHTMle#oI-1WaaPB8;Y4t%Y1UdjM^x#x=jmPliaY+*H~p(``PclO@YdQ> zquOz9e8)Xk{zT|(+pKHFUAO0S)2XJ3$V6ze>Re=|VtRP0=X`EFuW{DhG~fCC=w!oG z-cK`VMAGdi^<(;Y*6JDUR4nePzin#x;-1<4dpJsA4JO-3!-~)k^0IqHSk|4*0!wVu2fdFlyE;;X>6`i zTvgO*n=|=q1_55KW%A8g!ZlYG;%g}|U8|}STKxQVrM)GazwYLNbUm9#il0Ngga>)o zSM#k@eZ5)(`fn(O)-vvf!PUB)yRn=HT(YyT);P3BA+au;-X2!9c5G>lrWN6r%J2&{ znyIQ{Ni|h4qJF00id2o~EKNO)soVpm+dN#-h8cy0tBcqx)lEA@AO>{LtG1-4lEc!3 zB6Z`yXNp+78$rSKWpzce;BvzI@N#(zZdsS9dLLdRv#e*`OIaeF`5J7rla4XRc-5rt zd_F{ceBW!g(}P%TiR{k09JMHK74byP=5;14w(68nY~|LO$OR=|XHZ`d)QFpe4V1g! z@YYpx7pi%{CA?TIUZ#SY6KgH>FQzAI{+6RZWm&aZ(^B0ssyyq2>gBICM5vFs>K!yk zE?76#dv*CRTyEWdIE|~I|A+MsW-GL?#529wEU)2_@kL-y@bXP=yR?c08sB*;{U?0@ z>HnvVYbw#^19++Z^xYqS`t^4{JvtSz%9U(FJ=h_IyTS*b>yIi{+ms3Sjh#)|)~9@f zDPHPkV@=Kq%;5pcbF;E+mMmmlxM)h%_}b6zH!WAmcaJ(|-smrrQ?Q~pB&X*H-f0(d z>k~-w`sah*g)FOKf3`7AH3_7MdHVFmB87J!J^%VWyZf|oN|-2^TtCxrH9P*wp4;}l z;AZPS_M7H%^*(en;Q1}@ZE2|?AK8t~733qot2w}3qq%@z3-HZl>TAVn#4CiBD(+fM zS<5Q!dZ`lludm{Pa=nT}yhhlh7!XCEfx$!UwVsENp=`EMr)e82b)j1lew2O==VwC-j#lqanbULV)3m z0p6t~F~tc*Hv8SCecV=&vMrlETd1f=ZRHUvawDo3g#r(@#)vwm7L|udByDjU$JD6; zO5_jif!WeynK4yNJq-7o5!Erbk_hbcf~*%~Dp7S-osvsQOw~qWv<>D@;XlF;5-6e2 zb8>@vcr9Gd@HQ{EZaYT1RTI-Jt))b++l4(9!V!>IPtMHqr{)w?K<%R)(Y&c?n}0r0 z`{<^7w&-cruwqYG+6n^`<2-yfHwfR(KAqXFu6mX#I5R{a^p}w?2FK+$S?X z_-x`Ie)XLrpUr&Z#5R`^_eKkph?pGr7O=p!yit+L7Md?b39K?T8&NTDq5K&AHa$rx zIaa77L);I!K^jnY;q6xXCJ2Z4Cb&LH3@8$c;?e{SJ{bp(h3bQ$aOjPGsb;vL z5z4H#p$L>gFGHomj&EB%ngQA)_;KLZ2YW&t z;gFb620J^X-8+eTru>Wk!E}(`@bWD4kJHF$tzwnN~ z;ikXgeCU?H^|*07@-tH|9?zTTeRDS3xxKYQMQo0vO?T~ZrgMh8xQjSzYvT1TRG&TJ0Om$_thD4R&(T zF4ViYi`gahncO841^iMb4-i5?C50jbJy8jVK=nF`$zJj!z#EZNfB^ju(ohj@^4y-9^kYqRZ=X2O6RMGZ04%us7g>48jf#xNZ|2sBkks>eZ2P%1fTNU{ulbaA0Pk# diff --git a/utils/output_handler.py b/utils/output_handler.py index c97185e..7855812 100644 --- a/utils/output_handler.py +++ b/utils/output_handler.py @@ -3,6 +3,37 @@ import simplejson as json import logging from abc import ABC, abstractmethod import traceback +import base64 + +# 自定义JSON编码器,强制处理所有可能的JSON序列化问题 +class SafeJSONEncoder(json.JSONEncoder): + """安全的JSON编码器,可以处理所有类型的字符串""" + + def encode(self, obj): + """重写encode方法,确保任何字符串都能被安全编码""" + if isinstance(obj, dict): + # 处理字典:递归处理每个值 + return '{' + ','.join(f'"{key}":{self.encode(value)}' + for key, value in obj.items() + if key not in ["error", "raw_result"]) + '}' + elif isinstance(obj, list): + # 处理列表:递归处理每个项 + return '[' + ','.join(self.encode(item) for item in obj) + ']' + elif isinstance(obj, str): + # 安全处理字符串:移除可能导致问题的字符 + safe_str = '' + for char in obj: + if char in '\n\r\t' or (32 <= ord(char) <= 126): + safe_str += char + # 跳过所有其他字符 + return json.JSONEncoder.encode(self, safe_str) + else: + # 其他类型:使用默认处理 + return json.JSONEncoder.encode(self, obj) + + def iterencode(self, obj, _one_shot=False): + """重写iterencode方法,确保能处理迭代编码""" + return self.encode(obj) class OutputHandler(ABC): """Abstract base class for handling the output of the generation pipeline.""" @@ -70,7 +101,7 @@ class FileSystemOutputHandler(OutputHandler): topics_path = os.path.join(run_dir, f"tweet_topic_{run_id}.json") try: with open(topics_path, "w", encoding="utf-8") as f: - json.dump(topics_list, f, ensure_ascii=False, indent=4, ignore_nan=True) + json.dump(topics_list, f, ensure_ascii=False, indent=4, ignore_nan=True, cls=SafeJSONEncoder) logging.info(f"Topics list saved successfully to: {topics_path}") except Exception as e: logging.exception(f"Error saving topic JSON file to {topics_path}:") @@ -115,9 +146,49 @@ class FileSystemOutputHandler(OutputHandler): if "tags" in output_data and "original_tags" not in output_data: output_data["original_tags"] = output_data["tags"] + # 保存原始值用于调试 + original_title = output_data.get("title", "") + original_content = output_data.get("content", "") + + # 添加Base64编码内容 + try: + # 编码标题和内容 + title_base64 = base64.b64encode(output_data.get("title", "").encode('utf-8')).decode('ascii') + content_base64 = base64.b64encode(output_data.get("content", "").encode('utf-8')).decode('ascii') + + # 添加到输出数据 + output_data["title_base64"] = title_base64 + output_data["content_base64"] = content_base64 + + # 如果有原始内容,也编码 + if "original_title" in output_data and output_data["original_title"]: + output_data["original_title_base64"] = base64.b64encode( + output_data["original_title"].encode('utf-8')).decode('ascii') + if "original_content" in output_data and output_data["original_content"]: + output_data["original_content_base64"] = base64.b64encode( + output_data["original_content"].encode('utf-8')).decode('ascii') + + logging.info("成功添加Base64编码内容") + except Exception as e: + logging.error(f"Base64编码内容时出错: {e}") + # 对内容进行深度清理,确保安全序列化 try: + # 暂存judge_success状态 + judge_success = output_data.get("judge_success", False) + + # 深度清理 output_data = self._sanitize_content_for_json(output_data) + + # 恢复judge_success状态 + output_data["judge_success"] = judge_success + + # 移除可能的错误标志 - 我们通过尝试序列化来决定是否设置它 + if "error" in output_data: + del output_data["error"] + if "raw_result" in output_data: + del output_data["raw_result"] + logging.info("内容已经过安全清理,可以序列化") except Exception as e: logging.error(f"内容清理过程中出错: {e}") @@ -126,23 +197,38 @@ class FileSystemOutputHandler(OutputHandler): content_path = os.path.join(variant_dir, "article.json") try: with open(content_path, "w", encoding="utf-8") as f: - json.dump(output_data, f, ensure_ascii=False, indent=4, ignore_nan=True) + # 使用自定义的SafeJSONEncoder + json.dump(output_data, f, ensure_ascii=False, indent=4, ignore_nan=True, cls=SafeJSONEncoder) logging.info(f"Content JSON saved to: {content_path}") except Exception as e: logging.exception(f"Failed to save content JSON to {content_path}: {e}") - # 如果序列化失败,记录原始内容用于调试 - debug_path = os.path.join(variant_dir, "debug_content.txt") - try: - with open(debug_path, "w", encoding="utf-8") as f: - for key, value in output_data.items(): - if isinstance(value, str): - f.write(f"{key}: (length: {len(value)})\n") - f.write(f"{repr(value[:200])}...\n\n") - else: - f.write(f"{key}: {type(value)}\n") - logging.info(f"Debug content saved to: {debug_path}") - except Exception as debug_err: - logging.error(f"Failed to save debug content: {debug_err}") + + # 创建一份article.txt文件以便直接查看 + txt_path = os.path.join(variant_dir, "article.txt") + try: + # 使用原始内容 + with open(txt_path, "w", encoding="utf-8") as f: + f.write(f"{original_title}\n\n{original_content}") + logging.info(f"Article text saved to: {txt_path}") + except Exception as e: + logging.error(f"Failed to save article.txt: {e}") + + # 记录调试信息,无论是否成功 + debug_path = os.path.join(variant_dir, "debug_content.txt") + try: + with open(debug_path, "w", encoding="utf-8") as f: + f.write(f"原始标题: {original_title}\n\n") + f.write(f"原始内容: {original_content}\n\n") + f.write("---处理后---\n\n") + for key, value in output_data.items(): + if isinstance(value, str): + f.write(f"{key}: (length: {len(value)})\n") + f.write(f"{repr(value[:200])}...\n\n") + else: + f.write(f"{key}: {type(value)}\n") + logging.info(f"调试内容已保存到: {debug_path}") + except Exception as debug_err: + logging.error(f"保存调试内容失败: {debug_err}") # Save content prompt prompt_path = os.path.join(variant_dir, "tweet_prompt.txt") @@ -153,6 +239,12 @@ class FileSystemOutputHandler(OutputHandler): logging.info(f"Content prompt saved to: {prompt_path}") except Exception as e: logging.exception(f"Failed to save content prompt to {prompt_path}: {e}") + + def _ultra_safe_clean(self, text): + """执行最严格的字符清理,确保100%可序列化""" + if not isinstance(text, str): + return "" + return ''.join(c for c in text if 32 <= ord(c) <= 126) def handle_poster_configs(self, run_id: str, topic_index: int, config_data: list | dict): """Saves the complete poster configuration list/dict for a topic.""" @@ -160,7 +252,7 @@ class FileSystemOutputHandler(OutputHandler): config_path = os.path.join(run_dir, f"topic_{topic_index}_poster_configs.json") try: with open(config_path, 'w', encoding='utf-8') as f_cfg_topic: - json.dump(config_data, f_cfg_topic, ensure_ascii=False, indent=4, ignore_nan=True) + json.dump(config_data, f_cfg_topic, ensure_ascii=False, indent=4, ignore_nan=True, cls=SafeJSONEncoder) logging.info(f"Saved complete poster configurations for topic {topic_index} to: {config_path}") except Exception as save_err: logging.error(f"Failed to save complete poster configurations for topic {topic_index} to {config_path}: {save_err}") @@ -216,7 +308,7 @@ class FileSystemOutputHandler(OutputHandler): metadata_path = os.path.join(os.path.dirname(save_path), metadata_filename) try: with open(metadata_path, 'w', encoding='utf-8') as f: - json.dump(metadata, f, ensure_ascii=False, indent=4, ignore_nan=True) + json.dump(metadata, f, ensure_ascii=False, indent=4, ignore_nan=True, cls=SafeJSONEncoder) logging.info(f"保存{image_type}元数据到: {metadata_path}") except Exception as me: logging.error(f"无法保存{image_type}元数据到{metadata_path}: {me}") @@ -245,6 +337,11 @@ class FileSystemOutputHandler(OutputHandler): # 处理字典类型 sanitized_dict = {} for key, value in data.items(): + # 移除error标志,我们会在最终验证后重新设置它 + if key == "error": + continue + if key == "raw_result": + continue sanitized_dict[key] = self._sanitize_content_for_json(value) return sanitized_dict elif isinstance(data, list): @@ -256,22 +353,29 @@ class FileSystemOutputHandler(OutputHandler): # 1. 首先,替换所有字面的"\n"为真正的换行符 if r'\n' in data: data = data.replace(r'\n', '\n') - - # 2. 移除所有控制字符(ASCII 0-31,除了\n, \r, \t) - cleaned = '' + + # 2. 使用更强的处理方式 - 只保留绝对安全的字符 + # - ASCII 32-126 (标准可打印ASCII字符) + # - 换行、回车、制表符 + # - 去除所有其他控制字符和潜在问题字符 + safe_chars = [] for char in data: - # 允许常见的空白字符 - if char in '\n\r\t' or ord(char) >= 32: - cleaned += char + if char in '\n\r\t' or (32 <= ord(char) <= 126): + safe_chars.append(char) + elif ord(char) > 127: # 非ASCII字符 (包括emoji) + # 转换为Unicode转义序列 + safe_chars.append(f"\\u{ord(char):04x}".encode().decode('unicode-escape')) + + cleaned = ''.join(safe_chars) # 3. 验证字符串可以被安全序列化 try: json.dumps(cleaned, ensure_ascii=False) return cleaned except Exception as e: - logging.warning(f"字符串清理后仍无法序列化,尝试更严格的清理: {e}") - # 如果仍然无法序列化,使用更严格的清理 - return ''.join(c for c in cleaned if ord(c) < 65536 and (c in '\n\r\t' or ord(c) >= 32)) + logging.warning(f"字符串清理后仍无法序列化,使用保守处理: {e}") + # 最保守的处理 - 只保留ASCII字符 + return ''.join(c for c in cleaned if ord(c) < 128) else: # 其他类型(数字、布尔值等)原样返回 return data \ No newline at end of file