From c8e049fc680e6de158267c963296f27910f76213 Mon Sep 17 00:00:00 2001 From: jinye_huang Date: Mon, 12 May 2025 15:44:54 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=BA=86tags=E7=9A=84?= =?UTF-8?q?=E9=87=8D=E5=A4=8D=E9=81=BF=E5=85=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/extract_and_render.py | 7 ++++++- .../resource_loader.cpython-312.pyc | Bin 8341 -> 8642 bytes .../tweet_generator.cpython-312.pyc | Bin 38150 -> 38297 bytes utils/output_handler.py | 14 ++++++++++---- utils/tweet_generator.py | 10 ++++++++-- 5 files changed, 24 insertions(+), 7 deletions(-) diff --git a/scripts/extract_and_render.py b/scripts/extract_and_render.py index 0dcf16d..f8d06bc 100644 --- a/scripts/extract_and_render.py +++ b/scripts/extract_and_render.py @@ -39,25 +39,30 @@ def convert_json_to_txt_content(json_path, prefer_original=False): # 优先使用原始内容 title = data.get('original_title', '未找到原始标题') content = data.get('original_content', '未找到原始内容') + # 优先使用原始标签 + tags = data.get('original_tags', data.get('tags', '未找到标签')) print(f" - 优先使用原始内容 (prefer_original=True)") elif judge_success is True and not prefer_original: # 使用审核后的内容 title = data.get('title', '未找到标题') content = data.get('content', '未找到内容') + tags = data.get('tags', '未找到标签') print(f" - 使用审核后内容 (judge_success=True)") elif 'original_title' in data and 'original_content' in data: # 使用原始内容 title = data.get('original_title', '未找到原始标题') content = data.get('original_content', '未找到原始内容') + # 优先使用原始标签 + tags = data.get('original_tags', data.get('tags', '未找到标签')) print(f" - 使用原始内容 (judge_success={judge_success})") else: # 若无original字段,使用常规字段 title = data.get('title', '未找到标题') content = data.get('content', '未找到内容') + tags = data.get('tags', '未找到标签') print(f" - 使用常规内容 (无judge结果)") # 解决tag/tags字段重复问题,按照修正后的处理逻辑,只使用tags字段 - tags = data.get('tags', '') if not tags and 'tag' in data: tags = data.get('tag', '未找到标签') print(f" - 使用tag字段作为标签 (该字段将在后续版本中统一为tags)") diff --git a/utils/__pycache__/resource_loader.cpython-312.pyc b/utils/__pycache__/resource_loader.cpython-312.pyc index 815842692f4e8c0d130367eeda53f8b1ec5f2947..dbb3de494603de3007177a64941d0250421dfd1b 100644 GIT binary patch delta 2115 zcmZ`)YfMvD96zV0_qMmaZK0*_2Ze%w1Lgq7LtZU1K@g^F3aB8V$U`1_TEa9i%%;gq zTog|>=7-IF*bvS4GNBZ7`j{Xsy9p;UsxLrXIgx#nSDZnleRGa}pbX~zz05mE`>Q__& z2*4;1$gQ~TtQ1oxi}&zH913Q5aEM^s4I3=v=M_` zbn1{YY>Ivfo5>x{r?p1?E*rUJFpo$qnAE zjbESUmBoMXSyuaNZs1atOJD7uvM#b-+cH1Y5)V!2f|yU%hS5rGNgOWLBCR$ByUtE4(VU59REvw4`y00YXi(` zbe``F9qsIH!v@C0Fh^N zD7*YYcKJm6Tz1W(!eMSArMiI4ccu1nt&~|dQ8}?gs`*e#A6^E)?N<<@8#gCy`q(3- z_day>kpudK;U@Ul7JtQX*&uB#j5I{5CQeEF8l}AlrPM=@Y=Vrbjxw_X zVzww$=Av1%tuhPEW^vSRWgQkB0yX50`-%49C>R7-F$hEOBox3P>|*UT-fgH?pwuCr zJmEa#LF@_ML0N1k6-HkItJ^dYZ(+=O=5ac6SwrGva^C3HvJ|hC0l8_cGMcF$Vy*36 z9oRxdQyyQ#Xgk?$T8iKB?u-pYPDYN+IHdR9m+JOQt^;#oJ=q_Z72`$Id7g47`q9ai zd+Dvn31{5+jkr%Bbdl|rT$oOJEY&frxkFrZFS^sLr4V)!-ufLpKYrU9fcC^8?$srI z$=*emI%YrX;joUbfY|}@hRaYljmP_l;P6@4K3KjOPp$QP1sqwi{gepZrEnT7@`B)p zYoM( zL-MuKHGbV03%SH0A#5PGV_am`Rad&3ZZ)rN!QsxCvj zeLcs+1zzluUECJx?`E!(9C9bTBdq3g57V=DUQOz2!=S?c3CQyl+= zhLy-_Xc*FD{Nv8#Kc-~pdy?(>#we#<@@yY#8KeD@KPMKD-#xjyA~fZ+1@h2T9v3Jg zY4J7r?1{!j3@RAZF=$|Lgu*qjL=j*M1No(4z-l1$AsLH5GVF&-2=K;5g_iTfMF%yX zDU6&=CS?n^N2jK7t8v)Mhzthub0)8Pl)18!?etPz*wYv6?`|u`9n@v56LM3a*@SQb Y2n#^B0JIB0{~|`QR{?F$0YiEIzo{J1Bme*a delta 1778 zcmZuxUrbY17(b`C_qLbzw!OW*rR}A)r3_Y4Pzn@~iR~odL>9r#>9!!F6)=eA-0MGt zxfmC>WSN?iZI8OdB^XR>BulzCsVp|x%QdTU0hgIA_x_LjG9kfW1cI5A8xr)7xb7w4@jFXfHX5_qE-M7GFJ|*!vOvW zevd`dR?%KklZ5~&GmK`#qoy0sf<YLRlzJYiYb-}=2?~qBQMTl zq;Dn2kh%IbE}GwAT$%;9up>ARB|OIpHsfNb;}HQvwhI5j@;J{*c%9fps;5Jfu*602 zC3g@Haw7EOB}>W%z*tWMjj__cW0 zdI&$THYy9JnFHVyi~{Y=as*Px^=lR%g>(GJND|*Kv5XP zTZNl)QEte#Ka6+Zk9Xf0Eynv-OaXZxek!EIh+fm4i{%u3r>0k*C{~@seU5pt`lWV6 zuTDRx%3zN(a&8}dHA`QdqZ#*H>65?}O{lIHPe* zyq7et?uloU*|_db6zy$~B$+r7!7+^hUwmz!rjwHb>7ku;};pG%|~gsf|Rq z=ZC~_PjOOS|FI!#Hn5THX|vpGwot=1zBkMi%wn&?6l9hxC@3t|gc-`C{4O(7*yQN# zVhUX>;Ug~^i);@7Po~l9x91htaT6ON%_igP?lYX9Sdb4#rD!BdXv~IaBfTmvs;44- z@TdllNotr8|3X<;L zQ?&QvPPs|&GK+1VZjt#}WV^ldXXUF9Dp;vZzC!d-5wb25_}p(s0&zz)0aVJ9uo#E-VyhDM(s;wSCc`U(Syxua@RZK|EgM;xhUCg02wK0Yb|c3*-+$f&?>+#m9}=E6W6#6N=OW{5Id#En(>hSd4mJUymTx)1U19zS% z)t~UdhU*T}$@R0zS<=akMf(qxpdeVRctD2QHCJPHcpl!C<>g)sn?K6dj9w2nUSX7#rhP z*hK9_{W@h^M2d^Gh7{T3;zdssSCK;HLK*JU>X8D!tu17V{qloKJhmq;A?A@5QAewV z)X`Uj)Ph=QlahrKQ>Im7wwNrPAkP(`=0%iTH7QmeQw}Kk zq_q>`BCOTtsAWegyDNu^-lX|u1>>o!czt2m&&O_<5N^c#^t!FeWAXv{PhaZ@p$UUI&jPpFs>Kl6u(QY{&3!}|M_M6$ZyIw_AkRyk0~YjTD?;n;V) z!}f4q_=Ru(=hfVeIs18~OSJ&%g&=1jZ(1tzxH2Agp+W*878#+-#v|_lS_;IZ$ z*jSBfa7|+}8o`Ge>yQlJXski`Tv}5aLMEKnq{jyiPNC~u`Jr!^n2*UWY&_r-z z8Ar0{mEDFw4_0`;MkCyh-aMi9Ply-*e-U`e%R{B6vRVlIjkDW!qJQv_wgTA&2z&~- z$X#huBGk*>e5sj6Qm(8c4r%ij`hGhILI4k76z~OZ?KH@EFvkEaeyejL`3x~5jKke( zXCt(8e_MoU^4XnU_9_l`oxnK(b651Ld{FU!_w~fnfaGVuFMuh)9l)=EyMQ3zH^A?J zX}~>z2`~eIHjczO-1}{I8+5A=PX;8Y15X8<=qBFQU5otqT(@5o0r5q=vgZO);d?!W z5*Z?=wwXk6UhjImZle&plSR0_S1a8`GHb|rdIz~s?{|{;hi~FqzNnI4`z)VaH6h=^ z4Gsg82i?ilAjLhdkqhp0&B-89tQCBpp2}g^q3YUetDi;u56EAV+Q`huXk=rc1 zZnP?i_f>(FCew|lRfN1e>Wsz6i8Gc`nn=~8BwT7P2B)AIbnF0oEi^QEp z&x*9}bb23-=hIn_ND&AKWki onl{4Jlh*J>ga_Pi2YU;`CRmj5wKO~I>@FM%rl3ada0bemDwoP+YRwB@#)Xv#}a zAwlOnCgPlCE?d@Vmgug_)QFjL)A^WQq**qG+aD~NZOFqJm$)U{`QbDs+qvoIe9!N3 z&*?emclRGC0`0wMJ-E(!j$?n_s~fLHID*I(JKLjsu_gZbZwFTf721F<$}6e&{;0fpD&57rV~vokv3*jrIRD=VbxQlKQXg|?|uFqgKW+m zHrnKbtYcbPT@+RnPic`+kQK8=4JPmzYIdPqyTrRLhY|!>ZDIN)!{nyOku~MLeJ1>loEW%dx2;97_6#eK!ztgc?HY7G}H z`=rbBid1+9QDrQRdxJ)dM0!O}v)AuiU*mDtHP(xp$cBstCAh?0fr3HX1R1Hs#*m6an?;-{EkMVKo zw*6%mw@SsiHiPmoWDw%3$irvJQA&}FRRl9T!Tbpz9UuWt09pa-09Al$z^7#AvmMGo zkY`EE^3|Glkc$C3iFf%yJSIgx*Qvw-@@C}@Oi28y?KodLv?>&hOQrJKHB4d+jh$(= z)xpvO=-}gvi&0R~G90(wYQml3yv5bT!eT^rR@hNpVEc(Y0I$OS|g@F_f}) zW-z4J(!r3qB#w4J7G`*j=%@6O*|ZxgD&boam|GE!DB9glC@rR4$GE_`QuuL;icUUm zUFhbxJMk)DqToi{1mdWkQ1R|8b|QL~djUJKfCHV)d*-qeg(i=goiuY)Po{8m!pU^r z8^=y&CwQaTsc4SsDI?!9i=CRA)}m*p^&HjH3A||Jrjs(nSnlrfSdI?TSn&XCEYym4 zh$4->ojucrm(;(#AKLdz9y%1_JqW6%n=8V_dqLDge1>?5==Y>*7J_}|UL})~J!>pR zIv!EfctoeDf;<8IJg6*CC&;NiMOE+_7U2v>Sok*g!SFZ$F5d`O=kjduk0@UEcwFKb nnA6~;oIl`oIYl_-hqW*VTu=lLRE6|I7H~O>#YU-;R51Sl1#ss) diff --git a/utils/output_handler.py b/utils/output_handler.py index c902004..247d8e3 100644 --- a/utils/output_handler.py +++ b/utils/output_handler.py @@ -95,11 +95,14 @@ class FileSystemOutputHandler(OutputHandler): import copy output_data = copy.deepcopy(content_data) - # 确保tag和tags字段保持一致 - if "tag" not in output_data and output_data.get("tags"): - output_data["tag"] = output_data["tags"] - elif "tags" not in output_data and output_data.get("tag"): + # 统一使用tags字段,避免tag和tags重复 + if "tag" in output_data and "tags" not in output_data: + # 只有tag字段存在,复制到tags output_data["tags"] = output_data["tag"] + del output_data["tag"] + elif "tag" in output_data and "tags" in output_data: + # 两个字段都存在,保留tags并删除tag + del output_data["tag"] # 确保即使在未启用审核的情况下,字段也保持一致 if not output_data.get("judged", False): @@ -108,6 +111,9 @@ class FileSystemOutputHandler(OutputHandler): output_data["original_title"] = None output_data["original_content"] = None output_data["judge_analysis"] = None + # 添加original_tags字段 + if "tags" in output_data and "original_tags" not in output_data: + output_data["original_tags"] = output_data["tags"] # 保存统一格式的article.json content_path = os.path.join(variant_dir, "article.json") diff --git a/utils/tweet_generator.py b/utils/tweet_generator.py index 85146ed..1c85b45 100644 --- a/utils/tweet_generator.py +++ b/utils/tweet_generator.py @@ -518,14 +518,20 @@ content: {content_json.get('content', '')} judged_result = content_judger.judge_content(product_info, content_to_judge) if judged_result and isinstance(judged_result, dict): if "title" in judged_result and "content" in judged_result: - # 使用审核后的内容替换原内容 - logging.info(f" 内容审核成功,使用审核后的内容替换原内容") # 保存原始标题和内容 content_json["original_title"] = content_json.get("title", "") content_json["original_content"] = content_json.get("content", "") + # 保存原始标签(优先使用tags,如果没有则使用tag) + original_tags = content_json.get("tags", content_json.get("tag", "")) + content_json["original_tags"] = original_tags # 更新为审核后的内容 content_json["title"] = judged_result["title"] content_json["content"] = judged_result["content"] + # 保留原始标签,避免重复 + content_json["tags"] = original_tags + # 删除可能存在的重复tag字段 + if "tag" in content_json: + del content_json["tag"] # 添加审核标记 content_json["judged"] = True # 添加judge_success状态