From 202ca013165c074215ca88dd57c8da2774e49636 Mon Sep 17 00:00:00 2001 From: jinye_huang Date: Wed, 23 Apr 2025 19:47:20 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=BA=86json=E9=85=8D?= =?UTF-8?q?=E7=BD=AE=E8=AF=BB=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- SelectPrompt/systemPrompt.txt | 2 +- core/__pycache__/contentGen.cpython-312.pyc | Bin 17387 -> 22965 bytes core/contentGen.py | 237 +++++++++++++++--- poster_gen_config.json | 2 +- .../tweet_generator.cpython-312.pyc | Bin 29250 -> 29250 bytes utils/tweet_generator.py | 2 +- 6 files changed, 212 insertions(+), 31 deletions(-) diff --git a/SelectPrompt/systemPrompt.txt b/SelectPrompt/systemPrompt.txt index cc486a7..763147b 100644 --- a/SelectPrompt/systemPrompt.txt +++ b/SelectPrompt/systemPrompt.txt @@ -14,7 +14,7 @@ - `date`: 选题日期 (例如 "2024-06-15") - `logic`: 选定逻辑内容 (描述性文本) - `object`: 选定对象 (例如 "泰宁古城") - - `product`: 选定产品内容 (如果没有单独产品,则为景点本身,例如 "尚书第建筑群+古城游览区") + - `product`: 选定产品内容 (如果没有提供单独产品,则为"None") - `product_logic`: 选定产品的逻辑内容 (描述性文本) - `style`: 选题风格的文件名。**必须是从 Style 文件夹中选择的完整文件名,例如 "攻略风文案提示词.txt"。** - `style_logic`: 选题风格的逻辑内容 (描述性文本) diff --git a/core/__pycache__/contentGen.cpython-312.pyc b/core/__pycache__/contentGen.cpython-312.pyc index 62f1d66a89ee992350c09f79fb826fbcf18f2d95..e49a4c485b269087eefe9e7f5138c5503b21b160 100644 GIT binary patch delta 6763 zcmeHMeNa?amVd9mzi7JYroX>Iu=!|d5D-ui1vL;gj5LHIp>~p?$_@get!!8#bsQ2J1a|s!P9p`uy6F>ur#dcfzk-piS5P- zY&T1YF+V~!{4Zu)KK!3$A|m@Fwp;c?o;~MsIexknw_hH2XmGIrPWtJVl1IuExSVcu>Q&^WGFdU#ot17^ zroI;5Pe`kgG|naEPPI{+;`49`F2TFVzksZQrxF|Bkt&wS>*08Kgn$rU6kXsjO*lq~ z(eDB)Rd(ad7lsiM5BFW{J=TR}4UT!Z+p#>%iS^+}SvdAQhQn`2dyM@&b`;OU+@_Rd zKIZz@0X}$cN!8GA@^~4K+-6X&$nx+PD<-5wHX-X`5pp8u47&>-5Io^&I!<-0cQHZ% zW+hD?_&<)B;M-%?EBWtH>18uf$%v-%$!K=4o`?7k@Q?GiC*Mc$qZlJ;&+Y#OM2-BF z^DE^X8ZA4^bqZehc>8;a&R#cpy*CDbR$ZOKJSK51e zUT*I=;OOo<)Z3p2m5jE8@9o6j|BUG|aZY|}_MPC&k3%!3FG0t(7JCjEFE^`IZLhZX z>?hjYoo(&CMB6L-kF`Oeo%}<2wcND=^Vx-f&I0czh?dswJv> zcCckqBK=XzTPGGVH!1X$09EmG3~HBBYMVYvwULs|63YPRf)c z!eOt_?+9AM%WmO!WX)f*Q4_Fd(-{z9)t-xb^qtNCB7;-l|+;if|NuLSE= zn{4b~n{=CS?ypw~;rk}e-6Rs;-1*m;kZFL^Q5%69+2}Z zRXGIiVH2#^pe@yasx<_g;G`Pp-7aYbYV@KFn&5Y(H!(tRh7B9pLU*u*qy79z)BBb! z!9|~B$hedRE-^X;kOo57#X{d`VfXTxUOnDq#lmTmVoyba5FKcQr3ObpQjM4p>EHlJ zD%$*1N$}KF{WK2o0mM>3D<@Bssmlprs$(2`njkHQ4;6S=GJR}M+PK&jW>k?aDQO%g^3Wv?(B7(ysNhRX;#u;rOeH38A~hMw zLo&4(X0I$cnNa!vC+weP!pbuSw6{JrLF$&8Ao7e!k=~!^Ns%$g^rTRl2o@n*Y7Tf< zeQJi~<^P#wJ7&-6|3{dBB6NPDXYQv%_dj_(arMgdjZ2Ahr7Hr$L4y(olAHvy7+jKuEFZBMrZi-s;mzznybTCv`g0OGokJV#*2if5&%JSa z_VY_~V^=MY9Zdz1*hdH?f^GpFC54quxIoJyQ}lWBUKL(9;P zPlr#>yz|M-(CdJw&}xN1fY|isHvj?6edfP^>0IK(b(arX`qe-(W0oOm7rQojq1m+= zk!OM2;}H8h-1pdBoo-hds(c2KEr`${=@`x9{^8+HJI{pxHV8nB?}-3aB@vTrH{yN~ zwIyr!yE_l|q2r1kPK(Qo7)T(m6>d1>?R5iu+|=3I>1uZ$ayga;Yp38S>hr<{;siFY z#8jF=5ghXpm5(^nct>oV)aM6wyj%YjX3tU&^L@B)<+Q@+C(bp-6m|euwu1ATF-@>A zk zHkGgWkrXo&1iC}&NI}i@l`;LgN&T`^u_C6gk5{Y-cSR~T-jY(~yZp_8qZ5V|Qxy&4 zYpL?@p<=|a0!-HE{k4Ivh~5!ZuLxNu)D>~BV^vrkaWstE#+{U-$uA02PiTrKwFdw0 zAU|R(i)x*rh6!!uq{$j63hs*7Dx;>VuzbQ)8wY!k+Pj9Th@mQUJZ9MVh>e+TnQT#0 zEd(Ifd{?7L3$4$9g9Jr%0{fN^rw4_j~;M77U?E}e7`ji=J&?+$v^5tH-h zD`SdPlQ3txgLJVXrl^h=SBAPG#cRi?C0hT3a zfVnv$F;ljh>-k~#XGLKH)wG+kybzPT7?-FRfw)!|HU4|f3_CD6L8uzEYtH8Az0L2rOZdR9(z0QK_7mnuyhz_ypk6O}3%K-rok zl;y*PnoAmFM$-@=2f?ejtB~M&01YD4iomHyKyUydk`JmZ4*o&|A;<(mgpd}^BSeH4 zKqVv+k|Jf9B9cWSS+q!gN+k$oC54QcLefkj53gH`@*1Ur1voSGK?YtIxSz!0^`xoF zT%V~kdy&qhvxF>VO44mPfH=rG3mz9gaTGE4x zC3_`u2^G`pf`0(lR&y{9udfO9Sgc-Xaiep9emv<^Oe40(1~(Lb%4Y9(?q?{UgSo{i zjos;#O{xYi3>r6@Z1m&drR)F?wTt4(+m0=ah|0rtr_)vGJ4iKhWs5GQ0&-0p4;SVh zm(T<9NAnGCCv**;2?^AqDFygD$;X2lG+*H20ZAdAheys=S(^+QLy!hk^blk~qd$Mx zJm4~9xghY&16eNDfG8uF`KgwLH(~UMz%mou7TAPY2WyS6EKHQ=;Ii}dv9K-#4;Hxi zsZNQRRkP)PH5WW$Itj%d5FHmm5{iCjB%y4Vt5)T+tTYsnP)!4~emfK`v)8XBLckKhIjo41SQU$ zob|oQAVdbjB?d?Cj|Bmr&3<$ZpcjL?NK>7W$Dm2Fp{@k4CwuEOPK5DJ|9Cq5)5OOk zi8sPaqC7?&1J??WR5rtwhH9k0Zh)*@y|U7E9+`(w7z4^aLSJq~`VgVf8cJ#16Luc! z=sfJ+f2h|kAp@(+^C69ZEkHBZMN~wYE7w~<`cO^|vQ=NMF5>?lL9It@QorU0-ZosA z7k@Hir8B_!31^?qUwqDeC%a_Q3@N0jx$-N_BG(UZ@-_I}(`suV{~{-*E}haB2Hg>T zMZ5wfjGVtzjB_dHMt@_#5HUC=4S8p`jcyBehvi}CRYjOftvNuI^u!DYr}Va>Ar$jXk2V2lMY}R!nR20*x2* zVp_+f#!Tnb5Edf;V@0_!{d^6C_rXRS)7U1J`jKtJ z+o=5S$CSG!jkZ8-)aZDKadY*4{JZI;)jp=xvOx^skX$n&8J1Ay=VGlpsGV)m)|aT~ z+9_}Q0m|4DlO2q!t(43P!=7XCw_H@l+1dL(-|o3gdXWF3G?l$O9>L=@%mvbDaJ zfFza?bdUGbsUSW}^)~$!aKdozH?Prn24Q){P>xMn79v1m!!(YL_S9;fPF#iOtJ9GhZ zeTvA>5xIg$7?Eq_?e!*nFZt#AI%O%>{BSS+GS1aK+{}Iz=jJ^;%qGh=nDKYW=QcFz o#t}a{9_c&PH-gm%4-x%6ooih;$x9m?*@O^xK7>OJchXG%1$62$3;+NC delta 1797 zcmZuxYfMx}6rQWO1PY|+ zmeITt+s}{I(4e$9FEFUqaO@>!BTEm9hNuNC1bk%B&Ox(cRx&IdBC+vE2Tlg`{8LZMlp9TZe(Aymd95nhp)j(NP=;r!YVy+uoqTkZ+ zh6Z({o6tyl##)P*GvaM-4~9uOyOXyy@GX07lSa|u{!7ZY|>Mke(= zKwo66I2>SN9x;gzm_+ZE31(A|`bh-iZgMrb+gxp)v8qx4O1^iYN3bzO;w3cV9?=CbVyLzujBySEtVcDBtW0nNsOa79B6@ z+v_PGt$e7;dP(DLY$?|Wx_HkAp4BCSW}RQQKA<%SGQ&$b*3L6u+g13WKPN}!ndw_* zah^rmC_=7{dc`$dW;ejSg~8q;X~7M2ufUkkjq_ocG(Q~TVEytzJ+4?C?wnKNl=3A1 z1q&&!56Zzl*iGn2p=pcAAz{Ubu<|ruB@QX>hA8L_g5Gpi>X3c}xmB!e(QbfpNLlG- z%po39mwJ#7(2HzVLAZ9QC}@7LvOqYIOL5~ zEnsP<>|R<6?n7@% zlSCG5gnlXx4}I&W$NhZJyWj_%-s&`SpJ4;;D1l=bEHp>@G#`M=!V+T2(l$(r*p^0~ z>m(XGfg%i(BEIW@jY}q?+}7D>JHWT>-^cMpUxLA+Ku5~fG26+yjzB7b&oS6ZvglD+ zqO=`<{q~gqz8s1Z~cTJtEU)$K{`YK>T|qbn>1xg@J3IV(&vCG z#0Fi~zMMXODtd*79nSYhr%o}ZGul&HZ{leAX#3@g(E?#p>0SQrZXu!4&r}5%^Q;6g z@!qrtk;#GN&AxJ9`*?+~KvW>rT(9QyM%=Nd?6-=T34)ASYCWTUnsAec)wmK{!lPC_;ouThZ(WWt0`2p?OMtjT(+C$TwOsd zxRshC!_zOB%Q4=Lm0KD5_R5@9)-d{x8sf#BFtL?EDV3j4BM@Dw%!7mIRb>%%8KS(Z zT)a3|)j(Z==vh?*URbN^sqY{xDD silgV&Wr^eF4ng#~#spo+P@4}&P(v*PucFS{wc7m>m^udta?3FL9|OPgz5oCK diff --git a/core/contentGen.py b/core/contentGen.py index 7c17e00..d8ca58b 100644 --- a/core/contentGen.py +++ b/core/contentGen.py @@ -354,17 +354,139 @@ class ContentGenerator: # 生成时间戳 print(full_response) date_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - full_response = self.split_content(full_response) - # 创建结果文件路径 - result_path = os.path.join(self.output_dir, f"{date_time}.json") - os.makedirs(os.path.dirname(result_path), exist_ok=True) - # 保存结果到文件 - with open(result_path, "w", encoding="utf-8") as f: - json.dump(full_response, f, ensure_ascii=False) + try: + # 解析内容为JSON格式 + parsed_data = self.split_content(full_response) + + # 验证内容格式并修复 + validated_data = self._validate_and_fix_data(parsed_data) + + # 创建结果文件路径 + result_path = os.path.join(self.output_dir, f"{date_time}.json") + os.makedirs(os.path.dirname(result_path), exist_ok=True) + + # 保存结果到文件 + with open(result_path, "w", encoding="utf-8") as f: + json.dump(validated_data, f, ensure_ascii=False, indent=4) + + print(f"结果已保存到: {result_path}") + return result_path - print(f"结果已保存到: {result_path}") - return result_path + except Exception as e: + self.logger.error(f"保存结果到文件时出错: {e}") + # 尝试创建一个简单的备用配置 + fallback_data = [{"main_title": "景点风光", "texts": ["自然美景", "人文体验"], "index": 1}] + + # 保存备用数据 + result_path = os.path.join(self.output_dir, f"{date_time}_fallback.json") + os.makedirs(os.path.dirname(result_path), exist_ok=True) + + with open(result_path, "w", encoding="utf-8") as f: + json.dump(fallback_data, f, ensure_ascii=False, indent=4) + + print(f"出错后已保存备用数据到: {result_path}") + return result_path + + def _validate_and_fix_data(self, data): + """ + 验证并修复数据格式,确保符合预期结构 + + 参数: + data: 需要验证的数据 + + 返回: + 修复后的数据 + """ + fixed_data = [] + + # 如果数据是列表 + if isinstance(data, list): + for i, item in enumerate(data): + # 检查项目是否为字典 + if isinstance(item, dict): + # 确保必需字段存在 + fixed_item = { + "index": item.get("index", i + 1), + "main_title": item.get("main_title", f"景点风光 {i+1}"), + "texts": item.get("texts", ["自然美景", "人文体验"]) + } + + # 确保texts是列表格式 + if not isinstance(fixed_item["texts"], list): + if isinstance(fixed_item["texts"], str): + fixed_item["texts"] = [fixed_item["texts"], "美景体验"] + else: + fixed_item["texts"] = ["自然美景", "人文体验"] + + # 限制texts最多包含两个元素 + if len(fixed_item["texts"]) > 2: + fixed_item["texts"] = fixed_item["texts"][:2] + elif len(fixed_item["texts"]) < 2: + while len(fixed_item["texts"]) < 2: + fixed_item["texts"].append("美景体验") + + fixed_data.append(fixed_item) + + # 如果项目是字符串(可能是错误格式的texts值) + elif isinstance(item, str): + self.logger.warning(f"配置项 {i+1} 是字符串格式,将转换为标准格式") + fixed_item = { + "index": i + 1, + "main_title": f"景点风光 {i+1}", + "texts": [item, "美景体验"] + } + fixed_data.append(fixed_item) + else: + self.logger.warning(f"配置项 {i+1} 格式不支持: {type(item)},将使用默认值") + fixed_data.append({ + "index": i + 1, + "main_title": f"景点风光 {i+1}", + "texts": ["自然美景", "人文体验"] + }) + + # 如果数据是字典 + elif isinstance(data, dict): + fixed_item = { + "index": data.get("index", 1), + "main_title": data.get("main_title", "景点风光"), + "texts": data.get("texts", ["自然美景", "人文体验"]) + } + + # 确保texts是列表格式 + if not isinstance(fixed_item["texts"], list): + if isinstance(fixed_item["texts"], str): + fixed_item["texts"] = [fixed_item["texts"], "美景体验"] + else: + fixed_item["texts"] = ["自然美景", "人文体验"] + + # 限制texts最多包含两个元素 + if len(fixed_item["texts"]) > 2: + fixed_item["texts"] = fixed_item["texts"][:2] + elif len(fixed_item["texts"]) < 2: + while len(fixed_item["texts"]) < 2: + fixed_item["texts"].append("美景体验") + + fixed_data.append(fixed_item) + + # 如果数据是字符串或其他格式 + else: + self.logger.warning(f"数据格式不支持: {type(data)},将使用默认值") + fixed_data.append({ + "index": 1, + "main_title": "景点风光", + "texts": ["自然美景", "人文体验"] + }) + + # 确保至少有一个配置项 + if not fixed_data: + fixed_data.append({ + "index": 1, + "main_title": "景点风光", + "texts": ["自然美景", "人文体验"] + }) + + return fixed_data def run(self, info_directory, poster_num, tweet_content): """ @@ -392,31 +514,90 @@ class ContentGenerator: try: result_data = self.split_content(full_response) # This should return the list/dict - # 验证结果数据格式 + # 验证并修复结果数据格式 + fixed_data = [] + + # 如果结果是列表,检查每个项目 if isinstance(result_data, list): for i, item in enumerate(result_data): - if not isinstance(item, dict): - logging.warning(f"配置项 {i+1} 不是字典格式: {item}") - continue - - # 检查并确保必需字段存在 - if 'main_title' not in item: - item['main_title'] = f"景点标题 {i+1}" - logging.warning(f"配置项 {i+1} 缺少 main_title 字段,已添加默认值") - - if 'texts' not in item: - item['texts'] = ["景点特色", "游玩体验"] - logging.warning(f"配置项 {i+1} 缺少 texts 字段,已添加默认值") - - logging.info(f"成功生成并解析海报配置数据,包含 {len(result_data)} 个项目") - else: - logging.warning(f"生成的配置数据不是列表格式: {type(result_data)}") + # 如果项目是字典并且有required_fields,按原样添加或修复 + if isinstance(item, dict): + # 检查并确保必需字段存在 + if 'main_title' not in item: + item['main_title'] = f"景点标题 {i+1}" + logging.warning(f"配置项 {i+1} 缺少 main_title 字段,已添加默认值") + + if 'texts' not in item: + item['texts'] = ["景点特色", "游玩体验"] + logging.warning(f"配置项 {i+1} 缺少 texts 字段,已添加默认值") + + if 'index' not in item: + item['index'] = i + 1 + logging.warning(f"配置项 {i+1} 缺少 index 字段,已添加默认值") + + fixed_data.append(item) + # 如果项目是字符串(可能是错误格式的texts值) + elif isinstance(item, str): + logging.warning(f"配置项 {i+1} 是字符串格式,将转换为标准格式") + fixed_item = { + "index": i + 1, + "main_title": f"景点风光 {i+1}", + "texts": [item, "美景体验"] + } + fixed_data.append(fixed_item) + else: + logging.warning(f"配置项 {i+1} 格式不支持: {type(item)},将使用默认值") + fixed_data.append({ + "index": i + 1, + "main_title": f"景点风光 {i+1}", + "texts": ["自然美景", "人文体验"] + }) + + # 如果处理后的列表为空(极端情况),则使用默认值 + if not fixed_data: + logging.warning("处理后的配置列表为空,使用默认值") + for i in range(poster_num): + fixed_data.append({ + "index": i + 1, + "main_title": f"景点风光 {i+1}", + "texts": ["自然美景", "人文体验"] + }) + + logging.info(f"成功生成并修复海报配置数据,包含 {len(fixed_data)} 个项目") + return fixed_data + + # 如果结果是单个字典(不常见但可能),将其转换为列表 + elif isinstance(result_data, dict): + logging.warning(f"生成的配置数据是单个字典格式,将转换为列表") + + # 检查并确保必需字段存在 + if 'main_title' not in result_data: + result_data['main_title'] = "景点风光" + + if 'texts' not in result_data: + result_data['texts'] = ["自然美景", "人文体验"] + + if 'index' not in result_data: + result_data['index'] = 1 + + fixed_data = [result_data] + return fixed_data + + # 如果结果是其他格式(如字符串),创建默认配置 + else: + logging.warning(f"生成的配置数据格式不支持: {type(result_data)},将使用默认值") + for i in range(poster_num): + fixed_data.append({ + "index": i + 1, + "main_title": f"景点风光 {i+1}", + "texts": ["自然美景", "人文体验"] + }) + return fixed_data - return result_data # Return the actual data except Exception as e: logging.exception(f"Failed to parse JSON from AI response in ContentGenerator: {e}\nRaw Response:\n{full_response[:500]}...") # Log error and partial response - # 失败后尝试创建一个默认配置 + # 失败后创建一个默认配置 logging.info("创建默认海报配置数据") default_configs = [] for i in range(poster_num): diff --git a/poster_gen_config.json b/poster_gen_config.json index 2bcc3c8..d786f8b 100644 --- a/poster_gen_config.json +++ b/poster_gen_config.json @@ -1,6 +1,6 @@ { "date": "5月15日, 5月16日, 5月17日, 6月1日", - "num": 30, + "num": 2, "model": "qwenQWQ", "api_url": "http://localhost:8000/v1/", "api_key": "EMPTY", diff --git a/utils/__pycache__/tweet_generator.cpython-312.pyc b/utils/__pycache__/tweet_generator.cpython-312.pyc index cbb8a89f90141209e61a019433797c298bfc29ac..8826b2b6364059cb42596159600dd065e7b68b44 100644 GIT binary patch delta 21 bcmX@~gz?Z5My}Jmyj%=GP`GC!mqif(Rx<|L delta 21 bcmX@~gz?Z5My}Jmyj%=Gpu21%mqif(R4oR8 diff --git a/utils/tweet_generator.py b/utils/tweet_generator.py index 7a25fc5..7697a70 100644 --- a/utils/tweet_generator.py +++ b/utils/tweet_generator.py @@ -57,7 +57,7 @@ class tweetContent: self.variant_index = variant_index try: - self.title, self.content = self.split_content(result) + self.title, self.content = self.split_content(result) self.json_data = self.gen_result_json() except Exception as e: logging.error(f"Failed to parse AI result for {article_index}_{variant_index}: {e}")