优化了存储内容:所有结果在 JSON 中均以 Base64 编码形式存储
This commit is contained in:
parent
c8c4031696
commit
3b6a01d3a4
Binary file not shown.
Binary file not shown.
@ -62,7 +62,7 @@ class ContentJudger:
|
||||
关键字词:价、元、r、人民币、rmb、优惠、活动、福利、赠、免费、折、DIY、跟拍、送、摄影、兑、服务、¥、包、课、提供、选、专业、补、差
|
||||
4. 字数控制:每个文案的标题字数都必须少于20个字(计数包括文字、符号、数字和emoji)。如果标题超过20个字,请在符合文案风格的前提下修改标题到20个字以内,尽量保留emoji,必须保证标题流畅通顺。
|
||||
5. 敏感字词替换:请删去标题中的数字后面的"元"和"r",并将正文中数字后面的"元"字修改为"r"。例如:标题中的399元修改为399,正文中的399元修改为399r
|
||||
6. 特征语句保留:请保留文案中原本的引流语句,不要修改或删除。请保留文案中的换行符"\n",不要修改或删除。
|
||||
6. 特征语句保留:请保留文案中原本的引流语句,不要修改或删除。请保留文案中的换行符 \\n,不要修改或删除换行符。
|
||||
7. 面向人群保留:请尽量保留文案原本的面向人群和风格,这是同一产品面向多种人群营销的策略。例如产品资料中写明亲子游时,文案写"为情侣定制的山水秘境"是可以接受的。
|
||||
8. 案例如下,请参考案例评判真假信息的尺度,逐行逐句仔细分析不符点和修改思路,并按照分析思路落实对每一处不符的修改措施,严格审查每一篇文案:
|
||||
{
|
||||
@ -118,9 +118,9 @@ class ContentJudger:
|
||||
8、产品材料中未提及房内配有加厚床垫/卡通洗漱杯/尿布台(无需额外购买),因此属于不符内容。应回顾产品资料中关于房内配置的内容,修改为:房内配置:55英寸超大纯平电视+独立的浴缸+超大的落地玻璃窗,尽览蕉门河风景,尽享亲子度假时光。
|
||||
9、产品材料中未提及五一专属加码,但文案中提到5月1-5日期间入住,凭房卡可免费领取儿童防晒冰袖+湿巾礼包,因此属于不符内容。应回顾产品资料,找到现有文案未提及的产品特色,修改为:套餐专属福利:1、豪华客房一间一晚(周一至四只开放双床房) 2、2大1小自助早晚餐 3、赠送2大1小水鸟世界门票(酒店前台领取),无需额外购买
|
||||
10、产品资料中未提及水鸟世界门票领取有时间限制,但文案中提到水鸟世界门票需提前1小时至前台领取纸质票,因此属于不符内容。应修改为:酒店前台领取水鸟世界纸质门票
|
||||
综合以上分析结果,将修改应用到原文案中,得到修改后的文案。"
|
||||
综合以上分析结果,将修改应用到原文案中,得到修改后的文案。",
|
||||
"title": "五一遛娃👶必囤!喜来登1088景观房",
|
||||
"content": "五一不想挤人潮?南沙这家酒店直接承包遛娃+度假双重快乐‼️\n地铁直达!2大1小1088r住景观房,含双早+自助晚餐+水鸟世界门票,儿童乐园/泳池/健身房全开放!\n🌟【遛娃刚需全配齐】\n✅ 儿童乐园:酒店设有免费儿童乐园,提供丰富的游乐设施,让孩子们尽情玩耍\n✅ 户外泳池:酒店配有户外无边泳池,供大人小孩一同享受清凉时光 \n✅ 健身房:酒店提供免费健身中心,适合家庭成员共同锻炼。\n\n📍【1小时玩转南沙】\n① 南沙天后宫(车程20分钟):穿汉服拍大片,听妈祖传说涨知识\n② 南沙湿地公园(40分钟):5月芦苇摇曳,带娃认鸟类+乘船探秘\n③ 十九涌海鲜街(45分钟):现捞现煮生猛海鲜,人均50r吃到撑 \n\n🍽️【家长友好细节】 \n• 自助餐厅:供应鲜美海鲜、精美甜品等任君选择,大人小孩都爱吃 \n• 房内配置:55英寸超大纯平电视+独立的浴缸+超大的落地玻璃窗,尽览蕉门河风景,尽享亲子度假时光 \n• 安全保障:酒店设有完善的监控系统和安保措施,全力保障您与家人的安全 \n\n🎁【套餐专属福利】\n1、豪华客房一间一晚(周一至四只开放双床房) \n2、2大1小自助早晚餐 \n3、赠送2大1小水鸟世界门票(酒店前台领取),无需额外购买 \n\n📌Tips: \n1. 周一至周四仅限双床房型,周五起可选大床房 \n2. 酒店前台领取水鸟世界纸质门票 \n3. 地铁四号线金洲站下车,打车15分钟直达酒店 \n\n这个五一,南沙喜来登让你躺着遛娃!不用长途跋涉,家门口就能玩出仪式感~\n
|
||||
"content": "五一不想挤人潮?南沙这家酒店直接承包遛娃+度假双重快乐‼️\n地铁直达!2大1小1088r住景观房,含双早+自助晚餐+水鸟世界门票,儿童乐园/泳池/健身房全开放!\n🌟【遛娃刚需全配齐】\n✅ 儿童乐园:酒店设有免费儿童乐园,提供丰富的游乐设施,让孩子们尽情玩耍\n✅ 户外泳池:酒店配有户外无边泳池,供大人小孩一同享受清凉时光 \n✅ 健身房:酒店提供免费健身中心,适合家庭成员共同锻炼。\n\n📍【1小时玩转南沙】\n① 南沙天后宫(车程20分钟):穿汉服拍大片,听妈祖传说涨知识\n② 南沙湿地公园(40分钟):5月芦苇摇曳,带娃认鸟类+乘船探秘\n③ 十九涌海鲜街(45分钟):现捞现煮生猛海鲜,人均50r吃到撑 \n\n🍽️【家长友好细节】 \n• 自助餐厅:供应鲜美海鲜、精美甜品等任君选择,大人小孩都爱吃 \n• 房内配置:55英寸超大纯平电视+独立的浴缸+超大的落地玻璃窗,尽览蕉门河风景,尽享亲子度假时光 \n• 安全保障:酒店设有完善的监控系统和安保措施,全力保障您与家人的安全 \n\n🎁【套餐专属福利】\n1、豪华客房一间一晚(周一至四只开放双床房) \n2、2大1小自助早晚餐 \n3、赠送2大1小水鸟世界门票(酒店前台领取),无需额外购买 \n\n📌Tips: \n1. 周一至周四仅限双床房型,周五起可选大床房 \n2. 酒店前台领取水鸟世界纸质门票 \n3. 地铁四号线金洲站下车,打车15分钟直达酒店 \n\n这个五一,南沙喜来登让你躺着遛娃!不用长途跋涉,家门口就能玩出仪式感~\n"
|
||||
}
|
||||
|
||||
8. 必须按照以下格式输出修改后内容,不需要输出无关内容
|
||||
|
||||
@ -101,8 +101,27 @@ class FileSystemOutputHandler(OutputHandler):
|
||||
topics_path = os.path.join(run_dir, f"tweet_topic_{run_id}.json")
|
||||
try:
|
||||
with open(topics_path, "w", encoding="utf-8") as f:
|
||||
json.dump(topics_list, f, ensure_ascii=False, indent=4, ignore_nan=True, cls=SafeJSONEncoder)
|
||||
# 不使用自定义编码器,直接使用标准json
|
||||
json.dump(topics_list, f, ensure_ascii=False, indent=4, ignore_nan=True)
|
||||
logging.info(f"Topics list saved successfully to: {topics_path}")
|
||||
|
||||
# 额外创建txt格式输出
|
||||
txt_path = os.path.join(run_dir, f"tweet_topic_{run_id}.txt")
|
||||
with open(txt_path, "w", encoding="utf-8") as f:
|
||||
f.write(f"# 选题列表 (run_id: {run_id})\n\n")
|
||||
for topic in topics_list:
|
||||
f.write(f"## 选题 {topic.get('index', 'N/A')}\n")
|
||||
f.write(f"- 日期: {topic.get('date', 'N/A')}\n")
|
||||
f.write(f"- 对象: {topic.get('object', 'N/A')}\n")
|
||||
f.write(f"- 产品: {topic.get('product', 'N/A')}\n")
|
||||
f.write(f"- 产品策略: {topic.get('product_logic', 'N/A')}\n")
|
||||
f.write(f"- 风格: {topic.get('style', 'N/A')}\n")
|
||||
f.write(f"- 风格策略: {topic.get('style_logic', 'N/A')}\n")
|
||||
f.write(f"- 目标受众: {topic.get('target_audience', 'N/A')}\n")
|
||||
f.write(f"- 受众策略: {topic.get('target_audience_logic', 'N/A')}\n")
|
||||
f.write(f"- 逻辑: {topic.get('logic', 'N/A')}\n\n")
|
||||
logging.info(f"选题文本版本已保存到: {txt_path}")
|
||||
|
||||
except Exception as e:
|
||||
logging.exception(f"Error saving topic JSON file to {topics_path}:")
|
||||
|
||||
@ -124,81 +143,82 @@ class FileSystemOutputHandler(OutputHandler):
|
||||
|
||||
# 创建输出数据的副本,避免修改原始数据
|
||||
import copy
|
||||
output_data = copy.deepcopy(content_data)
|
||||
input_data = copy.deepcopy(content_data)
|
||||
|
||||
# 统一使用tags字段,避免tag和tags重复
|
||||
if "tag" in output_data and "tags" not in output_data:
|
||||
if "tag" in input_data and "tags" not in input_data:
|
||||
# 只有tag字段存在,复制到tags
|
||||
output_data["tags"] = output_data["tag"]
|
||||
del output_data["tag"]
|
||||
elif "tag" in output_data and "tags" in output_data:
|
||||
# 两个字段都存在,保留tags并删除tag
|
||||
del output_data["tag"]
|
||||
input_data["tags"] = input_data["tag"]
|
||||
elif "tag" in input_data and "tags" in input_data:
|
||||
# 两个字段都存在,保留tags
|
||||
pass
|
||||
|
||||
# 确保即使在未启用审核的情况下,字段也保持一致
|
||||
if not output_data.get("judged", False):
|
||||
output_data["judged"] = False
|
||||
# 添加original_title、original_content和judge_analysis字段,值为null
|
||||
output_data["original_title"] = None
|
||||
output_data["original_content"] = None
|
||||
output_data["judge_analysis"] = None
|
||||
# 添加original_tags字段
|
||||
if "tags" in output_data and "original_tags" not in output_data:
|
||||
output_data["original_tags"] = output_data["tags"]
|
||||
if not input_data.get("judged", False):
|
||||
input_data["judged"] = False
|
||||
# 添加original字段(临时),值为当前值
|
||||
if "title" in input_data and "original_title" not in input_data:
|
||||
input_data["original_title"] = input_data["title"]
|
||||
if "content" in input_data and "original_content" not in input_data:
|
||||
input_data["original_content"] = input_data["content"]
|
||||
if "tags" in input_data and "original_tags" not in input_data:
|
||||
input_data["original_tags"] = input_data["tags"]
|
||||
|
||||
# 保存原始值用于调试
|
||||
original_title = output_data.get("title", "")
|
||||
original_content = output_data.get("content", "")
|
||||
# 保存原始值用于txt文件生成和调试
|
||||
original_title = input_data.get("title", "")
|
||||
original_content = input_data.get("content", "")
|
||||
original_tags = input_data.get("tags", "")
|
||||
original_judge_analysis = input_data.get("judge_analysis", "")
|
||||
|
||||
# 添加Base64编码内容
|
||||
# 创建一个只包含元数据和base64编码的输出数据对象
|
||||
output_data = {
|
||||
# 保留元数据字段
|
||||
"judged": input_data.get("judged", False),
|
||||
"judge_success": input_data.get("judge_success", False)
|
||||
}
|
||||
|
||||
# 为所有内容字段创建base64编码版本
|
||||
try:
|
||||
# 编码标题和内容
|
||||
title_base64 = base64.b64encode(output_data.get("title", "").encode('utf-8')).decode('ascii')
|
||||
content_base64 = base64.b64encode(output_data.get("content", "").encode('utf-8')).decode('ascii')
|
||||
# 1. 标题和内容
|
||||
if "title" in input_data and input_data["title"]:
|
||||
output_data["title_base64"] = base64.b64encode(input_data["title"].encode('utf-8')).decode('ascii')
|
||||
|
||||
# 添加到输出数据
|
||||
output_data["title_base64"] = title_base64
|
||||
output_data["content_base64"] = content_base64
|
||||
if "content" in input_data and input_data["content"]:
|
||||
output_data["content_base64"] = base64.b64encode(input_data["content"].encode('utf-8')).decode('ascii')
|
||||
|
||||
# 如果有原始内容,也编码
|
||||
if "original_title" in output_data and output_data["original_title"]:
|
||||
output_data["original_title_base64"] = base64.b64encode(
|
||||
output_data["original_title"].encode('utf-8')).decode('ascii')
|
||||
if "original_content" in output_data and output_data["original_content"]:
|
||||
output_data["original_content_base64"] = base64.b64encode(
|
||||
output_data["original_content"].encode('utf-8')).decode('ascii')
|
||||
# 2. 标签
|
||||
if "tags" in input_data and input_data["tags"]:
|
||||
output_data["tags_base64"] = base64.b64encode(input_data["tags"].encode('utf-8')).decode('ascii')
|
||||
|
||||
# 3. 原始内容
|
||||
if "original_title" in input_data and input_data["original_title"]:
|
||||
output_data["original_title_base64"] = base64.b64encode(input_data["original_title"].encode('utf-8')).decode('ascii')
|
||||
|
||||
if "original_content" in input_data and input_data["original_content"]:
|
||||
output_data["original_content_base64"] = base64.b64encode(input_data["original_content"].encode('utf-8')).decode('ascii')
|
||||
|
||||
# 4. 原始标签
|
||||
if "original_tags" in input_data and input_data["original_tags"]:
|
||||
output_data["original_tags_base64"] = base64.b64encode(input_data["original_tags"].encode('utf-8')).decode('ascii')
|
||||
|
||||
# 5. 审核分析
|
||||
if "judge_analysis" in input_data and input_data["judge_analysis"]:
|
||||
output_data["judge_analysis_base64"] = base64.b64encode(input_data["judge_analysis"].encode('utf-8')).decode('ascii')
|
||||
|
||||
logging.info("成功添加Base64编码内容")
|
||||
except Exception as e:
|
||||
logging.error(f"Base64编码内容时出错: {e}")
|
||||
|
||||
# 对内容进行深度清理,确保安全序列化
|
||||
try:
|
||||
# 暂存judge_success状态
|
||||
judge_success = output_data.get("judge_success", False)
|
||||
# 保存可能有用的额外字段
|
||||
if "error" in input_data:
|
||||
output_data["error"] = input_data["error"]
|
||||
|
||||
# 深度清理
|
||||
output_data = self._sanitize_content_for_json(output_data)
|
||||
|
||||
# 恢复judge_success状态
|
||||
output_data["judge_success"] = judge_success
|
||||
|
||||
# 移除可能的错误标志 - 我们通过尝试序列化来决定是否设置它
|
||||
if "error" in output_data:
|
||||
del output_data["error"]
|
||||
if "raw_result" in output_data:
|
||||
del output_data["raw_result"]
|
||||
|
||||
logging.info("内容已经过安全清理,可以序列化")
|
||||
except Exception as e:
|
||||
logging.error(f"内容清理过程中出错: {e}")
|
||||
|
||||
# 保存统一格式的article.json
|
||||
# 保存统一格式的article.json (只包含base64编码和元数据)
|
||||
content_path = os.path.join(variant_dir, "article.json")
|
||||
try:
|
||||
with open(content_path, "w", encoding="utf-8") as f:
|
||||
# 使用自定义的SafeJSONEncoder
|
||||
json.dump(output_data, f, ensure_ascii=False, indent=4, ignore_nan=True, cls=SafeJSONEncoder)
|
||||
# 使用标准json
|
||||
json.dump(output_data, f, ensure_ascii=False, indent=4, ignore_nan=True)
|
||||
logging.info(f"Content JSON saved to: {content_path}")
|
||||
except Exception as e:
|
||||
logging.exception(f"Failed to save content JSON to {content_path}: {e}")
|
||||
@ -206,19 +226,35 @@ class FileSystemOutputHandler(OutputHandler):
|
||||
# 创建一份article.txt文件以便直接查看
|
||||
txt_path = os.path.join(variant_dir, "article.txt")
|
||||
try:
|
||||
# 使用原始内容
|
||||
# 使用原始内容,保留所有换行符
|
||||
with open(txt_path, "w", encoding="utf-8") as f:
|
||||
f.write(f"{original_title}\n\n{original_content}")
|
||||
if original_title:
|
||||
f.write(f"{original_title}\n\n")
|
||||
|
||||
# 保持原始内容的所有换行符
|
||||
if original_content:
|
||||
f.write(original_content)
|
||||
|
||||
if original_tags:
|
||||
f.write(f"\n\n{original_tags}")
|
||||
|
||||
if original_judge_analysis:
|
||||
f.write(f"\n\n审核分析:\n{original_judge_analysis}")
|
||||
|
||||
logging.info(f"Article text saved to: {txt_path}")
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to save article.txt: {e}")
|
||||
|
||||
# 记录调试信息,无论是否成功
|
||||
# 记录调试信息,无论是否成功 (包含原始数据的完整副本以便调试)
|
||||
debug_path = os.path.join(variant_dir, "debug_content.txt")
|
||||
try:
|
||||
with open(debug_path, "w", encoding="utf-8") as f:
|
||||
f.write(f"原始标题: {original_title}\n\n")
|
||||
f.write(f"原始内容: {original_content}\n\n")
|
||||
if original_tags:
|
||||
f.write(f"原始标签: {original_tags}\n\n")
|
||||
if original_judge_analysis:
|
||||
f.write(f"审核分析: {original_judge_analysis}\n\n")
|
||||
f.write("---处理后---\n\n")
|
||||
for key, value in output_data.items():
|
||||
if isinstance(value, str):
|
||||
@ -234,11 +270,10 @@ class FileSystemOutputHandler(OutputHandler):
|
||||
prompt_path = os.path.join(variant_dir, "tweet_prompt.txt")
|
||||
try:
|
||||
with open(prompt_path, "w", encoding="utf-8") as f:
|
||||
# Assuming prompt_data is the user prompt used for this variant
|
||||
f.write(prompt_data + "\n")
|
||||
f.write(prompt_data)
|
||||
logging.info(f"Content prompt saved to: {prompt_path}")
|
||||
except Exception as e:
|
||||
logging.exception(f"Failed to save content prompt to {prompt_path}: {e}")
|
||||
logging.error(f"Failed to save content prompt to {prompt_path}: {e}")
|
||||
|
||||
def _ultra_safe_clean(self, text):
|
||||
"""执行最严格的字符清理,确保100%可序列化"""
|
||||
@ -252,7 +287,8 @@ class FileSystemOutputHandler(OutputHandler):
|
||||
config_path = os.path.join(run_dir, f"topic_{topic_index}_poster_configs.json")
|
||||
try:
|
||||
with open(config_path, 'w', encoding='utf-8') as f_cfg_topic:
|
||||
json.dump(config_data, f_cfg_topic, ensure_ascii=False, indent=4, ignore_nan=True, cls=SafeJSONEncoder)
|
||||
# 不使用自定义编码器,使用标准json
|
||||
json.dump(config_data, f_cfg_topic, ensure_ascii=False, indent=4, ignore_nan=True)
|
||||
logging.info(f"Saved complete poster configurations for topic {topic_index} to: {config_path}")
|
||||
except Exception as save_err:
|
||||
logging.error(f"Failed to save complete poster configurations for topic {topic_index} to {config_path}: {save_err}")
|
||||
@ -308,7 +344,8 @@ class FileSystemOutputHandler(OutputHandler):
|
||||
metadata_path = os.path.join(os.path.dirname(save_path), metadata_filename)
|
||||
try:
|
||||
with open(metadata_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(metadata, f, ensure_ascii=False, indent=4, ignore_nan=True, cls=SafeJSONEncoder)
|
||||
# 不使用自定义编码器,使用标准json
|
||||
json.dump(metadata, f, ensure_ascii=False, indent=4, ignore_nan=True)
|
||||
logging.info(f"保存{image_type}元数据到: {metadata_path}")
|
||||
except Exception as me:
|
||||
logging.error(f"无法保存{image_type}元数据到{metadata_path}: {me}")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user