基本修复了错误出现,全部json改为base64
This commit is contained in:
parent
881a33786b
commit
b17b4b6d58
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -145,173 +145,155 @@ class FileSystemOutputHandler(OutputHandler):
|
|||||||
import copy
|
import copy
|
||||||
input_data = copy.deepcopy(content_data)
|
input_data = copy.deepcopy(content_data)
|
||||||
|
|
||||||
# 统一使用tags字段,避免tag和tags重复
|
# 简化输出数据结构,只保留必要的元数据和base64编码内容
|
||||||
if "tag" in input_data and "tags" not in input_data:
|
|
||||||
# 只有tag字段存在,复制到tags
|
|
||||||
input_data["tags"] = input_data["tag"]
|
|
||||||
elif "tag" in input_data and "tags" in input_data:
|
|
||||||
# 两个字段都存在,保留tags
|
|
||||||
pass
|
|
||||||
|
|
||||||
# 确保即使在未启用审核的情况下,字段也保持一致
|
|
||||||
if not input_data.get("judged", False):
|
|
||||||
input_data["judged"] = False
|
|
||||||
# 添加original字段(临时),值为当前值
|
|
||||||
if "title" in input_data and "original_title" not in input_data:
|
|
||||||
input_data["original_title"] = input_data["title"]
|
|
||||||
if "content" in input_data and "original_content" not in input_data:
|
|
||||||
input_data["original_content"] = input_data["content"]
|
|
||||||
if "tags" in input_data and "original_tags" not in input_data:
|
|
||||||
input_data["original_tags"] = input_data["tags"]
|
|
||||||
|
|
||||||
# 统一审核分析字段,优先使用judge_analysis,其次使用不良内容分析
|
|
||||||
if "judge_analysis" not in input_data and "不良内容分析" in input_data:
|
|
||||||
input_data["judge_analysis"] = input_data["不良内容分析"]
|
|
||||||
elif "不良内容分析" not in input_data and "judge_analysis" in input_data:
|
|
||||||
input_data["不良内容分析"] = input_data["judge_analysis"]
|
|
||||||
|
|
||||||
# 保存原始值用于txt文件生成和调试
|
|
||||||
original_title = input_data.get("original_title", input_data.get("title", ""))
|
|
||||||
original_content = input_data.get("original_content", input_data.get("content", ""))
|
|
||||||
original_tags = input_data.get("original_tags", input_data.get("tags", ""))
|
|
||||||
judge_title = input_data.get("title", "")
|
|
||||||
judge_content = input_data.get("content", "")
|
|
||||||
judge_tags = input_data.get("tags", "")
|
|
||||||
original_judge_analysis = input_data.get("judge_analysis", "")
|
|
||||||
|
|
||||||
# 创建一个只包含元数据和base64编码的输出数据对象
|
|
||||||
output_data = {
|
output_data = {
|
||||||
# 保留元数据字段
|
|
||||||
"judged": input_data.get("judged", False),
|
"judged": input_data.get("judged", False),
|
||||||
"judge_success": input_data.get("judge_success", False)
|
"judge_success": input_data.get("judge_success", False),
|
||||||
|
"error": input_data.get("error", False)
|
||||||
}
|
}
|
||||||
|
|
||||||
# 为所有内容字段创建base64编码版本
|
# 检查并处理内容字段,确保全部以base64编码保存
|
||||||
try:
|
try:
|
||||||
# 1. 标题和内容
|
# 检查是否已经是base64编码的字段
|
||||||
if "title" in input_data and input_data["title"]:
|
def is_base64(s):
|
||||||
output_data["title_base64"] = base64.b64encode(input_data["title"].encode('utf-8')).decode('ascii')
|
if not isinstance(s, str):
|
||||||
|
return False
|
||||||
|
try:
|
||||||
|
# 尝试解码看是否成功
|
||||||
|
base64.b64decode(s).decode('utf-8')
|
||||||
|
# 如果能成功解码,而且是标准base64长度(4的倍数),则可能是base64
|
||||||
|
return len(s) % 4 == 0 and all(c in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=' for c in s)
|
||||||
|
except:
|
||||||
|
return False
|
||||||
|
|
||||||
if "content" in input_data and input_data["content"]:
|
# 1. 处理标题和内容
|
||||||
output_data["content_base64"] = base64.b64encode(input_data["content"].encode('utf-8')).decode('ascii')
|
if "title" in input_data:
|
||||||
|
if is_base64(input_data["title"]):
|
||||||
|
# 已经是base64编码,直接使用
|
||||||
|
output_data["title_base64"] = input_data["title"]
|
||||||
|
# 尝试解码用于txt文件
|
||||||
|
try:
|
||||||
|
title_text = base64.b64decode(input_data["title"]).decode('utf-8')
|
||||||
|
except:
|
||||||
|
title_text = input_data["title"]
|
||||||
|
else:
|
||||||
|
# 未编码,进行base64编码
|
||||||
|
title_text = input_data["title"]
|
||||||
|
output_data["title_base64"] = base64.b64encode(title_text.encode('utf-8')).decode('ascii')
|
||||||
|
|
||||||
# 2. 标签
|
if "content" in input_data:
|
||||||
if "tags" in input_data and input_data["tags"]:
|
if is_base64(input_data["content"]):
|
||||||
output_data["tags_base64"] = base64.b64encode(input_data["tags"].encode('utf-8')).decode('ascii')
|
# 已经是base64编码,直接使用
|
||||||
|
output_data["content_base64"] = input_data["content"]
|
||||||
|
# 尝试解码用于txt文件
|
||||||
|
try:
|
||||||
|
content_text = base64.b64decode(input_data["content"]).decode('utf-8')
|
||||||
|
except:
|
||||||
|
content_text = input_data["content"]
|
||||||
|
else:
|
||||||
|
# 未编码,进行base64编码
|
||||||
|
content_text = input_data["content"]
|
||||||
|
output_data["content_base64"] = base64.b64encode(content_text.encode('utf-8')).decode('ascii')
|
||||||
|
|
||||||
# 3. 原始内容
|
# 2. 处理标签
|
||||||
if "original_title" in input_data and input_data["original_title"]:
|
tags_text = input_data.get("tags", input_data.get("tag", ""))
|
||||||
output_data["original_title_base64"] = base64.b64encode(input_data["original_title"].encode('utf-8')).decode('ascii')
|
if tags_text:
|
||||||
|
if is_base64(tags_text):
|
||||||
|
output_data["tags_base64"] = tags_text
|
||||||
|
try:
|
||||||
|
tags_text = base64.b64decode(tags_text).decode('utf-8')
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
output_data["tags_base64"] = base64.b64encode(tags_text.encode('utf-8')).decode('ascii')
|
||||||
|
|
||||||
if "original_content" in input_data and input_data["original_content"]:
|
# 3. 处理分析
|
||||||
output_data["original_content_base64"] = base64.b64encode(input_data["original_content"].encode('utf-8')).decode('ascii')
|
analysis_text = input_data.get("analysis", input_data.get("judge_analysis", ""))
|
||||||
|
if analysis_text:
|
||||||
|
if is_base64(analysis_text):
|
||||||
|
output_data["analysis_base64"] = analysis_text
|
||||||
|
try:
|
||||||
|
analysis_text = base64.b64decode(analysis_text).decode('utf-8')
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
output_data["analysis_base64"] = base64.b64encode(analysis_text.encode('utf-8')).decode('ascii')
|
||||||
|
|
||||||
# 4. 原始标签
|
logging.info("成功处理内容并添加Base64编码")
|
||||||
if "original_tags" in input_data and input_data["original_tags"]:
|
|
||||||
output_data["original_tags_base64"] = base64.b64encode(input_data["original_tags"].encode('utf-8')).decode('ascii')
|
|
||||||
|
|
||||||
# 5. 审核分析 - 检查judge_analysis和不良内容分析两个字段
|
|
||||||
judge_analysis = input_data.get("judge_analysis", input_data.get("不良内容分析", ""))
|
|
||||||
if judge_analysis:
|
|
||||||
output_data["judge_analysis_base64"] = base64.b64encode(judge_analysis.encode('utf-8')).decode('ascii')
|
|
||||||
|
|
||||||
logging.info("成功添加Base64编码内容")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Base64编码内容时出错: {e}")
|
logging.error(f"处理内容或Base64编码时出错: {e}")
|
||||||
|
|
||||||
# 保存可能有用的额外字段
|
# 保存处理后的article.json
|
||||||
if "error" in input_data:
|
|
||||||
output_data["error"] = input_data["error"]
|
|
||||||
|
|
||||||
# 保存统一格式的article.json (只包含base64编码和元数据)
|
|
||||||
content_path = os.path.join(variant_dir, "article.json")
|
content_path = os.path.join(variant_dir, "article.json")
|
||||||
try:
|
try:
|
||||||
with open(content_path, "w", encoding="utf-8") as f:
|
with open(content_path, "w", encoding="utf-8") as f:
|
||||||
# 使用标准json
|
# 使用标准json并确保正确处理中文和特殊字符
|
||||||
json.dump(output_data, f, ensure_ascii=False, indent=4, ignore_nan=True)
|
json.dump(output_data, f, ensure_ascii=False, indent=4)
|
||||||
logging.info(f"Content JSON saved to: {content_path}")
|
logging.info(f"Content JSON saved to: {content_path}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.exception(f"Failed to save content JSON to {content_path}: {e}")
|
logging.exception(f"Failed to save content JSON to {content_path}: {e}")
|
||||||
|
|
||||||
# 创建一份article.txt文件以便直接查看
|
# 创建一份article.txt文件,使用解码后的文本
|
||||||
txt_path = os.path.join(variant_dir, "article.txt")
|
txt_path = os.path.join(variant_dir, "article.txt")
|
||||||
try:
|
try:
|
||||||
# 重新组织内容显示,明确区分原始内容和审核后内容
|
is_judged = output_data.get("judged", False)
|
||||||
|
is_judge_success = output_data.get("judge_success", False)
|
||||||
|
|
||||||
|
# 确保我们有可用的文本版本
|
||||||
|
title_text = title_text if 'title_text' in locals() else "未找到标题"
|
||||||
|
content_text = content_text if 'content_text' in locals() else "未找到内容"
|
||||||
|
tags_text = tags_text if 'tags_text' in locals() else ""
|
||||||
|
|
||||||
with open(txt_path, "w", encoding="utf-8") as f:
|
with open(txt_path, "w", encoding="utf-8") as f:
|
||||||
# 根据审核状态决定显示哪些内容
|
# 根据审核状态决定显示内容
|
||||||
is_judged = input_data.get("judged", False)
|
|
||||||
is_judge_success = input_data.get("judge_success", False)
|
|
||||||
|
|
||||||
if is_judged and is_judge_success:
|
if is_judged and is_judge_success:
|
||||||
# 显示审核后的内容
|
f.write(f"{title_text}\n\n")
|
||||||
f.write(f"{judge_title}\n\n")
|
f.write(content_text)
|
||||||
if judge_content:
|
if tags_text:
|
||||||
f.write(judge_content)
|
f.write(f"\n\n{tags_text}")
|
||||||
if judge_tags:
|
|
||||||
f.write(f"\n\n{judge_tags}")
|
|
||||||
|
|
||||||
# 在最后添加原始内容作为参考
|
|
||||||
if original_title != judge_title or original_content != judge_content:
|
|
||||||
f.write("\n\n=== 原始内容 ===\n")
|
|
||||||
f.write(f"{original_title}\n\n")
|
|
||||||
if original_content:
|
|
||||||
f.write(original_content)
|
|
||||||
if original_tags and original_tags != judge_tags:
|
|
||||||
f.write(f"\n\n{original_tags}")
|
|
||||||
elif is_judged and not is_judge_success:
|
|
||||||
# 审核失败,显示审核失败信息和原始内容
|
|
||||||
f.write("审核失败\n\n")
|
|
||||||
f.write(f"{original_title}\n\n")
|
|
||||||
if original_content:
|
|
||||||
f.write(original_content)
|
|
||||||
if original_tags:
|
|
||||||
f.write(f"\n\n{original_tags}")
|
|
||||||
else:
|
else:
|
||||||
# 未审核,直接显示原始内容
|
# 未审核或审核未通过
|
||||||
f.write(f"{original_title}\n\n")
|
if not is_judged:
|
||||||
if original_content:
|
f.write(f"{title_text}\n\n")
|
||||||
f.write(original_content)
|
else:
|
||||||
if original_tags:
|
# 审核失败
|
||||||
f.write(f"\n\n{original_tags}")
|
f.write(f"审核失败\n\n{title_text}\n\n")
|
||||||
|
f.write(content_text)
|
||||||
|
if tags_text:
|
||||||
|
f.write(f"\n\n{tags_text}")
|
||||||
|
|
||||||
# 添加审核分析信息(如果有)
|
# 添加审核分析
|
||||||
if original_judge_analysis:
|
if 'analysis_text' in locals() and analysis_text:
|
||||||
f.write(f"\n\n=== 审核分析 ===\n{original_judge_analysis}")
|
f.write(f"\n\n=== 审核分析 ===\n{analysis_text}")
|
||||||
|
|
||||||
logging.info(f"Article text saved to: {txt_path}")
|
logging.info(f"Article text saved to: {txt_path}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Failed to save article.txt: {e}")
|
logging.error(f"Failed to save article.txt: {e}")
|
||||||
|
|
||||||
# 记录调试信息,无论是否成功 (包含原始数据的完整副本以便调试)
|
# 保存调试信息
|
||||||
debug_path = os.path.join(variant_dir, "debug_content.txt")
|
debug_path = os.path.join(variant_dir, "debug_content.txt")
|
||||||
try:
|
try:
|
||||||
with open(debug_path, "w", encoding="utf-8") as f:
|
with open(debug_path, "w", encoding="utf-8") as f:
|
||||||
f.write(f"原始标题: {original_title}\n\n")
|
f.write(f"处理前内容信息:\n")
|
||||||
f.write(f"原始内容: {original_content}\n\n")
|
f.write(f"标题: {input_data.get('title', '未提供')[:200]}...\n\n")
|
||||||
if original_tags:
|
f.write(f"内容: {input_data.get('content', '未提供')[:200]}...\n\n")
|
||||||
f.write(f"原始标签: {original_tags}\n\n")
|
f.write(f"标签: {input_data.get('tags', input_data.get('tag', '未提供'))}\n\n")
|
||||||
|
f.write(f"审核状态: judged={input_data.get('judged', False)}, judge_success={input_data.get('judge_success', False)}\n\n")
|
||||||
|
|
||||||
if is_judged:
|
f.write("处理后JSON输出字段:\n")
|
||||||
f.write(f"审核状态: {'成功' if is_judge_success else '失败'}\n")
|
|
||||||
if is_judge_success:
|
|
||||||
f.write(f"审核后标题: {judge_title}\n\n")
|
|
||||||
f.write(f"审核后内容: {judge_content}\n\n")
|
|
||||||
|
|
||||||
if original_judge_analysis:
|
|
||||||
f.write(f"审核分析: {original_judge_analysis}\n\n")
|
|
||||||
|
|
||||||
f.write("---处理后---\n\n")
|
|
||||||
for key, value in output_data.items():
|
for key, value in output_data.items():
|
||||||
if isinstance(value, str):
|
value_preview = str(value)[:100] + "..." if isinstance(value, str) and len(str(value)) > 100 else value
|
||||||
f.write(f"{key}: (length: {len(value)})\n")
|
f.write(f"{key}: {value_preview}\n")
|
||||||
f.write(f"{repr(value[:200])}...\n\n")
|
|
||||||
else:
|
f.write("\n解码后文本内容:\n")
|
||||||
f.write(f"{key}: {type(value)}\n")
|
f.write(f"标题: {title_text if 'title_text' in locals() else '未解码'}\n\n")
|
||||||
|
f.write(f"内容: {content_text[:200] if 'content_text' in locals() else '未解码'}...\n")
|
||||||
|
|
||||||
logging.info(f"调试内容已保存到: {debug_path}")
|
logging.info(f"调试内容已保存到: {debug_path}")
|
||||||
except Exception as debug_err:
|
except Exception as debug_err:
|
||||||
logging.error(f"保存调试内容失败: {debug_err}")
|
logging.error(f"保存调试内容失败: {debug_err}")
|
||||||
|
|
||||||
# Save content prompt
|
# 保存提示词
|
||||||
prompt_path = os.path.join(variant_dir, "tweet_prompt.txt")
|
prompt_path = os.path.join(variant_dir, "tweet_prompt.txt")
|
||||||
try:
|
try:
|
||||||
with open(prompt_path, "w", encoding="utf-8") as f:
|
with open(prompt_path, "w", encoding="utf-8") as f:
|
||||||
|
|||||||
@ -11,6 +11,7 @@ import sys
|
|||||||
import traceback
|
import traceback
|
||||||
import logging # Add logging
|
import logging # Add logging
|
||||||
import re
|
import re
|
||||||
|
import base64
|
||||||
# sys.path.append('/root/autodl-tmp') # No longer needed if running as a module or if path is set correctly
|
# sys.path.append('/root/autodl-tmp') # No longer needed if running as a module or if path is set correctly
|
||||||
# 从本地模块导入
|
# 从本地模块导入
|
||||||
# from TravelContentCreator.core.ai_agent import AI_Agent # Remove project name prefix
|
# from TravelContentCreator.core.ai_agent import AI_Agent # Remove project name prefix
|
||||||
@ -546,26 +547,21 @@ def generate_content_for_topic(ai_agent: AI_Agent,
|
|||||||
if judged_result and isinstance(judged_result, dict):
|
if judged_result and isinstance(judged_result, dict):
|
||||||
if "title" in judged_result and "content" in judged_result:
|
if "title" in judged_result and "content" in judged_result:
|
||||||
# 保存原始标题和内容
|
# 保存原始标题和内容
|
||||||
content_json["original_title"] = content_json.get("title", "")
|
# content_json["original_title"] = content_json.get("title", "")
|
||||||
content_json["original_content"] = content_json.get("content", "")
|
# content_json["original_content"] = content_json.get("content", "")
|
||||||
# 保存原始标签(优先使用tags,如果没有则使用tag)
|
# 保存原始标签(优先使用tags,如果没有则使用tag)
|
||||||
original_tags = content_json.get("tags", content_json.get("tag", ""))
|
tags = content_json.get("tags", content_json.get("tag", ""))
|
||||||
content_json["original_tags"] = original_tags
|
content_json["tags"] = base64.b64encode(tags.encode('utf-8')).decode('utf-8')
|
||||||
# 更新为审核后的内容
|
# 更新为审核后的内容
|
||||||
content_json["title"] = judged_result["title"]
|
content_json["title"] = judged_result["title_base64"]
|
||||||
content_json["content"] = judged_result["content"]
|
content_json["content"] = judged_result["content_base64"]
|
||||||
# 保留原始标签,避免重复
|
|
||||||
content_json["tags"] = original_tags
|
|
||||||
# 删除可能存在的重复tag字段
|
|
||||||
if "tag" in content_json:
|
|
||||||
del content_json["tag"]
|
|
||||||
# 添加审核标记
|
# 添加审核标记
|
||||||
content_json["judged"] = True
|
content_json["judged"] = True
|
||||||
# 添加judge_success状态
|
# 添加judge_success状态
|
||||||
content_json["judge_success"] = judged_result.get("judge_success", False)
|
content_json["judge_success"] = judged_result.get("judge_success", False)
|
||||||
# 处理分析结果,优先使用"analysis"字段,兼容"不良内容分析"字段
|
# 处理分析结果,优先使用"analysis"字段,兼容"不良内容分析"字段
|
||||||
if "analysis" in judged_result:
|
if "analysis" in judged_result:
|
||||||
content_json["judge_analysis"] = judged_result["analysis"]
|
content_json["analysis"] = judged_result["analysis_base64"]
|
||||||
else:
|
else:
|
||||||
logging.warning(f" 审核结果缺少title或content字段,保留原内容")
|
logging.warning(f" 审核结果缺少title或content字段,保留原内容")
|
||||||
content_json["judge_success"] = False
|
content_json["judge_success"] = False
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user