增加了避免tags重复的处理

This commit is contained in:
jinye_huang 2025-05-12 15:44:54 +08:00
parent 2c39d981f4
commit c8e049fc68
5 changed files with 24 additions and 7 deletions

View File

@ -39,25 +39,30 @@ def convert_json_to_txt_content(json_path, prefer_original=False):
# 优先使用原始内容
title = data.get('original_title', '未找到原始标题')
content = data.get('original_content', '未找到原始内容')
# 优先使用原始标签
tags = data.get('original_tags', data.get('tags', '未找到标签'))
print(f" - 优先使用原始内容 (prefer_original=True)")
elif judge_success is True and not prefer_original:
# 使用审核后的内容
title = data.get('title', '未找到标题')
content = data.get('content', '未找到内容')
tags = data.get('tags', '未找到标签')
print(f" - 使用审核后内容 (judge_success=True)")
elif 'original_title' in data and 'original_content' in data:
# 使用原始内容
title = data.get('original_title', '未找到原始标题')
content = data.get('original_content', '未找到原始内容')
# 优先使用原始标签
tags = data.get('original_tags', data.get('tags', '未找到标签'))
print(f" - 使用原始内容 (judge_success={judge_success})")
else:
# 若无original字段,则使用常规字段
title = data.get('title', '未找到标题')
content = data.get('content', '未找到内容')
tags = data.get('tags', '未找到标签')
print(f" - 使用常规内容 (无judge结果)")
# 解决tag/tags字段重复问题:按照修正后的处理逻辑,只使用tags字段
tags = data.get('tags', '')
if not tags and 'tag' in data:
tags = data.get('tag', '未找到标签')
print(f" - 使用tag字段作为标签 (该字段将在后续版本中统一为tags)")

View File

@ -95,11 +95,14 @@ class FileSystemOutputHandler(OutputHandler):
import copy
output_data = copy.deepcopy(content_data)
# 确保tag和tags字段保持一致
if "tag" not in output_data and output_data.get("tags"):
output_data["tag"] = output_data["tags"]
elif "tags" not in output_data and output_data.get("tag"):
# 统一使用tags字段,避免tag和tags重复
if "tag" in output_data and "tags" not in output_data:
# 只有tag字段存在:复制到tags
output_data["tags"] = output_data["tag"]
del output_data["tag"]
elif "tag" in output_data and "tags" in output_data:
# 两个字段都存在:保留tags并删除tag
del output_data["tag"]
# 确保即使在未启用审核的情况下,字段也保持一致
if not output_data.get("judged", False):
@ -108,6 +111,9 @@ class FileSystemOutputHandler(OutputHandler):
output_data["original_title"] = None
output_data["original_content"] = None
output_data["judge_analysis"] = None
# 添加original_tags字段
if "tags" in output_data and "original_tags" not in output_data:
output_data["original_tags"] = output_data["tags"]
# 保存统一格式的article.json
content_path = os.path.join(variant_dir, "article.json")

View File

@ -518,14 +518,20 @@ content: {content_json.get('content', '')}
judged_result = content_judger.judge_content(product_info, content_to_judge)
if judged_result and isinstance(judged_result, dict):
if "title" in judged_result and "content" in judged_result:
# 使用审核后的内容替换原内容
logging.info(f" 内容审核成功,使用审核后的内容替换原内容")
# 保存原始标题和内容
content_json["original_title"] = content_json.get("title", "")
content_json["original_content"] = content_json.get("content", "")
# 保存原始标签:优先使用tags,如果没有则使用tag
original_tags = content_json.get("tags", content_json.get("tag", ""))
content_json["original_tags"] = original_tags
# 更新为审核后的内容
content_json["title"] = judged_result["title"]
content_json["content"] = judged_result["content"]
# 保留原始标签,避免重复
content_json["tags"] = original_tags
# 删除可能存在的重复tag字段
if "tag" in content_json:
del content_json["tag"]
# 添加审核标记
content_json["judged"] = True
# 添加judge_success状态