增加了tags的重复避免
This commit is contained in:
parent
2c39d981f4
commit
c8e049fc68
@ -39,25 +39,30 @@ def convert_json_to_txt_content(json_path, prefer_original=False):
|
||||
# 优先使用原始内容
|
||||
title = data.get('original_title', '未找到原始标题')
|
||||
content = data.get('original_content', '未找到原始内容')
|
||||
# 优先使用原始标签
|
||||
tags = data.get('original_tags', data.get('tags', '未找到标签'))
|
||||
print(f" - 优先使用原始内容 (prefer_original=True)")
|
||||
elif judge_success is True and not prefer_original:
|
||||
# 使用审核后的内容
|
||||
title = data.get('title', '未找到标题')
|
||||
content = data.get('content', '未找到内容')
|
||||
tags = data.get('tags', '未找到标签')
|
||||
print(f" - 使用审核后内容 (judge_success=True)")
|
||||
elif 'original_title' in data and 'original_content' in data:
|
||||
# 使用原始内容
|
||||
title = data.get('original_title', '未找到原始标题')
|
||||
content = data.get('original_content', '未找到原始内容')
|
||||
# 优先使用原始标签
|
||||
tags = data.get('original_tags', data.get('tags', '未找到标签'))
|
||||
print(f" - 使用原始内容 (judge_success={judge_success})")
|
||||
else:
|
||||
# 若无original字段,使用常规字段
|
||||
title = data.get('title', '未找到标题')
|
||||
content = data.get('content', '未找到内容')
|
||||
tags = data.get('tags', '未找到标签')
|
||||
print(f" - 使用常规内容 (无judge结果)")
|
||||
|
||||
# 解决tag/tags字段重复问题,按照修正后的处理逻辑,只使用tags字段
|
||||
tags = data.get('tags', '')
|
||||
if not tags and 'tag' in data:
|
||||
tags = data.get('tag', '未找到标签')
|
||||
print(f" - 使用tag字段作为标签 (该字段将在后续版本中统一为tags)")
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@ -95,11 +95,14 @@ class FileSystemOutputHandler(OutputHandler):
|
||||
import copy
|
||||
output_data = copy.deepcopy(content_data)
|
||||
|
||||
# 确保tag和tags字段保持一致
|
||||
if "tag" not in output_data and output_data.get("tags"):
|
||||
output_data["tag"] = output_data["tags"]
|
||||
elif "tags" not in output_data and output_data.get("tag"):
|
||||
# 统一使用tags字段,避免tag和tags重复
|
||||
if "tag" in output_data and "tags" not in output_data:
|
||||
# 只有tag字段存在,复制到tags
|
||||
output_data["tags"] = output_data["tag"]
|
||||
del output_data["tag"]
|
||||
elif "tag" in output_data and "tags" in output_data:
|
||||
# 两个字段都存在,保留tags并删除tag
|
||||
del output_data["tag"]
|
||||
|
||||
# 确保即使在未启用审核的情况下,字段也保持一致
|
||||
if not output_data.get("judged", False):
|
||||
@ -108,6 +111,9 @@ class FileSystemOutputHandler(OutputHandler):
|
||||
output_data["original_title"] = None
|
||||
output_data["original_content"] = None
|
||||
output_data["judge_analysis"] = None
|
||||
# 添加original_tags字段
|
||||
if "tags" in output_data and "original_tags" not in output_data:
|
||||
output_data["original_tags"] = output_data["tags"]
|
||||
|
||||
# 保存统一格式的article.json
|
||||
content_path = os.path.join(variant_dir, "article.json")
|
||||
|
||||
@ -518,14 +518,20 @@ content: {content_json.get('content', '')}
|
||||
judged_result = content_judger.judge_content(product_info, content_to_judge)
|
||||
if judged_result and isinstance(judged_result, dict):
|
||||
if "title" in judged_result and "content" in judged_result:
|
||||
# 使用审核后的内容替换原内容
|
||||
logging.info(f" 内容审核成功,使用审核后的内容替换原内容")
|
||||
# 保存原始标题和内容
|
||||
content_json["original_title"] = content_json.get("title", "")
|
||||
content_json["original_content"] = content_json.get("content", "")
|
||||
# 保存原始标签(优先使用tags,如果没有则使用tag)
|
||||
original_tags = content_json.get("tags", content_json.get("tag", ""))
|
||||
content_json["original_tags"] = original_tags
|
||||
# 更新为审核后的内容
|
||||
content_json["title"] = judged_result["title"]
|
||||
content_json["content"] = judged_result["content"]
|
||||
# 保留原始标签,避免重复
|
||||
content_json["tags"] = original_tags
|
||||
# 删除可能存在的重复tag字段
|
||||
if "tag" in content_json:
|
||||
del content_json["tag"]
|
||||
# 添加审核标记
|
||||
content_json["judged"] = True
|
||||
# 添加judge_success状态
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user