diff --git a/scripts/extract_and_render.py b/scripts/extract_and_render.py index 0dcf16d..f8d06bc 100644 --- a/scripts/extract_and_render.py +++ b/scripts/extract_and_render.py @@ -39,25 +39,30 @@ def convert_json_to_txt_content(json_path, prefer_original=False): # 优先使用原始内容 title = data.get('original_title', '未找到原始标题') content = data.get('original_content', '未找到原始内容') + # 优先使用原始标签 + tags = data.get('original_tags', data.get('tags', '未找到标签')) print(f" - 优先使用原始内容 (prefer_original=True)") elif judge_success is True and not prefer_original: # 使用审核后的内容 title = data.get('title', '未找到标题') content = data.get('content', '未找到内容') + tags = data.get('tags', '未找到标签') print(f" - 使用审核后内容 (judge_success=True)") elif 'original_title' in data and 'original_content' in data: # 使用原始内容 title = data.get('original_title', '未找到原始标题') content = data.get('original_content', '未找到原始内容') + # 优先使用原始标签 + tags = data.get('original_tags', data.get('tags', '未找到标签')) print(f" - 使用原始内容 (judge_success={judge_success})") else: # 若无original字段,使用常规字段 title = data.get('title', '未找到标题') content = data.get('content', '未找到内容') + tags = data.get('tags', '未找到标签') print(f" - 使用常规内容 (无judge结果)") # 解决tag/tags字段重复问题,按照修正后的处理逻辑,只使用tags字段 - tags = data.get('tags', '') if not tags and 'tag' in data: tags = data.get('tag', '未找到标签') print(f" - 使用tag字段作为标签 (该字段将在后续版本中统一为tags)") diff --git a/utils/__pycache__/resource_loader.cpython-312.pyc b/utils/__pycache__/resource_loader.cpython-312.pyc index 8158426..dbb3de4 100644 Binary files a/utils/__pycache__/resource_loader.cpython-312.pyc and b/utils/__pycache__/resource_loader.cpython-312.pyc differ diff --git a/utils/__pycache__/tweet_generator.cpython-312.pyc b/utils/__pycache__/tweet_generator.cpython-312.pyc index 982c6ac..ea6567f 100644 Binary files a/utils/__pycache__/tweet_generator.cpython-312.pyc and b/utils/__pycache__/tweet_generator.cpython-312.pyc differ diff --git a/utils/output_handler.py b/utils/output_handler.py index c902004..247d8e3 100644 --- a/utils/output_handler.py +++ b/utils/output_handler.py @@ -95,11 +95,14 @@ class FileSystemOutputHandler(OutputHandler): import copy output_data = copy.deepcopy(content_data) - # 确保tag和tags字段保持一致 - if "tag" not in output_data and output_data.get("tags"): - output_data["tag"] = output_data["tags"] - elif "tags" not in output_data and output_data.get("tag"): + # 统一使用tags字段,避免tag和tags重复 + if "tag" in output_data and "tags" not in output_data: + # 只有tag字段存在,复制到tags output_data["tags"] = output_data["tag"] + del output_data["tag"] + elif "tag" in output_data and "tags" in output_data: + # 两个字段都存在,保留tags并删除tag + del output_data["tag"] # 确保即使在未启用审核的情况下,字段也保持一致 if not output_data.get("judged", False): @@ -108,6 +111,9 @@ class FileSystemOutputHandler(OutputHandler): output_data["original_title"] = None output_data["original_content"] = None output_data["judge_analysis"] = None + # 添加original_tags字段 + if "tags" in output_data and "original_tags" not in output_data: + output_data["original_tags"] = output_data["tags"] # 保存统一格式的article.json content_path = os.path.join(variant_dir, "article.json") diff --git a/utils/tweet_generator.py b/utils/tweet_generator.py index 85146ed..1c85b45 100644 --- a/utils/tweet_generator.py +++ b/utils/tweet_generator.py @@ -518,14 +518,20 @@ content: {content_json.get('content', '')} judged_result = content_judger.judge_content(product_info, content_to_judge) if judged_result and isinstance(judged_result, dict): if "title" in judged_result and "content" in judged_result: - # 使用审核后的内容替换原内容 - logging.info(f" 内容审核成功,使用审核后的内容替换原内容") # 保存原始标题和内容 content_json["original_title"] = content_json.get("title", "") content_json["original_content"] = content_json.get("content", "") + # 保存原始标签(优先使用tags,如果没有则使用tag) + original_tags = content_json.get("tags", content_json.get("tag", "")) + content_json["original_tags"] = original_tags # 更新为审核后的内容 content_json["title"] = judged_result["title"] content_json["content"] = judged_result["content"] + # 保留原始标签,避免重复 + content_json["tags"] = original_tags + # 删除可能存在的重复tag字段 + if "tag" in content_json: + del content_json["tag"] # 添加审核标记 content_json["judged"] = True # 添加judge_success状态