TravelContentCreator/utils/tweet/content_judger.py

129 lines
5.2 KiB
Python
Raw Normal View History

2025-07-08 18:24:23 +08:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
内容审核模块
"""
import logging
import json
from json_repair import loads as json_repair_loads
from typing import Dict, Any
from core.ai import AIAgent
from core.config import ConfigManager, GenerateTopicConfig, GenerateContentConfig
2025-07-08 18:24:23 +08:00
from utils.prompts import JudgerPromptBuilder
logger = logging.getLogger(__name__)
class ContentJudger:
    """Content reviewer that asks an AI model to evaluate and correct generated content."""

    # Sampling parameters that may be forwarded from the judger model config.
    _MODEL_PARAM_KEYS = ('temperature', 'top_p', 'presence_penalty')

    def __init__(self, ai_agent: AIAgent, config_manager: ConfigManager, output_manager=None):
        """
        Initialise the content judger.

        Args:
            ai_agent: AIAgent instance used to call the model.
            config_manager: Configuration manager; the 'topic_gen' and
                'content_gen' configs are fetched from it.
            output_manager: Optional output manager used to persist prompts
                and responses. Nothing is saved when it is falsy.
        """
        self.ai_agent = ai_agent
        self.config_manager = config_manager
        self.topic_config = config_manager.get_config('topic_gen', GenerateTopicConfig)
        self.content_config = config_manager.get_config('content_gen', GenerateContentConfig)
        self.prompt_builder = JudgerPromptBuilder(config_manager)
        self.output_manager = output_manager

    def _extract_original_tags(self, generated_content: str) -> Any:
        """Return the "tags" value of the generated content, or [] when unavailable.

        Extraction is best-effort: any failure is logged as a warning and an
        empty list is returned so that the review itself can still proceed.
        """
        try:
            original_content = json_repair_loads(generated_content)
            if isinstance(original_content, dict) and "tags" in original_content:
                original_tags = original_content["tags"]
                logger.info(f"从原始内容中提取到标签: {original_tags}")
                return original_tags
        except Exception as e:
            logger.warning(f"从原始内容提取标签失败: {e}")
        return []

    def _get_model_params(self) -> Dict[str, Any]:
        """Collect the judger sampling parameters that are set in the content config.

        Returns an empty dict when the config has no dict-valued
        ``judger_model`` attribute; parameters whose value is None are omitted.
        """
        judger_model = getattr(self.content_config, 'judger_model', None)
        if not isinstance(judger_model, dict):
            return {}
        candidates = ((key, judger_model.get(key)) for key in self._MODEL_PARAM_KEYS)
        return {key: value for key, value in candidates if value is not None}

    async def judge_content(self, generated_content: str, topic: Dict[str, Any]) -> Dict[str, Any]:
        """
        Ask the AI to review previously generated content.

        Args:
            generated_content: The generated content as a JSON string.
            topic: The topic dict the content was generated for; its 'index'
                entry (default 'unknown') names the output sub-directory.

        Returns:
            On success, the parsed review result (a dict containing at least
            "title" and "content") with "judge_success" set to True and "tags"
            overwritten by the original content's tags when those are non-empty.
            On failure, a dict with "judge_success" False and an "error" entry
            (plus "raw_response" when the model answered but parsing failed).
        """
        logger.info("开始审核生成的内容...")

        # The topic index determines where prompts and responses are saved.
        topic_index = topic.get('index', 'unknown')
        topic_dir = f"topic_{topic_index}"

        # Tags are taken from the original content rather than from the review.
        original_tags = self._extract_original_tags(generated_content)

        # 1. Build the prompts.
        system_prompt = self.prompt_builder.get_system_prompt()
        user_prompt = self.prompt_builder.build_user_prompt(
            generated_content=generated_content,
            topic=topic
        )

        if self.output_manager:
            self.output_manager.save_text(system_prompt, f"{topic_dir}/judger_system_prompt.txt")
            self.output_manager.save_text(user_prompt, f"{topic_dir}/judger_user_prompt.txt")

        model_params = self._get_model_params()

        # 2. Call the AI. Any failure here (including saving the raw response)
        #    is reported through the returned dict instead of being raised.
        try:
            raw_result, _, _, _ = await self.ai_agent.generate_text(
                system_prompt=system_prompt,
                user_prompt=user_prompt,
                use_stream=False,
                stage="内容审核",
                **model_params
            )

            if self.output_manager:
                self.output_manager.save_text(raw_result, f"{topic_dir}/judger_raw_response.txt")
        except Exception as e:
            logger.critical(f"内容审核时AI调用失败: {e}", exc_info=True)
            return {"judge_success": False, "error": str(e)}

        # 3. Parse the result.
        try:
            judged_data = json_repair_loads(raw_result)

            is_valid = (
                isinstance(judged_data, dict)
                and "title" in judged_data
                and "content" in judged_data
            )
            if not is_valid:
                logger.warning(f"审核响应JSON格式不正确或缺少键: {judged_data}")
                return {"judge_success": False, "error": "Invalid JSON response", "raw_response": raw_result}

            judged_data["judge_success"] = True

            # Prefer the original tags; when there are none, whatever tags the
            # review response carried (possibly none) are left untouched.
            if original_tags:
                judged_data["tags"] = original_tags

            logger.info(f"内容审核成功完成,使用标签: {judged_data.get('tags', [])}")

            if self.output_manager:
                self.output_manager.save_json(judged_data, f"{topic_dir}/article_judged.json")

            return judged_data
        except (json.JSONDecodeError, ValueError, TypeError) as e:
            # TypeError covers a None / non-string model response, which the
            # JSON loader rejects before any repair is attempted.
            logger.error(f"解析审核响应JSON失败: {e}")
            return {"judge_success": False, "error": "JSONDecodeError", "raw_response": raw_result}