212 lines
8.8 KiB
Python
Raw Permalink Normal View History

2025-07-31 15:35:23 +08:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Content review module.

Defines ContentJudger, which asks an AI model to review generated content
and returns the parsed result.
"""
import logging
import json
from typing import Dict, Any, Union
from core.ai import AIAgent
from core.config import ConfigManager, GenerateTopicConfig, GenerateContentConfig
from utils.prompts import JudgerPromptBuilder
from utils.file_io import process_llm_json_text
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class ContentJudger:
    """Review generated content with an AI model and return the parsed result.

    Both public entry points run the same pipeline: extract the tags of the
    original content, optionally persist the prompts, call the model, parse
    the JSON response and put the original tags back on the reviewed result.
    They differ only in where the prompts come from and in whether the
    "analysis" key of the response is kept.
    """

    # Sampling parameters forwarded to the model when present in the config.
    _MODEL_PARAM_KEYS = ('temperature', 'top_p', 'presence_penalty')

    def __init__(self, ai_agent: AIAgent, config_manager: ConfigManager, output_manager=None):
        """
        Initialise the content judger.

        Args:
            ai_agent: AIAgent instance used to call the model.
            config_manager: Configuration manager; the 'topic_gen' and
                'content_gen' configs are read from it.
            output_manager: Optional output manager used to save prompts and
                responses. Nothing is saved when it is falsy.
        """
        self.ai_agent = ai_agent
        self.config_manager = config_manager
        self.topic_config = config_manager.get_config('topic_gen', GenerateTopicConfig)
        self.content_config = config_manager.get_config('content_gen', GenerateContentConfig)
        self.prompt_builder = JudgerPromptBuilder(config_manager)
        self.output_manager = output_manager

    async def judge_content(self, generated_content: Union[str, Dict[str, Any]], topic: Dict[str, Any]) -> Dict[str, Any]:
        """
        Ask the AI to review generated content, building the prompts internally.

        Args:
            generated_content: The generated content, as a JSON string or a dict.
            topic: The topic dict the content was generated for.

        Returns:
            On success, the parsed review dict with "judge_success" set to True.
            On failure, a dict with "judge_success" False and an "error" message
            (plus "raw_response" when the model answered but could not be parsed).
        """
        logger.info("开始审核生成的内容...")
        topic_dir = self._topic_dir(topic)
        original_tag = self._extract_original_tag(generated_content)

        # The prompt builder receives the content as text; dicts are serialised
        # as readable, non-ASCII-escaped JSON.
        if isinstance(generated_content, dict):
            generated_content_str = json.dumps(generated_content, ensure_ascii=False, indent=2)
        else:
            generated_content_str = str(generated_content)

        system_prompt = self.prompt_builder.get_system_prompt()
        user_prompt = self.prompt_builder.build_user_prompt(
            generated_content=generated_content_str,
            topic=topic
        )

        return await self._run_judgement(
            system_prompt, user_prompt, topic_dir, original_tag, drop_analysis=False
        )

    async def judge_content_with_prompt(self, generated_content: Union[str, Dict[str, Any]], topic: Dict[str, Any], system_prompt: str, user_prompt: str) -> Dict[str, Any]:
        """
        Ask the AI to review generated content using pre-built prompts.

        Args:
            generated_content: The generated content, as a JSON string or a dict.
                It is only used to recover the original tags.
            topic: The topic dict the content was generated for.
            system_prompt: System prompt to send to the model.
            user_prompt: User prompt to send to the model.

        Returns:
            On success, the parsed review dict with "judge_success" set to True
            and the "analysis" key removed if the model returned one.
            On failure, a dict with "judge_success" False and an "error" message
            (plus "raw_response" when the model answered but could not be parsed).
        """
        logger.info("开始使用预构建提示词审核生成的内容...")
        topic_dir = self._topic_dir(topic)
        original_tag = self._extract_original_tag(generated_content)

        return await self._run_judgement(
            system_prompt, user_prompt, topic_dir, original_tag, drop_analysis=True
        )

    @staticmethod
    def _topic_dir(topic: Dict[str, Any]) -> str:
        """Return the per-topic output directory name, derived from the topic index."""
        topic_index = topic.get('index', 'unknown')
        return f"topic_{topic_index}"

    @staticmethod
    def _extract_original_tag(generated_content: Union[str, Dict[str, Any]]) -> Any:
        """Return the "tag" value of the original content, or [] if it cannot be read."""
        original_content = process_llm_json_text(generated_content)
        if isinstance(original_content, dict) and "tag" in original_content:
            original_tag = original_content["tag"]
            logger.info(f"从原始内容中提取到标签: {original_tag}")
            return original_tag
        logger.warning("从原始内容提取标签失败")
        return []

    def _get_model_params(self) -> Dict[str, Any]:
        """Collect the judger sampling parameters that are set in the content config."""
        judger_model = getattr(self.content_config, 'judger_model', None)
        if not isinstance(judger_model, dict):
            return {}
        # Unset (None) values are dropped so they are not passed to the model call.
        return {
            key: judger_model[key]
            for key in self._MODEL_PARAM_KEYS
            if judger_model.get(key) is not None
        }

    async def _run_judgement(self, system_prompt: str, user_prompt: str, topic_dir: str,
                             original_tag: Any, drop_analysis: bool) -> Dict[str, Any]:
        """Save the prompts, call the model, then parse and post-process its answer.

        Args:
            system_prompt: System prompt to send to the model.
            user_prompt: User prompt to send to the model.
            topic_dir: Directory prefix used for every saved file.
            original_tag: Tags taken from the original content; when truthy they
                overwrite the "tag" key of the reviewed result.
            drop_analysis: When True, remove the "analysis" key from the result.

        Returns:
            The review result dict (see the public methods for its shape).
        """
        if self.output_manager:
            self.output_manager.save_text(system_prompt, f"{topic_dir}/judger_system_prompt.txt")
            self.output_manager.save_text(user_prompt, f"{topic_dir}/judger_user_prompt.txt")

        model_params = self._get_model_params()

        try:
            raw_result, _, _, _ = await self.ai_agent.generate_text(
                system_prompt=system_prompt,
                user_prompt=user_prompt,
                use_stream=True,
                stage="内容审核",
                **model_params
            )
            # Saving stays inside the try block, so a failure while saving the
            # raw response is also reported as a failed review.
            if self.output_manager:
                self.output_manager.save_text(raw_result, f"{topic_dir}/judger_raw_response.txt")
        except Exception as e:
            logger.critical(f"内容审核时AI调用失败: {e}", exc_info=True)
            return {"judge_success": False, "error": str(e)}

        judged_data = process_llm_json_text(raw_result)
        is_valid = (
            isinstance(judged_data, dict)
            and "title" in judged_data
            and "content" in judged_data
        )
        if not is_valid:
            logger.warning("审核响应JSON格式不正确或缺少键")
            return {"judge_success": False, "error": "Invalid JSON response", "raw_response": raw_result}

        judged_data["judge_success"] = True
        if drop_analysis:
            # The model may omit "analysis"; a missing key must not raise.
            judged_data.pop("analysis", None)

        # The original tags take precedence over whatever the model returned.
        # Without original tags, the "tag" value from the response (if any) is kept.
        if original_tag:
            judged_data["tag"] = original_tag
        logger.info(f"内容审核成功完成,使用标签: {judged_data.get('tag', [])}")

        if self.output_manager:
            self.output_manager.save_json(judged_data, f"{topic_dir}/article_judged.json")
        return judged_data