#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Content review module.
"""

# Standard library.
import logging
import json
from typing import Dict, Any, Union

# Project-local modules.
from core.ai import AIAgent
from core.config import ConfigManager, GenerateTopicConfig, GenerateContentConfig
from utils.prompts import JudgerPromptBuilder
from utils.file_io import process_llm_json_text

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


class ContentJudger:
    """Content reviewer that asks an AI model to assess and correct generated content."""

    # Sampling options copied from ``content_config.judger_model`` into the AI call.
    _MODEL_PARAM_KEYS = ('temperature', 'top_p', 'presence_penalty')

    def __init__(self, ai_agent: "AIAgent", config_manager: "ConfigManager", output_manager=None):
        """
        Initialize the content reviewer.

        Args:
            ai_agent: AIAgent instance used to call the model.
            config_manager: Configuration manager; the 'topic_gen' and
                'content_gen' configurations are read from it.
            output_manager: Optional output manager used to save prompts,
                raw responses and the reviewed article. Nothing is saved
                when it is falsy.
        """
        self.ai_agent = ai_agent
        self.config_manager = config_manager
        self.topic_config = config_manager.get_config('topic_gen', GenerateTopicConfig)
        self.content_config = config_manager.get_config('content_gen', GenerateContentConfig)
        self.prompt_builder = JudgerPromptBuilder(config_manager)
        self.output_manager = output_manager

    async def judge_content(self, generated_content: Union[str, Dict[str, Any]], topic: Dict[str, Any]) -> Dict[str, Any]:
        """
        Review generated content, building the prompts with the prompt builder.

        Args:
            generated_content: The generated content (JSON string or dict).
            topic: The topic dict the content was generated for; its
                'index' entry names the output sub-directory.

        Returns:
            On success, the parsed review result with "judge_success" set
            to True and "tag" replaced by the original content's tags when
            those are non-empty. On failure, a dict with "judge_success"
            False and an "error" message (plus "raw_response" when the
            model reply could not be used).
        """
        logger.info("开始审核生成的内容...")

        topic_dir = self._topic_dir(topic)
        original_tag = self._extract_original_tag(generated_content)

        # The prompt builder is given text, so serialize dict input first.
        if isinstance(generated_content, dict):
            generated_content_str = json.dumps(generated_content, ensure_ascii=False, indent=2)
        else:
            generated_content_str = str(generated_content)

        system_prompt = self.prompt_builder.get_system_prompt()
        user_prompt = self.prompt_builder.build_user_prompt(
            generated_content=generated_content_str,
            topic=topic
        )

        return await self._run_review(
            topic_dir, original_tag, system_prompt, user_prompt, drop_analysis=False
        )

    async def judge_content_with_prompt(self, generated_content: Union[str, Dict[str, Any]], topic: Dict[str, Any], system_prompt: str, user_prompt: str) -> Dict[str, Any]:
        """
        Review generated content using prompts supplied by the caller.

        Args:
            generated_content: The generated content (JSON string or dict);
                only used here to recover the original tags.
            topic: The topic dict the content was generated for; its
                'index' entry names the output sub-directory.
            system_prompt: System prompt sent to the model.
            user_prompt: User prompt sent to the model.

        Returns:
            Same shape as ``judge_content``, except that an "analysis" key
            in the model reply, if present, is removed from the result.
        """
        logger.info("开始使用预构建提示词审核生成的内容...")

        topic_dir = self._topic_dir(topic)
        original_tag = self._extract_original_tag(generated_content)

        return await self._run_review(
            topic_dir, original_tag, system_prompt, user_prompt, drop_analysis=True
        )

    @staticmethod
    def _topic_dir(topic: Dict[str, Any]) -> str:
        """Return the per-topic output directory name ('topic_<index>')."""
        return f"topic_{topic.get('index', 'unknown')}"

    @staticmethod
    def _extract_original_tag(generated_content: Union[str, Dict[str, Any]]):
        """Return the "tag" value of the original content, or [] if unavailable."""
        original_content = process_llm_json_text(generated_content)
        if original_content and isinstance(original_content, dict) and "tag" in original_content:
            original_tag = original_content.get("tag", [])
            logger.info("从原始内容中提取到标签: %s", original_tag)
            return original_tag

        logger.warning("从原始内容提取标签失败")
        return []

    def _collect_model_params(self) -> Dict[str, Any]:
        """Return the non-None sampling options configured for the judger model."""
        judger_model = getattr(self.content_config, 'judger_model', None)
        if not isinstance(judger_model, dict):
            return {}

        candidates = ((key, judger_model.get(key)) for key in self._MODEL_PARAM_KEYS)
        # Unset options are left out so they are not passed to the AI call.
        return {key: value for key, value in candidates if value is not None}

    @staticmethod
    def _finalize_judged_data(judged_data: Any, original_tag: Any, drop_analysis: bool = False):
        """
        Validate a parsed model reply and turn it into the success result.

        Returns None unless ``judged_data`` is a non-empty dict holding both
        "title" and "content". Otherwise the dict is updated in place and
        returned.
        """
        is_valid = (
            bool(judged_data)
            and isinstance(judged_data, dict)
            and "title" in judged_data
            and "content" in judged_data
        )
        if not is_valid:
            return None

        judged_data["judge_success"] = True
        if drop_analysis:
            # A reply without "analysis" is still valid; a bare pop() would
            # raise KeyError and lose an otherwise good result.
            judged_data.pop("analysis", None)

        # Tags from the original content win; when there are none, whatever
        # "tag" the model returned (if any) is left untouched.
        if original_tag:
            judged_data["tag"] = original_tag
        return judged_data

    async def _run_review(self, topic_dir: str, original_tag: Any, system_prompt: str, user_prompt: str, *, drop_analysis: bool) -> Dict[str, Any]:
        """Save the prompts, call the model, then parse and persist the result."""
        if self.output_manager:
            self.output_manager.save_text(system_prompt, f"{topic_dir}/judger_system_prompt.txt")
            self.output_manager.save_text(user_prompt, f"{topic_dir}/judger_user_prompt.txt")

        model_params = self._collect_model_params()

        try:
            # generate_text returns a 4-tuple; only the first item is used.
            raw_result, _, _, _ = await self.ai_agent.generate_text(
                system_prompt=system_prompt,
                user_prompt=user_prompt,
                use_stream=True,
                stage="内容审核",
                **model_params
            )

            if self.output_manager:
                self.output_manager.save_text(raw_result, f"{topic_dir}/judger_raw_response.txt")

        except Exception as e:
            # Any failure here is reported to the caller as a failed review
            # instead of being raised.
            logger.critical("内容审核时AI调用失败: %s", e, exc_info=True)
            return {"judge_success": False, "error": str(e)}

        judged_data = self._finalize_judged_data(
            process_llm_json_text(raw_result), original_tag, drop_analysis=drop_analysis
        )
        if judged_data is None:
            logger.warning("审核响应JSON格式不正确或缺少键")
            return {"judge_success": False, "error": "Invalid JSON response", "raw_response": raw_result}

        logger.info("内容审核成功完成,使用标签: %s", judged_data.get('tag', []))

        if self.output_manager:
            self.output_manager.save_json(judged_data, f"{topic_dir}/article_judged.json")

        return judged_data