TravelContentCreator/tweet/content_judger.py

212 lines
8.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
内容审核模块
"""
import logging
import json
from typing import Dict, Any, Union
from core.ai import AIAgent
from core.config import ConfigManager, GenerateTopicConfig, GenerateContentConfig
from utils.prompts import JudgerPromptBuilder
from utils.file_io import process_llm_json_text
# Module-level logger named after this module; used by ContentJudger below.
logger = logging.getLogger(__name__)
class ContentJudger:
    """Review generated content with an AI model and return the revised result.

    Both public entry points send a system/user prompt pair to the AI agent,
    parse the reply with ``process_llm_json_text`` and, on success, return the
    parsed dict with ``judge_success`` set to ``True``. On failure they return
    a dict with ``judge_success`` set to ``False`` and an ``error`` message.
    """

    # Sampling options copied from ``content_config.judger_model`` (when that
    # attribute is a dict) into the keyword arguments of ``generate_text``.
    _MODEL_PARAM_KEYS = ('temperature', 'top_p', 'presence_penalty')

    def __init__(self, ai_agent: AIAgent, config_manager: ConfigManager, output_manager=None):
        """Initialise the content judger.

        Args:
            ai_agent: AIAgent instance used to call the model.
            config_manager: Configuration manager; the 'topic_gen' and
                'content_gen' configs are read from it and it is handed to
                the prompt builder.
            output_manager: Optional output manager. When truthy, prompts,
                the raw response and the judged article are saved through it.
        """
        self.ai_agent = ai_agent
        self.config_manager = config_manager
        self.topic_config = config_manager.get_config('topic_gen', GenerateTopicConfig)
        self.content_config = config_manager.get_config('content_gen', GenerateContentConfig)
        self.prompt_builder = JudgerPromptBuilder(config_manager)
        self.output_manager = output_manager

    async def judge_content(self, generated_content: Union[str, Dict[str, Any]], topic: Dict[str, Any]) -> Dict[str, Any]:
        """Build the judger prompts and ask the AI to review the content.

        Args:
            generated_content: The generated content, as a JSON string or a dict.
            topic: The topic dict the content was generated for. Its 'index'
                entry (default 'unknown') names the output sub-directory.

        Returns:
            On success, the parsed review dict with ``judge_success`` True and
            ``tag`` replaced by the original content's tag when one was found.
            Otherwise ``{"judge_success": False, "error": ...}``, plus
            ``raw_response`` when the reply could not be used.
        """
        logger.info("开始审核生成的内容...")
        topic_dir = self._topic_dir(topic)
        original_tag = self._extract_original_tag(generated_content)

        # The prompt builder receives text, so serialise dict input first.
        if isinstance(generated_content, dict):
            generated_content_str = json.dumps(generated_content, ensure_ascii=False, indent=2)
        else:
            generated_content_str = str(generated_content)

        # 1. Build the prompts.
        system_prompt = self.prompt_builder.get_system_prompt()
        user_prompt = self.prompt_builder.build_user_prompt(
            generated_content=generated_content_str,
            topic=topic
        )

        # This entry point has always returned the parsed dict untouched
        # (including any "analysis" key), so keep that behaviour.
        return await self._run_judgement(
            system_prompt, user_prompt, topic_dir, original_tag, drop_analysis=False
        )

    async def judge_content_with_prompt(self, generated_content: Union[str, Dict[str, Any]], topic: Dict[str, Any], system_prompt: str, user_prompt: str) -> Dict[str, Any]:
        """Ask the AI to review the content using prompts built by the caller.

        Args:
            generated_content: The generated content, as a JSON string or a
                dict. Only used here to recover the original 'tag' value.
            topic: The topic dict the content was generated for. Its 'index'
                entry (default 'unknown') names the output sub-directory.
            system_prompt: Ready-made system prompt.
            user_prompt: Ready-made user prompt.

        Returns:
            Same shape as ``judge_content``, except that an "analysis" key in
            the model's reply is removed from the successful result.
        """
        logger.info("开始使用预构建提示词审核生成的内容...")
        topic_dir = self._topic_dir(topic)
        original_tag = self._extract_original_tag(generated_content)
        return await self._run_judgement(
            system_prompt, user_prompt, topic_dir, original_tag, drop_analysis=True
        )

    @staticmethod
    def _topic_dir(topic: Dict[str, Any]) -> str:
        """Return the per-topic output directory name, e.g. 'topic_3'."""
        topic_index = topic.get('index', 'unknown')
        return f"topic_{topic_index}"

    @staticmethod
    def _extract_original_tag(generated_content: Union[str, Dict[str, Any]]) -> Any:
        """Return the 'tag' value of the original content, or [] if unavailable."""
        original_content = process_llm_json_text(generated_content)
        if isinstance(original_content, dict) and "tag" in original_content:
            original_tag = original_content["tag"]
            logger.info(f"从原始内容中提取到标签: {original_tag}")
            return original_tag
        logger.warning("从原始内容提取标签失败")
        return []

    def _judger_model_params(self) -> Dict[str, Any]:
        """Collect the non-None sampling options configured for the judger model."""
        judger_model = getattr(self.content_config, 'judger_model', None)
        if not isinstance(judger_model, dict):
            return {}
        # Options left unset (None) are omitted so generate_text falls back to
        # whatever it does when the keyword is not passed.
        return {
            key: judger_model[key]
            for key in self._MODEL_PARAM_KEYS
            if judger_model.get(key) is not None
        }

    async def _run_judgement(self, system_prompt: str, user_prompt: str, topic_dir: str,
                             original_tag: Any, drop_analysis: bool) -> Dict[str, Any]:
        """Save the prompts, call the AI, then parse and post-process its reply."""
        # Persist the prompts.
        if self.output_manager:
            self.output_manager.save_text(system_prompt, f"{topic_dir}/judger_system_prompt.txt")
            self.output_manager.save_text(user_prompt, f"{topic_dir}/judger_user_prompt.txt")

        model_params = self._judger_model_params()

        # 2. Call the AI. Saving the raw response stays inside the try so that
        # a failure there is still reported as a failed judgement, not raised.
        try:
            raw_result, _, _, _ = await self.ai_agent.generate_text(
                system_prompt=system_prompt,
                user_prompt=user_prompt,
                use_stream=True,
                stage="内容审核",
                **model_params
            )
            if self.output_manager:
                self.output_manager.save_text(raw_result, f"{topic_dir}/judger_raw_response.txt")
        except Exception as e:
            logger.critical(f"内容审核时AI调用失败: {e}", exc_info=True)
            return {"judge_success": False, "error": str(e)}

        # 3. Parse the reply; it must be a dict carrying both title and content.
        judged_data = process_llm_json_text(raw_result)
        if not (isinstance(judged_data, dict) and "title" in judged_data and "content" in judged_data):
            logger.warning("审核响应JSON格式不正确或缺少键")
            return {"judge_success": False, "error": "Invalid JSON response", "raw_response": raw_result}

        judged_data["judge_success"] = True
        if drop_analysis:
            # The reply is not guaranteed to contain "analysis"; a default
            # avoids a KeyError on otherwise valid responses.
            judged_data.pop("analysis", None)

        # Prefer the tag from the original content. When none was found, any
        # tag returned by the judge is left as-is (no default is injected).
        if original_tag:
            judged_data["tag"] = original_tag
        logger.info(f"内容审核成功完成,使用标签: {judged_data.get('tag', [])}")

        # Persist the judged article.
        if self.output_manager:
            self.output_manager.save_json(judged_data, f"{topic_dir}/article_judged.json")
        return judged_data