#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
选题生成模块
"""

import logging
import json
import re
from typing import List, Dict, Any, Optional
from json_repair import loads as json_repair_loads

from core.ai import AIAgent
from core.config import GenerateTopicConfig
from utils.prompts import TopicPromptBuilder
from utils.file_io import OutputManager

logger = logging.getLogger(__name__)

class TopicParser:
    """Parses and validates the topic list produced by the AI model."""

    @staticmethod
    def parse(raw_text: str) -> List[Dict[str, Any]]:
        """
        Parse, repair and validate the JSON found in the raw model output.

        Args:
            raw_text: The raw string returned by the AI model.

        Returns:
            A list of dicts, one per valid topic; empty list on any failure.
        """
        logger.info("开始解析AI生成的选题...")

        # Drop everything up to and including a reasoning </think> marker, if present.
        if "</think>" in raw_text:
            _, _, raw_text = raw_text.partition("</think>")

        # Strip Markdown ```json fences so only the JSON payload remains.
        cleaned_text = re.sub(r'```json\s*|\s*```', '', raw_text.strip(), flags=re.MULTILINE)

        if not cleaned_text:
            logger.error("移除元数据后,解析内容为空")
            return []

        try:
            # Let json_repair mend minor syntax problems before decoding.
            parsed_json = json_repair_loads(cleaned_text)

            if not isinstance(parsed_json, list):
                logger.error(f"解析结果不是列表,而是 {type(parsed_json)}")
                return []

            logger.info(f"成功解析 {len(parsed_json)} 个选题对象。开始验证...")

            # Keep only dict entries that carry every required key.
            required_keys = {"index", "date", "logic", "object", "product", "style", "target_audience"}
            valid_topics = []
            for position, candidate in enumerate(parsed_json, start=1):
                if not isinstance(candidate, dict) or not required_keys <= candidate.keys():
                    logger.warning(f"第 {position} 个选题缺少必需键或格式不正确: {candidate}")
                    continue
                valid_topics.append(candidate)

            logger.info(f"验证完成,获得 {len(valid_topics)} 个有效选题。")
            return valid_topics

        except (json.JSONDecodeError, ValueError) as e:
            logger.error(f"解析JSON失败: {e}", exc_info=True)
            return []


class TopicGenerator:
    """Generates, parses and persists topic proposals."""

    def __init__(self, ai_agent: AIAgent, config: GenerateTopicConfig, output_manager: OutputManager):
        # Collaborators are injected so the generator stays easy to wire up.
        self.ai_agent = ai_agent
        self.config = config
        self.output_manager = output_manager
        self.prompt_builder = TopicPromptBuilder(config)

    def generate(self) -> Optional[List[Dict[str, Any]]]:
        """
        Run the full topic-generation pipeline:
        build prompts -> call the AI -> parse the result -> persist artifacts.

        Returns:
            The list of parsed topic dicts, or None when the AI call or parsing fails.
        """
        logger.info("开始执行选题生成流程...")

        # Step 1: build and archive the prompts sent to the model.
        prompts = self.prompt_builder.build_prompts()
        self.output_manager.save_text(prompts["system"], "topic_system_prompt.txt")
        self.output_manager.save_text(prompts["user"], "topic_user_prompt.txt")

        # Step 2: call the model; only the raw text of the 4-tuple result is used here.
        try:
            raw_result, _, _, _ = self.ai_agent.work(
                system_prompt=prompts["system"],
                user_prompt=prompts["user"]
            )
            self.output_manager.save_text(raw_result, "topics_raw_response.txt")
        except Exception as e:
            logger.critical(f"AI调用失败,无法生成选题: {e}", exc_info=True)
            return None

        # Step 3: turn the raw response into validated topic dicts.
        topics = TopicParser.parse(raw_result)
        if not topics:
            logger.error("未能从AI响应中解析出任何有效选题")
            return None

        # Step 4: persist the machine-readable result.
        self.output_manager.save_json(topics, "topics_generated.json")

        # Step 5: also persist a human-readable rendering.
        self.output_manager.save_text(self._format_topics_to_text(topics), "topics_generated.txt")

        logger.info(f"选题生成流程成功完成,共生成 {len(topics)} 个选题。")
        return topics

    def _format_topics_to_text(self, topics: List[Dict[str, Any]]) -> str:
        """Render the topic list as human-readable Markdown-style text."""
        rendered = [f"# 选题列表 (Run ID: {self.output_manager.run_id})\n"]
        # Label/key pairs in the exact order the report lists them.
        field_labels = (
            ("日期", "date"),
            ("对象", "object"),
            ("产品", "product"),
            ("风格", "style"),
            ("目标受众", "target_audience"),
            ("逻辑", "logic"),
        )
        for entry in topics:
            rendered.append(f"## 选题 {entry.get('index', 'N/A')}")
            for label, key in field_labels:
                rendered.append(f"- {label}: {entry.get(key, 'N/A')}")
        return "\n".join(rendered)