TravelContentCreator/utils/tweet/topic_generator.py
2025-07-08 18:24:23 +08:00

137 lines
5.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
选题生成模块
"""
import logging
import json
import re
from typing import List, Dict, Any, Optional
from json_repair import loads as json_repair_loads
from core.ai import AIAgent
from core.config import GenerateTopicConfig
from utils.prompts import TopicPromptBuilder
from utils.file_io import OutputManager
logger = logging.getLogger(__name__)
class TopicParser:
    """Parse and validate the topic list produced by the AI model."""

    # Keys every topic object must carry to be accepted as valid.
    _REQUIRED_KEYS = frozenset(
        {"index", "date", "logic", "object", "product", "style", "target_audience"}
    )

    @staticmethod
    def parse(raw_text: str) -> List[Dict[str, Any]]:
        """
        Parse, repair and validate the JSON contained in a raw model response.

        Args:
            raw_text: The raw string returned by the AI model. It may contain
                a ``<think>...</think>`` preamble and Markdown code fences.

        Returns:
            A list of dicts, one per valid topic. An empty list is returned
            when the input is not a string, is empty after clean-up, cannot be
            parsed, or does not decode to a list.
        """
        logger.info("开始解析AI生成的选题...")

        # Guard against None / non-string responses so the substring test
        # below cannot raise TypeError.
        if not isinstance(raw_text, str):
            logger.error(f"待解析内容不是字符串,而是 {type(raw_text)}")
            return []

        # 1. Drop the reasoning preamble: keep only what follows the first </think>.
        if "</think>" in raw_text:
            raw_text = raw_text.split("</think>", 1)[-1]

        # 2. Strip Markdown code-fence markers. The pattern has no anchors, so
        #    only case-insensitivity is needed (also catches "```JSON").
        cleaned_text = re.sub(
            r'```json\s*|\s*```', '', raw_text.strip(), flags=re.IGNORECASE
        )
        if not cleaned_text:
            logger.error("移除元数据后,解析内容为空")
            return []

        # 3. Repair and decode the JSON. Only this call can raise a decode
        #    error, so the try block is limited to it.
        try:
            parsed_json = json_repair_loads(cleaned_text)
        except (json.JSONDecodeError, ValueError) as e:
            logger.error(f"解析JSON失败: {e}", exc_info=True)
            return []

        if not isinstance(parsed_json, list):
            logger.error(f"解析结果不是列表,而是 {type(parsed_json)}")
            return []

        logger.info(f"成功解析 {len(parsed_json)} 个选题对象。开始验证...")

        # 4. Keep only dict items that carry every required key.
        required_keys = TopicParser._REQUIRED_KEYS
        valid_topics = []
        for position, item in enumerate(parsed_json, start=1):
            if isinstance(item, dict) and required_keys.issubset(item.keys()):
                valid_topics.append(item)
            else:
                # Report the 1-based ordinal of the rejected item, not a count.
                logger.warning(f"第 {position} 个选题缺少必需键或格式不正确: {item}")

        logger.info(f"验证完成,获得 {len(valid_topics)} 个有效选题。")
        return valid_topics
class TopicGenerator:
    """Generate topics, parse the model output and persist the artifacts."""

    def __init__(self, ai_agent: AIAgent, config: GenerateTopicConfig, output_manager: OutputManager):
        self.ai_agent = ai_agent
        self.config = config
        self.output_manager = output_manager
        self.prompt_builder = TopicPromptBuilder(config)

    def generate(self) -> Optional[List[Dict[str, Any]]]:
        """
        Run the whole pipeline: build prompts -> call the AI -> parse -> save.

        Returns:
            The list of parsed topics, or None when the AI call raises or no
            valid topic could be parsed from the response.
        """
        logger.info("开始执行选题生成流程...")
        out = self.output_manager

        # Step 1: build the prompts and store each one as soon as it is read.
        prompts = self.prompt_builder.build_prompts()
        system_prompt = prompts["system"]
        out.save_text(system_prompt, "topic_system_prompt.txt")
        user_prompt = prompts["user"]
        out.save_text(user_prompt, "topic_user_prompt.txt")

        # Step 2: query the model; only the first element of the 4-tuple is used.
        try:
            raw_result, _, _, _ = self.ai_agent.work(
                system_prompt=system_prompt,
                user_prompt=user_prompt,
            )
            out.save_text(raw_result, "topics_raw_response.txt")
        except Exception as e:
            logger.critical(f"AI调用失败无法生成选题: {e}", exc_info=True)
            return None

        # Step 3: parse the response; an empty result aborts the run.
        topics = TopicParser.parse(raw_result)
        if not topics:
            logger.error("未能从AI响应中解析出任何有效选题")
            return None

        # Step 4: persist the structured result, then a human-readable copy.
        out.save_json(topics, "topics_generated.json")
        out.save_text(self._format_topics_to_text(topics), "topics_generated.txt")

        logger.info(f"选题生成流程成功完成,共生成 {len(topics)} 个选题。")
        return topics

    def _format_topics_to_text(self, topics: List[Dict[str, Any]]) -> str:
        """Render the topic list as human-readable, Markdown-like text."""
        # (label, key) pairs in the order they appear under each topic heading.
        labelled_fields = (
            ("日期", "date"),
            ("对象", "object"),
            ("产品", "product"),
            ("风格", "style"),
            ("目标受众", "target_audience"),
            ("逻辑", "logic"),
        )

        lines = [f"# 选题列表 (Run ID: {self.output_manager.run_id})\n"]
        for entry in topics:
            lines.append(f"## 选题 {entry.get('index', 'N/A')}")
            lines.extend(
                f"- {label}: {entry.get(key, 'N/A')}" for label, key in labelled_fields
            )
        return "\n".join(lines)