TravelContentCreator/tweet/topic_parser.py

59 lines
1.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
AI响应解析器模块
"""
import logging
import json
from typing import List, Dict, Any
from utils.file_io import process_llm_json_text
logger = logging.getLogger(__name__)
class TopicParser:
    """Parse and validate the topic list generated by an AI model."""

    @staticmethod
    def parse(raw_text: str) -> List[Dict[str, Any]]:
        """Parse raw model output into a list of validated topic dicts.

        The raw text is passed to ``process_llm_json_text``; each element of
        the resulting list is then kept only if it is a dict that contains
        every required topic key.

        Args:
            raw_text: The raw string returned by the AI model.

        Returns:
            A list of dicts, one per valid topic. An empty list is returned
            when the parsed result is falsy or is not a list.
        """
        logger.info("开始解析AI生成的选题...")

        # Delegate JSON extraction to the shared helper.
        parsed_json = process_llm_json_text(raw_text)

        # Any falsy result (including an empty list) is reported as a failure.
        # NOTE(review): presumably the helper returns None on failure --
        # confirm against utils.file_io.
        if not parsed_json:
            logger.error("解析AI响应失败无法获取JSON数据")
            return []

        if not isinstance(parsed_json, list):
            logger.error(f"解析结果不是列表,而是 {type(parsed_json)}")
            return []

        logger.info(f"成功解析 {len(parsed_json)} 个选题对象。开始验证...")

        # Only these keys are enforced. "productLogic", "styleLogic" and
        # "targetAudienceLogic" may also appear but are optional and are
        # deliberately not checked.
        required_keys = {
            "index",
            "date",
            "logic",
            "object",
            "product",
            "style",
            "targetAudience",
        }

        valid_topics = []
        for position, item in enumerate(parsed_json, start=1):
            if isinstance(item, dict) and required_keys.issubset(item.keys()):
                valid_topics.append(item)
            else:
                # The ordinal marker makes clear that the number is the
                # 1-based position of the rejected item, not a count.
                logger.warning(f"第 {position} 个选题缺少必需键或格式不正确: {item}")

        logger.info(f"验证完成,获得 {len(valid_topics)} 个有效选题。")
        return valid_topics