71 lines
2.3 KiB
Python
71 lines
2.3 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""
|
|
AI响应解析器模块
|
|
"""
|
|
|
|
import logging
|
|
import json
|
|
import re
|
|
from typing import List, Dict, Any
|
|
from json_repair import loads as json_repair_loads
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class TopicParser:
    """Parse and validate the topic list generated by an AI model."""

    # Matches an opening fence (optionally tagged "json", in any letter case)
    # or a closing fence, together with the whitespace hugging it.
    _FENCE_RE = re.compile(r'```(?:json)?\s*|\s*```', flags=re.IGNORECASE)

    # Keys every topic dict must contain to be considered valid.
    _REQUIRED_KEYS = frozenset(
        {"index", "date", "logic", "object", "product", "style", "target_audience"}
    )

    @staticmethod
    def parse(raw_text: str) -> List[Dict[str, Any]]:
        """Extract, repair, and validate the JSON topic list in ``raw_text``.

        The text is cleaned in two steps (everything up to the first
        ``</think>`` tag is dropped, then Markdown code-fence markers are
        removed), parsed with ``json_repair``, and finally filtered so that
        only dict items containing every required key are kept.

        Args:
            raw_text: Raw string returned by the AI model.

        Returns:
            A list of dicts, one per valid topic. The list is empty when the
            input is not a string, is empty after clean-up, fails to parse,
            or does not parse to a list. Failures are logged, never raised.
        """
        logger.info("开始解析AI生成的选题...")

        # Guard against None / non-string input: the membership test below
        # would raise TypeError and bypass the "return []" failure contract.
        if not isinstance(raw_text, str):
            logger.error("原始响应不是字符串,而是 %s", type(raw_text))
            return []

        # 1. Drop the model's reasoning: keep only what follows the first
        #    </think> tag, if there is one.
        _, closing_tag, remainder = raw_text.partition("</think>")
        if closing_tag:
            raw_text = remainder

        # 2. Remove Markdown code-fence markers (e.g. ```json ... ```).
        cleaned_text = TopicParser._FENCE_RE.sub('', raw_text.strip()).strip()
        if not cleaned_text:
            logger.error("移除元数据后,解析内容为空")
            return []

        # 3. Let json_repair fix up non-conforming JSON and parse it. Only
        #    this call is guarded; json.JSONDecodeError is a ValueError
        #    subclass, so catching ValueError covers both.
        try:
            parsed_json = json_repair_loads(cleaned_text)
        except ValueError as e:
            logger.error("解析JSON失败: %s", e, exc_info=True)
            return []

        if not isinstance(parsed_json, list):
            logger.error("解析结果不是列表,而是 %s", type(parsed_json))
            return []

        logger.info("成功解析 %d 个选题对象。开始验证...", len(parsed_json))

        # 4. Keep only dict items that carry every required key; log the
        #    rest with their 1-based position in the parsed list.
        required_keys = TopicParser._REQUIRED_KEYS
        valid_topics = []
        for position, item in enumerate(parsed_json, start=1):
            if isinstance(item, dict) and required_keys.issubset(item):
                valid_topics.append(item)
            else:
                logger.warning("第 %d 个选题缺少必需键或格式不正确: %s", position, item)

        logger.info("验证完成,获得 %d 个有效选题。", len(valid_topics))
        return valid_topics