2025-07-08 18:24:23 +08:00
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
文件输入输出工具模块
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
import os
|
|
|
|
|
|
import random
|
|
|
|
|
|
import json
|
2025-07-10 16:10:14 +08:00
|
|
|
|
import json_repair
|
2025-07-08 18:24:23 +08:00
|
|
|
|
import logging
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
from typing import Optional, List, Dict, Any
|
|
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ResourceLoader:
    """Stateless helpers for locating and loading files from disk.

    All methods are static. Failures are logged through the module-level
    ``logger`` and reported to the caller as ``None`` instead of being raised.
    """

    @staticmethod
    def load_text_file(file_path: str) -> Optional[str]:
        """Read a UTF-8 text file and return its whole content.

        Args:
            file_path: Path of the file to read.

        Returns:
            The file content, or ``None`` when the path does not exist
            (logged as a warning) or reading fails (logged as an error).
        """
        # The explicit existence check keeps "missing file" at warning level,
        # while genuine read/decode failures below are reported as errors.
        if not os.path.exists(file_path):
            logger.warning(f"文件不存在: {file_path}")
            return None

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
        except Exception as e:
            logger.error(f"加载文件 '{file_path}' 失败: {e}")
            return None

    @staticmethod
    def load_json_file(file_path: str) -> Optional[Dict[str, Any]]:
        """Load a file and parse it as JSON, repairing malformed input.

        Parsing is delegated to ``json_repair.loads``.

        Args:
            file_path: Path of the JSON file.

        Returns:
            The parsed object, or ``None`` when the file cannot be read or
            nothing could be parsed from it.
        """
        content = ResourceLoader.load_text_file(file_path)
        if content is None:
            return None

        try:
            parsed = json_repair.loads(content)
        except Exception as e:
            # json_repair handles strict-parse failures itself, so catching
            # only json.JSONDecodeError would never fire; catch whatever it
            # may still raise so this loader stays best-effort.
            logger.error(f"解析JSON文件 '{file_path}' 失败: {e}")
            return None

        # NOTE(review): json_repair is documented to hand back an empty
        # string when the input is beyond repair - report that as a failure
        # instead of leaking "" to callers that expect a mapping.
        if isinstance(parsed, str) and not parsed:
            logger.error(f"解析JSON文件 '{file_path}' 失败: no JSON content could be recovered")
            return None
        return parsed

    @staticmethod
    def find_file(directory: str, file_name: str, exact_match: bool = True) -> Optional[str]:
        """Locate a file inside ``directory`` by exact or fuzzy name.

        A ``.txt`` suffix is appended when ``file_name`` has no extension.
        The exact name is always tried first; the fuzzy search (substring
        match on the name without its extension) only runs when
        ``exact_match`` is ``False``.

        Args:
            directory: Directory to search (not recursive).
            file_name: Name to look for, with or without an extension.
            exact_match: When ``False``, fall back to a substring match.

        Returns:
            The joined path of the match, or ``None`` when the directory is
            missing or nothing matches (both cases are logged as warnings).
        """
        if not os.path.isdir(directory):
            logger.warning(f"目录不存在: {directory}")
            return None

        stem, ext = os.path.splitext(file_name)
        wanted_name = file_name if ext else f"{stem}.txt"

        # Exact match: only regular files count, never a sub-directory.
        exact_path = os.path.join(directory, wanted_name)
        if os.path.isfile(exact_path):
            return exact_path

        # Fuzzy match. os.listdir() returns entries in arbitrary order, so
        # sort them to make the chosen file deterministic across runs and
        # platforms. An empty stem would match everything, so it is ignored.
        if not exact_match and stem:
            for entry in sorted(os.listdir(directory)):
                if stem not in entry:
                    continue
                candidate = os.path.join(directory, entry)
                if os.path.isfile(candidate):
                    return candidate

        logger.warning(f"在 '{directory}' 中找不到文件 '{file_name}'")
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class OutputManager:
    """Writes the artifacts of one run beneath ``<output_dir>/<run_id>``.

    The run directory is created on construction. The ``save_*`` methods are
    best-effort: a failure is logged through the module-level ``logger`` and
    the method returns ``None`` without raising.
    """

    def __init__(self, output_dir: str, run_id: str):
        """Create (if needed) the directory that holds this run's output.

        Args:
            output_dir: Base directory shared by all runs.
            run_id: Identifier of this run; used as the sub-directory name.
        """
        self.base_output_dir = Path(output_dir)
        self.run_id = run_id
        self.run_dir = self.base_output_dir / self.run_id
        self.run_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"OutputManager initialized for run '{run_id}' in '{self.run_dir}'")

    def _target_path(self, filename: str, subdir: Optional[str] = None) -> Path:
        """Return the path for ``filename`` under the run dir or ``subdir``."""
        target_dir = self.run_dir / subdir if subdir else self.run_dir
        return target_dir / filename

    def get_topic_dir(self, topic_index: Any) -> Path:
        """Create and return ``<run_dir>/topic_<topic_index>``."""
        topic_dir = self.run_dir / f"topic_{topic_index}"
        topic_dir.mkdir(parents=True, exist_ok=True)
        return topic_dir

    def get_variant_dir(self, topic_index: int, variant_index: int) -> Path:
        """Create and return ``<run_dir>/<topic_index>_<variant_index>``."""
        variant_dir = self.run_dir / f"{topic_index}_{variant_index}"
        # parents=True mirrors get_topic_dir, so this still works when the
        # run directory itself has been removed since construction.
        variant_dir.mkdir(parents=True, exist_ok=True)
        return variant_dir

    def save_json(self, data: Any, filename: str, subdir: Optional[str] = None):
        """Serialise ``data`` as indented UTF-8 JSON and write it to disk.

        Args:
            data: Any object accepted by ``json.dumps``.
            filename: Name of the file to write.
            subdir: Optional sub-directory of the run directory; it may be
                nested (e.g. ``"topic_1/v2"``) and is created when missing.
        """
        file_path = self._target_path(filename, subdir)
        try:
            # Serialise before touching the disk so unserialisable data
            # cannot leave a truncated file behind.
            payload = json.dumps(data, ensure_ascii=False, indent=4)
            file_path.parent.mkdir(parents=True, exist_ok=True)
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(payload)
            logger.info(f"JSON data saved to: {file_path}")
        except Exception as e:
            logger.error(f"Failed to save JSON to {file_path}: {e}")

    def save_text(self, content: str, filename: str, subdir: Optional[str] = None):
        """Write ``content`` to a UTF-8 text file.

        Args:
            content: Text to write.
            filename: Name of the file to write.
            subdir: Optional (possibly nested) sub-directory of the run
                directory; created when missing.
        """
        file_path = self._target_path(filename, subdir)
        try:
            # Directory creation lives inside the try so that its failures
            # are logged like every other write failure.
            file_path.parent.mkdir(parents=True, exist_ok=True)
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(content)
            logger.info(f"Text data saved to: {file_path}")
        except Exception as e:
            logger.error(f"Failed to save text to {file_path}: {e}")

    def save_image(self, image_data, filename: str, subdir: Optional[str] = None):
        """Save an image by calling ``image_data.save(path)``.

        Args:
            image_data: Object exposing ``save(path)``; presumably a Pillow
                image - confirm against callers.
            filename: Name of the file to write.
            subdir: Optional (possibly nested) sub-directory of the run
                directory; created when missing.
        """
        file_path = self._target_path(filename, subdir)
        try:
            file_path.parent.mkdir(parents=True, exist_ok=True)
            image_data.save(file_path)
            logger.info(f"Image saved to: {file_path}")
        except Exception as e:
            logger.error(f"Failed to save image to {file_path}: {e}")

    def finalize(self):
        """Log the end of the run.

        Currently only logs; kept as an extension point (e.g. for writing a
        manifest file later).
        """
        logger.info(f"Finalizing run: {self.run_id}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _extract_json_candidates(text: str) -> List[str]:
    """Return de-duplicated JSON candidate strings, most specific first."""
    import re  # local import: the module header does not import ``re``

    # Everything after the first "</think>" marker is treated as the answer.
    answer = text.split("</think>", 1)[1] if "</think>" in text else text
    fence_pattern = r"```(?:json)?\s*([\s\S]*?)```"

    raw_candidates = [answer]
    raw_candidates.extend(re.findall(fence_pattern, answer))
    if answer is not text:
        # Fenced blocks inside the reasoning part are only a last resort.
        raw_candidates.extend(re.findall(fence_pattern, text))
    raw_candidates.append(text)

    candidates: List[str] = []
    for raw in raw_candidates:
        cleaned = raw.strip()
        if cleaned and cleaned not in candidates:
            candidates.append(cleaned)
    return candidates


def process_llm_json_text(text: Any) -> Optional[Dict[str, Any]]:
    """Extract a JSON object (dict) from raw LLM output.

    Candidate strings are collected in this order:
      1. the content after the first ``</think>`` marker,
      2. the content of fenced code blocks (three backticks, optional
         ``json`` tag), answer part first,
      3. the whole text.

    Every candidate is first parsed strictly with ``json.loads``. Only when
    no candidate yields a dict are the candidates retried with
    ``json_repair.loads``. Running the repair step last keeps it from
    returning early with a guess built from the entire raw text, which would
    bypass the extracted candidates.

    Args:
        text: Raw LLM response, or an already-parsed object.

    Returns:
        The parsed dict, or ``None`` when the input is a list, is empty, or
        no candidate parses to a dict.
    """
    # Already parsed: hand dicts back untouched.
    if isinstance(text, dict):
        return text

    # A list cannot satisfy the dict contract.
    if isinstance(text, list):
        logger.warning("输入是列表类型,但期望返回字典类型")
        return None

    # None is treated like an empty response rather than the string "None".
    if text is None:
        logger.warning("收到空的LLM响应")
        return None

    if not isinstance(text, str):
        try:
            text = str(text)
        except Exception as e:
            logger.error(f"无法将输入转换为字符串: {e}")
            return None

    if not text.strip():
        logger.warning("收到空的LLM响应")
        return None

    json_candidates = _extract_json_candidates(text)

    # Pass 1: strict parsing - no guessing involved.
    for candidate in json_candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed

    # Pass 2: tolerant parsing. Only dict results are accepted, because
    # json_repair may hand back strings or lists for non-JSON prose.
    for candidate in json_candidates:
        try:
            repaired = json_repair.loads(candidate)
        except Exception as e:
            logger.debug(f"json_repair failed on a candidate: {e}")
            continue
        if isinstance(repaired, dict):
            return repaired

    # Every attempt failed: log it and signal failure to the caller.
    logger.error(f"无法解析LLM返回的JSON,尝试了{len(json_candidates)}种提取方式")
    logger.debug(f"原始响应: {text[:200]}...")  # first 200 chars only, to keep logs small
    return None
|