更改为全读取,加载的时候随机采样
This commit is contained in:
parent
bbaa6eee53
commit
45969c0189
@ -113,10 +113,32 @@ class PromptManager:
|
|||||||
if os.path.exists(path):
|
if os.path.exists(path):
|
||||||
filename = os.path.basename(path)
|
filename = os.path.basename(path)
|
||||||
# print(filename)
|
# print(filename)
|
||||||
|
# 检测是否为JSON文件
|
||||||
|
if filename.lower().endswith('.json'):
|
||||||
|
# 直接加载和解析JSON文件
|
||||||
|
try:
|
||||||
|
with open(path, 'r', encoding='utf-8') as f:
|
||||||
|
content = f.read()
|
||||||
|
# 移除可能的BOM头
|
||||||
|
if content.startswith('\ufeff'):
|
||||||
|
content = content[1:]
|
||||||
|
# 解析JSON
|
||||||
|
json_data = json.loads(content)
|
||||||
|
# 存储解析后的对象
|
||||||
|
self._refer_cache[filename] = json_data
|
||||||
|
logging.info(f"预加载并解析JSON参考文件: {filename}")
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"预加载JSON文件{filename}失败: {str(e)}")
|
||||||
|
# 失败时尝试以普通文本加载
|
||||||
|
content = ResourceLoader.load_file_content(path)
|
||||||
|
if content:
|
||||||
|
self._refer_cache[filename] = content
|
||||||
|
else:
|
||||||
|
# 非JSON文件使用普通加载方式
|
||||||
content = ResourceLoader.load_all_refer_files(path, 1)
|
content = ResourceLoader.load_all_refer_files(path, 1)
|
||||||
if content:
|
if content:
|
||||||
self._refer_cache[filename] = content
|
self._refer_cache[filename] = content
|
||||||
# print(content)
|
logging.info(f"预加载普通参考文件: {filename}")
|
||||||
|
|
||||||
def find_directory_fuzzy_match(self, name, directory=None, files=None):
|
def find_directory_fuzzy_match(self, name, directory=None, files=None):
|
||||||
"""
|
"""
|
||||||
@ -304,6 +326,73 @@ class PromptManager:
|
|||||||
logging.warning(f"未能找到Demand文件: '{demand_name}',尝试过以下位置: 缓存, {self.prompts_dir}/Demand/")
|
logging.warning(f"未能找到Demand文件: '{demand_name}',尝试过以下位置: 缓存, {self.prompts_dir}/Demand/")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _get_refer_content(self, refer_dir):
    """Load every regular file directly inside a Refer directory as text.

    Files whose name ends with ``.json`` (case-insensitive) are read as
    UTF-8 with any leading BOM removed, then checked with ``json.loads``.
    Only the raw text is stored, never the parsed object. A ``.json`` file
    that fails validation is still returned as text (a warning is logged);
    a ``.json`` file that cannot be read at all is skipped (an error is
    logged). All other files are delegated to
    ``ResourceLoader.load_file_content``.

    Args:
        refer_dir: Path of the Refer directory to scan. Sub-directories
            are not descended into.

    Returns:
        dict: File name -> text content, inserted in sorted file-name
        order. Files whose content is empty or falsy are omitted. An empty
        dict is returned when ``refer_dir`` is not a directory.
    """
    results = {}
    if not os.path.isdir(refer_dir):
        return results

    # os.listdir() returns entries in arbitrary, filesystem-dependent order;
    # sorting makes the resulting mapping reproducible across machines.
    for refer_file in sorted(os.listdir(refer_dir)):
        refer_path = os.path.join(refer_dir, refer_file)
        if not os.path.isfile(refer_path):
            continue

        content = None
        if refer_file.lower().endswith('.json'):
            try:
                # 'utf-8-sig' decodes plain UTF-8 and silently drops a
                # leading BOM, so no manual '\ufeff' stripping is needed.
                with open(refer_path, 'r', encoding='utf-8-sig') as f:
                    content = f.read()

                # Validation only: the parsed value is discarded, and
                # invalid JSON is kept as plain text on purpose.
                try:
                    json.loads(content)
                    logging.info(f"成功验证JSON文件: {refer_file}")
                except json.JSONDecodeError as e:
                    logging.warning(f"文件{refer_file}内容不是有效的JSON: {e}")
            except Exception as e:
                # Best-effort scan: one unreadable file must not abort the
                # whole directory, so it is logged and dropped.
                logging.error(f"读取JSON文件{refer_file}时出错: {e}")
                content = None
        else:
            # Non-JSON files go through the project's generic loader.
            content = ResourceLoader.load_file_content(refer_path)

        if content:
            results[refer_file] = content

    return results
|
||||||
|
|
||||||
|
def _load_and_parse_json(self, file_path):
    """Read a UTF-8 JSON file and return the parsed value without raising.

    A leading byte-order mark, if present, is removed before parsing.

    Args:
        file_path: Path of the JSON file to load.

    Returns:
        The parsed JSON value (dict, list, str, number, bool or None), or
        None when the file cannot be opened, decoded or parsed. A file
        containing the literal ``null`` also yields None, so callers
        cannot distinguish that case from a failure.
    """
    try:
        # 'utf-8-sig' decodes plain UTF-8 and silently drops a leading BOM,
        # replacing the manual '\ufeff' check.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            content = f.read()
        return json.loads(content)
    except Exception as e:
        # Deliberately broad: this helper's contract is to never raise and
        # to report every failure through the log instead.
        logging.error(f"加载和解析JSON文件{file_path}时出错: {e}")
        return None
|
||||||
|
|
||||||
def _get_all_refer_contents(self, random_sample=True):
|
def _get_all_refer_contents(self, random_sample=True):
|
||||||
"""获取所有Refer文件内容,可选择随机抽样文件内容
|
"""获取所有Refer文件内容,可选择随机抽样文件内容
|
||||||
|
|
||||||
@ -318,56 +407,55 @@ class PromptManager:
|
|||||||
# 初始化结果字符串
|
# 初始化结果字符串
|
||||||
refer_content_all = ""
|
refer_content_all = ""
|
||||||
|
|
||||||
# 准备处理所有文件
|
# 如果缓存为空,尝试加载文件
|
||||||
all_refer_files = {}
|
if not self._refer_cache and self.prompts_dir:
|
||||||
|
|
||||||
# 1. 从缓存中获取文件
|
|
||||||
if self._refer_cache:
|
|
||||||
all_refer_files.update(self._refer_cache)
|
|
||||||
|
|
||||||
# 2. 从本地目录获取其他文件(如果有的话)
|
|
||||||
if self.prompts_dir:
|
|
||||||
refer_dir = os.path.join(self.prompts_dir, "Refer")
|
refer_dir = os.path.join(self.prompts_dir, "Refer")
|
||||||
if os.path.isdir(refer_dir):
|
if os.path.isdir(refer_dir):
|
||||||
dir_files = [f for f in os.listdir(refer_dir) if os.path.isfile(os.path.join(refer_dir, f))]
|
refer_files = [f for f in os.listdir(refer_dir) if os.path.isfile(os.path.join(refer_dir, f))]
|
||||||
for refer_file in dir_files:
|
for refer_file in refer_files:
|
||||||
if refer_file not in all_refer_files:
|
file_path = os.path.join(refer_dir, refer_file)
|
||||||
refer_path = os.path.join(refer_dir, refer_file)
|
|
||||||
content = ResourceLoader.load_file_content(refer_path)
|
# 对JSON文件特殊处理
|
||||||
|
if refer_file.lower().endswith('.json'):
|
||||||
|
try:
|
||||||
|
with open(file_path, 'r', encoding='utf-8') as f:
|
||||||
|
content = f.read()
|
||||||
|
# 移除可能的BOM头
|
||||||
|
if content.startswith('\ufeff'):
|
||||||
|
content = content[1:]
|
||||||
|
# 解析并存储JSON对象
|
||||||
|
json_data = json.loads(content)
|
||||||
|
self._refer_cache[refer_file] = json_data
|
||||||
|
logging.info(f"加载并解析JSON参考文件: {refer_file}")
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f"加载JSON文件{refer_file}失败: {str(e)}")
|
||||||
|
# 失败时尝试以普通文本加载
|
||||||
|
content = ResourceLoader.load_file_content(file_path)
|
||||||
|
if content:
|
||||||
|
self._refer_cache[refer_file] = content
|
||||||
|
else:
|
||||||
|
# 非JSON文件使用普通加载方式
|
||||||
|
content = ResourceLoader.load_file_content(file_path)
|
||||||
if content:
|
if content:
|
||||||
all_refer_files[refer_file] = content
|
|
||||||
# 保存到缓存
|
|
||||||
self._refer_cache[refer_file] = content
|
self._refer_cache[refer_file] = content
|
||||||
|
|
||||||
if not all_refer_files:
|
if not self._refer_cache:
|
||||||
logging.warning("没有找到任何Refer文件")
|
logging.warning("没有找到任何Refer文件")
|
||||||
return refer_content_all
|
return refer_content_all
|
||||||
|
|
||||||
logging.info(f"找到{len(all_refer_files)}个Refer文件")
|
logging.info(f"找到{len(self._refer_cache)}个Refer文件")
|
||||||
|
|
||||||
# 3. 处理所有文件
|
|
||||||
for filename, content in all_refer_files.items():
|
|
||||||
# 检查是否为JSON格式,尝试解析
|
|
||||||
is_json = False
|
|
||||||
json_data = None
|
|
||||||
|
|
||||||
if filename.lower().endswith('.json'):
|
|
||||||
try:
|
|
||||||
json_data = json.loads(content)
|
|
||||||
is_json = True
|
|
||||||
logging.info(f"成功解析JSON格式的refer文件: {filename}")
|
|
||||||
except json.JSONDecodeError as e:
|
|
||||||
logging.warning(f"文件{filename}扩展名为.json但内容不是有效的JSON格式: {str(e)}")
|
|
||||||
logging.info(f"将以文本格式处理文件: {filename}")
|
|
||||||
|
|
||||||
|
# 处理所有文件
|
||||||
|
for filename, content in self._refer_cache.items():
|
||||||
# 添加文件头部信息
|
# 添加文件头部信息
|
||||||
refer_content_all += f"--- Refer File: {filename} ---\n"
|
refer_content_all += f"--- Refer File: {filename} ---\n"
|
||||||
|
|
||||||
if is_json and json_data:
|
# 检查内容类型
|
||||||
# 处理JSON格式文件
|
if isinstance(content, dict) and 'title' in content and 'examples' in content:
|
||||||
title = json_data.get("title", "未命名参考资料")
|
# 已解析的JSON对象
|
||||||
description = json_data.get("description", "")
|
title = content.get("title", "未命名参考资料")
|
||||||
examples = json_data.get("examples", [])
|
description = content.get("description", "")
|
||||||
|
examples = content.get("examples", [])
|
||||||
|
|
||||||
refer_content_all += f"标题: {title}\n"
|
refer_content_all += f"标题: {title}\n"
|
||||||
refer_content_all += f"描述: {description}\n\n"
|
refer_content_all += f"描述: {description}\n\n"
|
||||||
@ -390,8 +478,8 @@ class PromptManager:
|
|||||||
for idx, example in enumerate(examples, 1):
|
for idx, example in enumerate(examples, 1):
|
||||||
content_text = example.get("content", "")
|
content_text = example.get("content", "")
|
||||||
refer_content_all += f"{idx}. {content_text}\n"
|
refer_content_all += f"{idx}. {content_text}\n"
|
||||||
else:
|
elif isinstance(content, str):
|
||||||
# 处理普通文本文件
|
# 文本内容
|
||||||
if random_sample:
|
if random_sample:
|
||||||
lines = content.split('\n')
|
lines = content.split('\n')
|
||||||
if len(lines) > 10: # 只对较长的内容进行抽样
|
if len(lines) > 10: # 只对较长的内容进行抽样
|
||||||
@ -408,6 +496,9 @@ class PromptManager:
|
|||||||
else:
|
else:
|
||||||
# 不进行抽样
|
# 不进行抽样
|
||||||
refer_content_all += f"{content}\n"
|
refer_content_all += f"{content}\n"
|
||||||
|
else:
|
||||||
|
# 内容是其他类型
|
||||||
|
refer_content_all += f"未知内容类型: {type(content)}\n"
|
||||||
|
|
||||||
# 添加文件之间的分隔
|
# 添加文件之间的分隔
|
||||||
refer_content_all += "\n"
|
refer_content_all += "\n"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user