#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Manages the construction of prompts for different AI generation tasks.
"""

import os
import traceback
import logging
import re  # regular expressions, used for fuzzy file-name matching
import random  # random sampling of reference content
import json

from .resource_loader import ResourceLoader  # relative import within the same package


class PromptManager:
    """Handles the loading and construction of prompts."""

    def __init__(self,
                 topic_system_prompt_path: str,
                 topic_user_prompt_path: str,
                 content_system_prompt_path: str,
                 prompts_dir: str = None,  # legacy configuration, kept for compatibility
                 prompts_config: list = None,  # new configuration style
                 resource_dir_config: list = None,
                 topic_gen_num: int = 1,  # default values if needed
                 topic_gen_date: str = "",
                 content_judger_system_prompt_path: str = None  # path to the content-review (judger) system prompt
                 ):
        self.topic_system_prompt_path = topic_system_prompt_path
        self.topic_user_prompt_path = topic_user_prompt_path
        self.content_system_prompt_path = content_system_prompt_path
        self.content_judger_system_prompt_path = content_judger_system_prompt_path
        self.prompts_dir = prompts_dir  # kept for compatibility with the legacy configuration
        self.prompts_config = prompts_config or []  # new configuration style
        self.resource_dir_config = resource_dir_config or []
        self.topic_gen_num = topic_gen_num
        self.topic_gen_date = topic_gen_date

        # Caches for loaded file contents
        self._style_cache = {}
        self._demand_cache = {}
        self._refer_cache = {}
        self._system_prompt_cache = {}  # system prompt cache
        self._user_prompt_cache = {}    # user prompt cache
        self._dateline_cache = None     # dateline (holiday calendar) cache

        self._sample_rate = 0.1  # random sampling rate for reference content
        # Preload the configured files at initialization
        self._preload_prompt_files()

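    # Expected configuration shape, a sketch inferred from how the lists are read in
    # _preload_prompt_files() and get_content_prompts(); the file names are hypothetical:
    #   prompts_config = [
    #       {"type": "Style",  "file_path": ["Style/活泼种草风.txt"]},
    #       {"type": "Demand", "file_path": ["Demand/亲子家庭.txt"]},
    #       {"type": "Refer",  "file_path": ["Refer/examples.json"]},
    #   ]
    #   resource_dir_config = [
    #       {"type": "Object", "file_path": ["resources/景区介绍.md"]},
    #       {"type": "Product", "file_path": ["resources/门票套餐.md"]},
    #   ]
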
    def _preload_prompt_files(self):
        """Preload the configured prompt files into the caches."""
        # Preload the system and user prompt files
        if self.topic_system_prompt_path and os.path.exists(self.topic_system_prompt_path):
            content = ResourceLoader.load_file_content(self.topic_system_prompt_path)
            if content:
                self._system_prompt_cache["topic"] = content
                logging.info(f"Preloaded topic system prompt: {self.topic_system_prompt_path}")

        if self.topic_user_prompt_path and os.path.exists(self.topic_user_prompt_path):
            content = ResourceLoader.load_file_content(self.topic_user_prompt_path)
            if content:
                self._user_prompt_cache["topic"] = content
                logging.info(f"Preloaded topic user prompt: {self.topic_user_prompt_path}")

        if self.content_system_prompt_path and os.path.exists(self.content_system_prompt_path):
            content = ResourceLoader.load_file_content(self.content_system_prompt_path)
            if content:
                self._system_prompt_cache["content"] = content
                logging.info(f"Preloaded content system prompt: {self.content_system_prompt_path}")

        # Preload the content-review (judger) system prompt
        if self.content_judger_system_prompt_path and os.path.exists(self.content_judger_system_prompt_path):
            content = ResourceLoader.load_file_content(self.content_judger_system_prompt_path)
            if content:
                self._system_prompt_cache["judger_system_prompt"] = content
                logging.info(f"Preloaded content judger system prompt: {self.content_judger_system_prompt_path}")

        # Preload the dateline (holiday calendar) file
        if self.topic_user_prompt_path:
            user_prompt_dir = os.path.dirname(self.topic_user_prompt_path)
            dateline_path = os.path.join(user_prompt_dir, "2025各月节日宣传节点时间表.md")
            if os.path.exists(dateline_path):
                self._dateline_cache = ResourceLoader.load_file_content(dateline_path)
                logging.info(f"Preloaded dateline file: {dateline_path}")

        # Load the files declared in prompts_config
        if not self.prompts_config:
            return

        for config_item in self.prompts_config:
            prompt_type = config_item.get("type", "").lower()
            file_paths = config_item.get("file_path", [])

            if prompt_type == "style":
                for path in file_paths:
                    if os.path.exists(path):
                        filename = os.path.basename(path)
                        content = ResourceLoader.load_file_content(path)
                        if content:
                            self._style_cache[filename] = content
                            name_without_ext = os.path.splitext(filename)[0]
                            self._style_cache[name_without_ext] = content  # also cache the name without extension

            elif prompt_type == "demand":
                for path in file_paths:
                    if os.path.exists(path):
                        filename = os.path.basename(path)
                        content = ResourceLoader.load_file_content(path)
                        if content:
                            self._demand_cache[filename] = content
                            name_without_ext = os.path.splitext(filename)[0]
                            self._demand_cache[name_without_ext] = content  # also cache the name without extension

            elif prompt_type == "refer":
                for path in file_paths:
                    if os.path.exists(path):
                        filename = os.path.basename(path)
                        # JSON files are loaded and parsed directly
                        if filename.lower().endswith('.json'):
                            try:
                                with open(path, 'r', encoding='utf-8') as f:
                                    content = f.read()
                                # Strip a possible BOM
                                if content.startswith('\ufeff'):
                                    content = content[1:]
                                # Parse the JSON and store the parsed object
                                json_data = json.loads(content)
                                self._refer_cache[filename] = json_data
                                logging.info(f"Preloaded and parsed JSON refer file: {filename}")
                            except Exception as e:
                                logging.error(f"Failed to preload JSON file {filename}: {str(e)}")
                                # Fall back to loading the file as plain text
                                content = ResourceLoader.load_file_content(path)
                                if content:
                                    self._refer_cache[filename] = content
                        else:
                            # Non-JSON files use the regular loader
                            content = ResourceLoader.load_all_refer_files(path, 1)
                            if content:
                                self._refer_cache[filename] = content
                                logging.info(f"Preloaded plain refer file: {filename}")

    def find_directory_fuzzy_match(self, name, directory=None, files=None):
        """
        Fuzzy-match a target name against a set of file names and return the best match.

        Args:
            name: target name to match
            directory: directory path; if given, the file list is read from this directory
            files: list of file names; if given, it is used directly

        Returns:
            tuple: (best matching file name, match score), or (None, 0) if nothing matched
        """
        logging.info(f"Attempting fuzzy match for file name: {name}")

        try:
            # Build the candidate file list
            all_files = []
            if files:
                all_files = files
            elif directory and os.path.isdir(directory):
                all_files = [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]

            if not all_files:
                logging.warning("No files available for fuzzy matching")
                return None, 0

            logging.info(f"Found {len(all_files)} files available for fuzzy matching")

            # Extract keywords from the name by splitting on common separators (+, whitespace, _, -)
            parts = re.split(r'[+\s_\-]', name)
            keywords = []
            for part in parts:
                # Keep only meaningful keywords longer than one character
                if len(part) > 1:
                    keywords.append(part)

            # Also try shorter semantic units (e.g. two-character Chinese words)
            for i in range(len(name) - 1):
                keyword = name[i:i+2]  # take two characters
                if len(keyword) == 2 and all('\u4e00' <= c <= '\u9fff' for c in keyword):
                    keywords.append(keyword)

            logging.info(f"Keywords used for fuzzy file matching: {keywords}")

            # Score every candidate file
            file_scores = {}
            for filename in all_files:
                score = 0
                file_lower = filename.lower()

                # Exact match, compared without the extension
                name_without_ext = os.path.splitext(name)[0].lower()
                file_without_ext = os.path.splitext(filename)[0].lower()

                if name_without_ext == file_without_ext:
                    # Exact matches get a high score
                    score += 10
                else:
                    # Add one point for every keyword contained in the file name
                    for keyword in keywords:
                        if keyword.lower() in file_lower:
                            score += 1

                # Record the file if it scored at all
                if score > 0:
                    file_scores[filename] = score

            # Pick the highest-scoring file
            if file_scores:
                best_match = max(file_scores.items(), key=lambda x: x[1])
                found_file = best_match[0]
                score = best_match[1]
                logging.info(f"Fuzzy match succeeded! Matched file: {found_file}, score: {score}")
                return found_file, score
            else:
                logging.warning("Fuzzy match found no file containing any of the keywords")
                return None, 0

        except Exception as e:
            logging.exception(f"Error during fuzzy file matching: {e}")
            return None, 0

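    # Worked example for find_directory_fuzzy_match (hypothetical file names, not part of
    # any real configuration):
    #   find_directory_fuzzy_match("冬季+亲子游", files=["亲子游攻略.txt", "美食推荐.txt"])
    #   splits the name into keywords such as "冬季", "亲子游", "亲子" and "子游", adds one
    #   point per keyword contained in a candidate name, and here returns ("亲子游攻略.txt", 3).
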
    def _get_style_content(self, style_name):
        """Get the content of a Style file, preferring the cache and falling back to the directory."""
        # Check the cache first
        if style_name in self._style_cache:
            return self._style_cache[style_name]

        # Make sure we have a file name with an extension
        if not style_name.lower().endswith('.txt'):
            style_file = f"{style_name}.txt"
        else:
            style_file = style_name
            style_name = os.path.splitext(style_name)[0]  # strip the extension

        # Try a fuzzy match against the cached file names
        cache_files = list(self._style_cache.keys())
        matched_key, score = self.find_directory_fuzzy_match(style_name, files=cache_files)

        if matched_key and score > 0:
            return self._style_cache[matched_key]

        # If no fuzzy match was found in the cache, try loading from prompts_dir
        if self.prompts_dir:
            style_dir = os.path.join(self.prompts_dir, "Style")
            style_path = os.path.join(style_dir, style_file)

            # Exact path match
            if os.path.exists(style_path):
                content = ResourceLoader.load_file_content(style_path)
                if content:
                    # Store in the cache
                    self._style_cache[style_name] = content
                    self._style_cache[style_file] = content
                    return content

            # If the exact match failed, try a fuzzy match within the directory
            if os.path.isdir(style_dir):
                matched_file, score = self.find_directory_fuzzy_match(style_name, directory=style_dir)
                if matched_file and score > 0:
                    matched_path = os.path.join(style_dir, matched_file)
                    content = ResourceLoader.load_file_content(matched_path)
                    if content:
                        # Store in the cache
                        self._style_cache[style_name] = content
                        self._style_cache[matched_file] = content
                        file_without_ext = os.path.splitext(matched_file)[0]
                        self._style_cache[file_without_ext] = content  # also cache the name without extension
                        return content

        logging.warning(f"Style file '{style_name}' not found; locations tried: cache, {self.prompts_dir}/Style/")
        return None

    def _get_demand_content(self, demand_name):
        """Get the content of a Demand file, preferring the cache and falling back to the directory."""
        # Check the cache first
        if demand_name in self._demand_cache:
            return self._demand_cache[demand_name]

        # Make sure we have a file name with an extension
        if not demand_name.lower().endswith('.txt'):
            demand_file = f"{demand_name}.txt"
        else:
            demand_file = demand_name
            demand_name = os.path.splitext(demand_name)[0]  # strip the extension

        # Try a fuzzy match against the cached file names
        cache_files = list(self._demand_cache.keys())
        matched_key, score = self.find_directory_fuzzy_match(demand_name, files=cache_files)

        if matched_key and score > 0:
            return self._demand_cache[matched_key]

        # If no fuzzy match was found in the cache, try loading from prompts_dir (backwards compatible)
        if self.prompts_dir:
            demand_dir = os.path.join(self.prompts_dir, "Demand")
            demand_path = os.path.join(demand_dir, demand_file)

            # Exact path match
            if os.path.exists(demand_path):
                content = ResourceLoader.load_file_content(demand_path)
                if content:
                    # Store in the cache
                    self._demand_cache[demand_name] = content
                    self._demand_cache[demand_file] = content
                    return content

            # If the exact match failed, try a fuzzy match within the directory
            if os.path.isdir(demand_dir):
                matched_file, score = self.find_directory_fuzzy_match(demand_name, directory=demand_dir)
                if matched_file and score > 0:
                    matched_path = os.path.join(demand_dir, matched_file)
                    content = ResourceLoader.load_file_content(matched_path)
                    if content:
                        # Store in the cache
                        self._demand_cache[demand_name] = content
                        self._demand_cache[matched_file] = content
                        file_without_ext = os.path.splitext(matched_file)[0]
                        self._demand_cache[file_without_ext] = content  # also cache the name without extension
                        return content

        # All attempts failed
        logging.warning(f"Demand file '{demand_name}' not found; locations tried: cache, {self.prompts_dir}/Demand/")
        return None

    def _get_refer_content(self, refer_dir):
        """Load JSON and text reference files from a Refer directory.

        Args:
            refer_dir: path to the Refer directory

        Returns:
            dict: mapping of file name to content
        """
        results = {}
        if not os.path.isdir(refer_dir):
            return results

        # List all files in the directory
        refer_files = [f for f in os.listdir(refer_dir) if os.path.isfile(os.path.join(refer_dir, f))]

        for refer_file in refer_files:
            refer_path = os.path.join(refer_dir, refer_file)
            content = None

            # JSON files get special handling
            if refer_file.lower().endswith('.json'):
                try:
                    with open(refer_path, 'r', encoding='utf-8') as f:
                        content = f.read()
                    # Strip a possible BOM
                    if content.startswith('\ufeff'):
                        content = content[1:]

                    # Verify that the content is valid JSON
                    try:
                        json.loads(content)  # validate only, do not keep the result
                        logging.info(f"Validated JSON file: {refer_file}")
                    except json.JSONDecodeError as e:
                        logging.warning(f"File {refer_file} is not valid JSON: {str(e)}")
                except Exception as e:
                    logging.error(f"Error reading JSON file {refer_file}: {str(e)}")
                    content = None
            else:
                # Use ResourceLoader for non-JSON files
                content = ResourceLoader.load_file_content(refer_path)

            if content:
                results[refer_file] = content

        return results

    def _load_and_parse_json(self, file_path):
        """Safely load and parse a JSON file.

        Args:
            file_path: path to the JSON file

        Returns:
            dict or None: the parsed JSON object, or None if parsing failed
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            # Strip a possible BOM
            if content.startswith('\ufeff'):
                content = content[1:]
            return json.loads(content)
        except Exception as e:
            logging.error(f"Error loading and parsing JSON file {file_path}: {str(e)}")
            return None

    def _get_all_refer_contents(self, random_sample=True):
        """Get the contents of all Refer files, optionally random-sampling long contents.

        Args:
            random_sample: whether to randomly sample file contents, defaults to True

        Returns:
            str: the combined refer content
        """
        # Initialize the result string
        refer_content_all = ""

        # If the cache is empty, try to load the files
        if not self._refer_cache and self.prompts_dir:
            refer_dir = os.path.join(self.prompts_dir, "Refer")
            if os.path.isdir(refer_dir):
                refer_files = [f for f in os.listdir(refer_dir) if os.path.isfile(os.path.join(refer_dir, f))]
                for refer_file in refer_files:
                    file_path = os.path.join(refer_dir, refer_file)

                    # JSON files get special handling
                    if refer_file.lower().endswith('.json'):
                        try:
                            with open(file_path, 'r', encoding='utf-8') as f:
                                content = f.read()
                            # Strip a possible BOM
                            if content.startswith('\ufeff'):
                                content = content[1:]
                            # Parse and store the JSON object
                            json_data = json.loads(content)
                            self._refer_cache[refer_file] = json_data
                            logging.info(f"Loaded and parsed JSON refer file: {refer_file}")
                        except Exception as e:
                            logging.error(f"Failed to load JSON file {refer_file}: {str(e)}")
                            # Fall back to loading the file as plain text
                            content = ResourceLoader.load_file_content(file_path)
                            if content:
                                self._refer_cache[refer_file] = content
                    else:
                        # Non-JSON files use the regular loader
                        content = ResourceLoader.load_file_content(file_path)
                        if content:
                            self._refer_cache[refer_file] = content

        if not self._refer_cache:
            logging.warning("No Refer files found")
            return refer_content_all

        logging.info(f"Found {len(self._refer_cache)} Refer files")

        # Process every cached file
        for filename, content in self._refer_cache.items():
            # Add a per-file header
            refer_content_all += f"--- Refer File: {filename} ---\n"

            # Check the content type
            if isinstance(content, dict) and 'title' in content and 'examples' in content:
                # A parsed JSON object
                title = content.get("title", "未命名参考资料")
                description = content.get("description", "")
                examples = content.get("examples", [])

                refer_content_all += f"标题: {title}\n"
                refer_content_all += f"描述: {description}\n\n"

                if examples:
                    # Process the examples array
                    if random_sample and len(examples) > 10:
                        # Randomly sample the examples
                        sample_size = max(10, int(len(examples) * self._sample_rate))
                        sampled_examples = random.sample(examples, sample_size)
                        logging.info(f"Randomly sampled {sample_size}/{len(examples)} examples from the JSON in {filename}")

                        refer_content_all += "示例:\n"
                        for idx, example in enumerate(sampled_examples, 1):
                            content_text = example.get("content", "")
                            refer_content_all += f"{idx}. {content_text}\n"
                    else:
                        # No sampling, or few enough examples to use them all
                        refer_content_all += "示例:\n"
                        for idx, example in enumerate(examples, 1):
                            content_text = example.get("content", "")
                            refer_content_all += f"{idx}. {content_text}\n"
            elif isinstance(content, str):
                # Plain text content
                if random_sample:
                    lines = content.split('\n')
                    if len(lines) > 10:  # only sample longer contents
                        sample_size = max(10, int(len(lines) * self._sample_rate))
                        sampled_lines = random.sample(lines, sample_size)
                        # Preserve the original line order
                        sampled_lines.sort(key=lambda line: lines.index(line))
                        sampled_content = '\n'.join(sampled_lines)
                        logging.info(f"Randomly sampled {sample_size}/{len(lines)} lines from {filename}")
                        refer_content_all += f"{sampled_content}\n"
                    else:
                        # Content is short, no sampling
                        refer_content_all += f"{content}\n"
                else:
                    # Sampling disabled
                    refer_content_all += f"{content}\n"
            else:
                # Content of an unexpected type
                refer_content_all += f"未知内容类型: {type(content)}\n"

            # Separate individual files
            refer_content_all += "\n"

        return refer_content_all

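    # For reference, a minimal sketch of the JSON shape _get_all_refer_contents() expects
    # from parsed refer files (field names taken from the lookups above; values hypothetical):
    # {
    #     "title": "小红书文案参考",
    #     "description": "用于风格参考的示例集",
    #     "examples": [
    #         {"content": "示例文案一"},
    #         {"content": "示例文案二"}
    #     ]
    # }
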
    def get_topic_prompts(self):
        """Constructs the system and user prompts for topic generation."""
        logging.info("Constructing prompts for topic generation...")
        try:
            # --- System Prompt ---
            system_prompt = self._system_prompt_cache.get("topic")
            if not system_prompt:
                if not self.topic_system_prompt_path:
                    logging.error("Topic system prompt path not provided during PromptManager initialization.")
                    return None, None
                system_prompt = ResourceLoader.load_file_content(self.topic_system_prompt_path)
                if system_prompt:
                    self._system_prompt_cache["topic"] = system_prompt
                else:
                    logging.error(f"Failed to load topic system prompt from '{self.topic_system_prompt_path}'.")
                    return None, None

            # --- User Prompt ---
            base_user_prompt = self._user_prompt_cache.get("topic")
            if not base_user_prompt:
                if not self.topic_user_prompt_path:
                    logging.error("Topic user prompt path not provided during PromptManager initialization.")
                    return None, None
                base_user_prompt = ResourceLoader.load_file_content(self.topic_user_prompt_path)
                if base_user_prompt:
                    self._user_prompt_cache["topic"] = base_user_prompt
                else:
                    logging.error(f"Failed to load base topic user prompt from '{self.topic_user_prompt_path}'.")
                    return None, None

            # --- Build the dynamic part of the user prompt ---
            user_prompt_dynamic = "你拥有的创作资料如下:\n"

            # Add the file information configured in prompts_config
            if self.prompts_config:
                for config_item in self.prompts_config:
                    prompt_type = config_item.get("type", "").lower()
                    file_paths = config_item.get("file_path", [])

                    if file_paths:
                        user_prompt_dynamic += f"{prompt_type.capitalize()}文件列表:\n"
                        for path in file_paths:
                            filename = os.path.basename(path)
                            user_prompt_dynamic += f"- {filename}\n"
                        user_prompt_dynamic += "\n"

            # Legacy configuration: add the genPrompts directory structure
            elif self.prompts_dir and os.path.isdir(self.prompts_dir):
                try:
                    gen_prompts_list = os.listdir(self.prompts_dir)
                    for gen_prompt_folder in gen_prompts_list:
                        folder_path = os.path.join(self.prompts_dir, gen_prompt_folder)
                        if os.path.isdir(folder_path):
                            try:
                                # List files, filtering out subdirectories
                                gen_prompts_files = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]
                                user_prompt_dynamic += f"{gen_prompt_folder}\n{gen_prompts_files}\n"
                            except OSError as e:
                                logging.warning(f"Could not list directory {folder_path}: {e}")
                except OSError as e:
                    logging.warning(f"Could not list base prompts directory {self.prompts_dir}: {e}")
            else:
                logging.warning("Neither prompts_config nor prompts_dir provided or valid.")

            # Add resource directory contents
            for dir_info in self.resource_dir_config:
                source_type = dir_info.get("type", "UnknownType")
                source_file_paths = dir_info.get("file_path", [])
                for file_path in source_file_paths:
                    # Use ResourceLoader's static method
                    file_content = ResourceLoader.load_file_content(file_path)
                    if file_content:
                        user_prompt_dynamic += f"{source_type}信息:\n{os.path.basename(file_path)}\n{file_content}\n\n"
                    else:
                        logging.warning(f"Could not load resource file {file_path}")

            # Add dateline information (optional)
            if self._dateline_cache:
                user_prompt_dynamic += f"\n{self._dateline_cache}"
            else:
                user_prompt_dir = os.path.dirname(self.topic_user_prompt_path)
                dateline_path = os.path.join(user_prompt_dir, "2025各月节日宣传节点时间表.md")  # Consider making this configurable
                if os.path.exists(dateline_path):
                    dateline_content = ResourceLoader.load_file_content(dateline_path)
                    if dateline_content:
                        self._dateline_cache = dateline_content
                        user_prompt_dynamic += f"\n{dateline_content}"

            # Combine the dynamic part, the base template, and the final parameters
            user_prompt = user_prompt_dynamic + base_user_prompt

            user_prompt += f"\n选题数量:{self.topic_gen_num}\n选题日期:{self.topic_gen_date}\n"

            logging.info(f"Topic prompts constructed. System: {len(system_prompt)} chars, User: {len(user_prompt)} chars.")
            return system_prompt, user_prompt

        except Exception:
            logging.exception("Error constructing topic prompts:")
            return None, None

    def get_content_prompts(self, topic_item):
        """Constructs the system and user prompts for content generation."""
        logging.info("Constructing prompts for content generation...")

        try:
            # --- System Prompt ---
            system_prompt = self._system_prompt_cache.get("content")
            if not system_prompt:
                if not self.content_system_prompt_path:
                    logging.error("Content system prompt path not provided.")
                    return None, None

                system_prompt = ResourceLoader.load_file_content(self.content_system_prompt_path)
                if system_prompt:
                    self._system_prompt_cache["content"] = system_prompt
                else:
                    logging.error(f"Failed to load content system prompt from '{self.content_system_prompt_path}'.")
                    return None, None

            # --- User Prompt ---
            style = ""
            demand = ""
            refers = ""
            object_info = ""

            # Extract the style from the topic
            if "style" in topic_item and topic_item["style"]:
                style_name = topic_item["style"]
                style_content = self._get_style_content(style_name)
                if style_content:
                    style = f"Style: {style_name}\n{style_content}\n\n"
                else:
                    logging.warning(f"Style content for '{style_name}' not found.")

            # Extract the demand from the topic
            if "target_audience" in topic_item and topic_item["target_audience"]:
                demand_name = topic_item["target_audience"]
                demand_content = self._get_demand_content(demand_name)
                if demand_content:
                    demand = f"Demand: {demand_name}\n{demand_content}\n\n"
                else:
                    logging.warning(f"Demand content for '{demand_name}' not found.")

            # Add refer contents, now using random sampling
            refers_content = self._get_all_refer_contents(random_sample=True)
            if refers_content:
                refers = f"Reference:\n{refers_content}\n\n"

            # Get object information
            object_name = topic_item.get("object", "")
            if object_name:
                # Walk resource_dir_config looking for the object description
                found_object_info = False

                # 1. Collect every candidate resource file
                all_description_files = []
                for dir_info in self.resource_dir_config:
                    if dir_info.get("type") in ["Object", "Description"]:
                        all_description_files.extend(dir_info.get("file_path", []))

                # 2. Try an exact match first
                for file_path in all_description_files:
                    if object_name in os.path.basename(file_path):
                        info = ResourceLoader.load_file_content(file_path)
                        if info:
                            object_info = f"Object: {object_name}\n{info}\n\n"
                            logging.info(f"Found exact-match resource file for object '{object_name}': {file_path}")
                            found_object_info = True
                            break

                # 3. If the exact match failed, try a fuzzy match
                if not found_object_info and all_description_files:
                    logging.info(f"Trying fuzzy match for resource files of object '{object_name}'")

                    # Extract all candidate file names
                    file_names = [os.path.basename(f) for f in all_description_files]
                    # Fuzzy match
                    matched_filename, score = self.find_directory_fuzzy_match(object_name, files=file_names)

                    if matched_filename and score > 0:
                        # Find the full path of the matched file
                        for file_path in all_description_files:
                            if os.path.basename(file_path) == matched_filename:
                                info = ResourceLoader.load_file_content(file_path)
                                if info:
                                    object_info = f"Object: {object_name}\n{info}\n\n"
                                    logging.info(f"Fuzzy match found resource file for object '{object_name}': {file_path}, score: {score}")
                                    found_object_info = True
                                    break

                if not found_object_info:
                    logging.warning(f"No matching resource file found for object '{object_name}'")

            # Get product information, if any
            product_name = topic_item.get("product", "")
            if product_name:
                product_info = ""
                # Walk resource_dir_config looking for the product description
                found_product_info = False

                # Collect every candidate product resource file
                all_product_files = []
                for dir_info in self.resource_dir_config:
                    if dir_info.get("type") == "Product":
                        all_product_files.extend(dir_info.get("file_path", []))

                # Try an exact match first
                for file_path in all_product_files:
                    if product_name in os.path.basename(file_path):
                        info = ResourceLoader.load_file_content(file_path)
                        if info:
                            product_info = f"Product: {product_name}\n{info}\n\n"
                            logging.info(f"Found exact-match resource file for product '{product_name}': {file_path}")
                            found_product_info = True
                            break

                # If the exact match failed, try a fuzzy match
                if not found_product_info and all_product_files:
                    logging.info(f"Trying fuzzy match for resource files of product '{product_name}'")

                    # Extract all candidate file names
                    file_names = [os.path.basename(f) for f in all_product_files]
                    # Fuzzy match
                    matched_filename, score = self.find_directory_fuzzy_match(product_name, files=file_names)

                    if matched_filename and score > 0:
                        # Find the full path of the matched file
                        for file_path in all_product_files:
                            if os.path.basename(file_path) == matched_filename:
                                info = ResourceLoader.load_file_content(file_path)
                                if info:
                                    product_info = f"Product: {product_name}\n{info}\n\n"
                                    logging.info(f"Fuzzy match found resource file for product '{product_name}': {file_path}, score: {score}")
                                    found_product_info = True
                                    break

                if not found_product_info:
                    logging.warning(f"No matching resource file found for product '{product_name}'")

                # Append the product information to the object information
                if product_info:
                    object_info += product_info

            # Construct the final user prompt
            user_prompt = f"""请为我创建一个旅游文案。

{style}{demand}{refers}{object_info}

请考虑以上所有信息,创作一篇{topic_item.get('target_audience','')}文旅内容。"""

            return system_prompt, user_prompt
        except Exception as e:
            traceback.print_exc()
            logging.error(f"Error creating content prompts: {str(e)}")
            return None, None
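

# Minimal usage sketch (illustrative only): every path and configuration entry below is a
# hypothetical placeholder, not a file shipped with this package. Because this module uses
# a relative import, the demo must be run from within the package (e.g. via `python -m`).
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    manager = PromptManager(
        topic_system_prompt_path="prompts/topic_system.txt",      # hypothetical path
        topic_user_prompt_path="prompts/topic_user.txt",          # hypothetical path
        content_system_prompt_path="prompts/content_system.txt",  # hypothetical path
        prompts_config=[
            {"type": "Style", "file_path": ["prompts/Style/活泼种草风.txt"]},  # hypothetical
            {"type": "Refer", "file_path": ["prompts/Refer/examples.json"]},   # hypothetical
        ],
        resource_dir_config=[
            {"type": "Object", "file_path": ["resources/景区介绍.md"]},         # hypothetical
        ],
        topic_gen_num=3,
        topic_gen_date="2025-05-01",
    )

    # Topic prompts; both values are None if a required prompt file cannot be loaded.
    system_prompt, user_prompt = manager.get_topic_prompts()

    # Content prompts for one topic item, using the keys read by get_content_prompts().
    topic_item = {"style": "活泼种草风", "target_audience": "亲子家庭", "object": "某景区"}
    content_system, content_user = manager.get_content_prompts(topic_item)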