# hot_video_analyse/base_line/prompt_manager.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Manages the construction of prompts for different AI generation tasks.
"""

import os
import traceback
import logging # Add logging
import re  # 添加正则表达式支持
import random  # 添加随机模块支持
from .resource_loader import ResourceLoader # Use relative import within the same package
import json

class PromptManager:
    """Handles the loading and construction of prompts."""

    def __init__(self, 
                 topic_system_prompt_path: str, 
                 topic_user_prompt_path: str, 
                 content_system_prompt_path: str, 
                 prompts_dir: str = None,  # legacy configuration, kept for backward compatibility
                 prompts_config: list = None,  # newer, explicit configuration
                 resource_dir_config: list = None,
                 topic_gen_num: int = 1, # Default values if needed
                 topic_gen_date: str = "",
                 content_judger_system_prompt_path: str = None # path of the content-review (judger) system prompt
                 ):
        """Store the configuration, create empty caches and preload prompt files.

        Args:
            topic_system_prompt_path: Path of the system prompt file used for
                topic generation.
            topic_user_prompt_path: Path of the base user prompt file used for
                topic generation. Its directory is also where the dateline
                markdown file is looked up.
            content_system_prompt_path: Path of the system prompt file used for
                content generation.
            prompts_dir: Legacy prompts root. The lookup methods of this class
                search its "Style", "Demand" and "Refer" sub-directories.
            prompts_config: List of dicts, each read through its "type" key
                ("style", "demand" or "refer", case-insensitive) and its
                "file_path" key (a list of file paths). None becomes [].
            resource_dir_config: List of dicts, each read through its "type"
                and "file_path" keys. None becomes [].
            topic_gen_num: Number of topics; appended to the topic user prompt.
            topic_gen_date: Topic date; appended to the topic user prompt.
            content_judger_system_prompt_path: Path of the content-review
                system prompt; preloaded under the cache key
                "judger_system_prompt" when the file exists.
        """
        self.topic_system_prompt_path = topic_system_prompt_path
        self.topic_user_prompt_path = topic_user_prompt_path
        self.content_system_prompt_path = content_system_prompt_path
        self.content_judger_system_prompt_path = content_judger_system_prompt_path # judger system prompt path
        self.prompts_dir = prompts_dir  # kept for the legacy configuration
        self.prompts_config = prompts_config or []  # newer configuration
        self.resource_dir_config = resource_dir_config or []
        self.topic_gen_num = topic_gen_num
        self.topic_gen_date = topic_gen_date
        
        # Caches for loaded file contents
        self._style_cache = {}
        self._demand_cache = {}
        self._refer_cache = {}
        self._system_prompt_cache = {}  # system prompts, keyed by purpose
        self._user_prompt_cache = {}    # user prompts, keyed by purpose
        self._dateline_cache = None     # content of the dateline file, once loaded
        
        self._sample_rate = 0.1  # fraction of refer content kept when random sampling is applied
        # Preload the configured files as part of construction
        self._preload_prompt_files()
    
    def _preload_prompt_files(self):
        """Fill the in-memory caches from the configured prompt files.

        Steps, in order:
        1. The four fixed prompt files (topic system, topic user, content
           system, judger system) are cached when their path is set, the file
           exists and its content is non-empty.
        2. The dateline markdown file located next to the topic user prompt is
           cached when it exists.
        3. Every entry of ``prompts_config`` is loaded according to its type:
           "style" and "demand" files are cached under both the file name and
           the name without extension; "refer" files are cached under the file
           name only, JSON files as parsed objects (falling back to raw text
           when parsing fails).

        Paths that do not exist are skipped.
        """
        # (path, target cache, cache key, log label) for each fixed prompt file
        fixed_prompts = (
            (self.topic_system_prompt_path, self._system_prompt_cache,
             "topic", "预加载系统提示词"),
            (self.topic_user_prompt_path, self._user_prompt_cache,
             "topic", "预加载用户提示词"),
            (self.content_system_prompt_path, self._system_prompt_cache,
             "content", "预加载内容系统提示词"),
            (self.content_judger_system_prompt_path, self._system_prompt_cache,
             "judger_system_prompt", "预加载内容审核系统提示词"),
        )
        for prompt_path, target_cache, cache_key, label in fixed_prompts:
            if not prompt_path or not os.path.exists(prompt_path):
                continue
            text = ResourceLoader.load_file_content(prompt_path)
            if text:
                target_cache[cache_key] = text
                logging.info(f"{label}: {prompt_path}")

        # The dateline file is expected in the same directory as the user prompt
        if self.topic_user_prompt_path:
            dateline_path = os.path.join(
                os.path.dirname(self.topic_user_prompt_path),
                "2025各月节日宣传节点时间表.md",
            )
            if os.path.exists(dateline_path):
                self._dateline_cache = ResourceLoader.load_file_content(dateline_path)
                logging.info(f"预加载日期线文件: {dateline_path}")

        if not self.prompts_config:
            return

        # "style" and "demand" entries are handled identically, only the cache differs
        text_caches = {"style": self._style_cache, "demand": self._demand_cache}

        for entry in self.prompts_config:
            kind = entry.get("type", "").lower()
            listed_paths = entry.get("file_path", [])

            if kind in text_caches:
                target_cache = text_caches[kind]
                for item_path in listed_paths:
                    if not os.path.exists(item_path):
                        continue
                    base_name = os.path.basename(item_path)
                    text = ResourceLoader.load_file_content(item_path)
                    if text:
                        target_cache[base_name] = text
                        # Also reachable without the file extension
                        target_cache[os.path.splitext(base_name)[0]] = text

            elif kind == "refer":
                for item_path in listed_paths:
                    if not os.path.exists(item_path):
                        continue
                    base_name = os.path.basename(item_path)

                    if not base_name.lower().endswith('.json'):
                        # Non-JSON reference files go through the regular loader
                        text = ResourceLoader.load_all_refer_files(item_path, 1)
                        if text:
                            self._refer_cache[base_name] = text
                            logging.info(f"预加载普通参考文件: {base_name}")
                        continue

                    # JSON reference files are parsed and stored as objects
                    try:
                        with open(item_path, 'r', encoding='utf-8') as handle:
                            raw = handle.read()
                        if raw.startswith('\ufeff'):
                            raw = raw[1:]  # drop a leading BOM
                        self._refer_cache[base_name] = json.loads(raw)
                        logging.info(f"预加载并解析JSON参考文件: {base_name}")
                    except Exception as err:
                        logging.error(f"预加载JSON文件{base_name}失败: {err}")
                        # Fall back to caching the file as plain text
                        text = ResourceLoader.load_file_content(item_path)
                        if text:
                            self._refer_cache[base_name] = text

    def find_directory_fuzzy_match(self, name, directory=None, files=None):
        """Pick the file name that best matches ``name`` using keyword scoring.

        Candidates come from ``files`` when it is non-empty, otherwise from the
        regular files found directly inside ``directory``.

        Scoring per candidate:
        - 10 points when the candidate equals ``name`` once both have their
          extension removed and are lower-cased;
        - otherwise 1 point for every keyword contained (case-insensitively)
          in the candidate name. Keywords are the pieces of ``name`` split on
          "+", whitespace, "_" and "-" that are longer than one character,
          plus every two-character window of ``name`` made only of CJK
          ideographs (U+4E00..U+9FFF). Repeated keywords count repeatedly.

        Args:
            name: Target name to look for.
            directory: Directory whose files are the candidates (optional).
            files: Explicit list of candidate file names (optional).

        Returns:
            tuple: (best matching file name, score). (None, 0) when there are
            no candidates, nothing scores above zero, or any error occurs.
            On equal scores the earliest candidate wins.
        """
        logging.info(f"尝试对文件名进行模糊匹配: {name}")

        try:
            # Resolve the candidate list
            if files:
                candidates = files
            elif directory and os.path.isdir(directory):
                candidates = [
                    entry for entry in os.listdir(directory)
                    if os.path.isfile(os.path.join(directory, entry))
                ]
            else:
                candidates = []

            if not candidates:
                logging.warning("没有可用于匹配的文件")
                return None, 0

            logging.info(f"找到 {len(candidates)} 个文件可用于模糊匹配")

            # Keywords from separator-delimited pieces (single characters dropped)
            keywords = [
                piece for piece in re.split(r'[+\s_\-]', name) if len(piece) > 1
            ]
            # Plus every two-character window consisting only of CJK ideographs
            keywords.extend(
                name[pos:pos + 2]
                for pos in range(len(name) - 1)
                if all('\u4e00' <= ch <= '\u9fff' for ch in name[pos:pos + 2])
            )

            logging.info(f"用于文件模糊匹配的关键词: {keywords}")

            target_stem = os.path.splitext(name)[0].lower()
            lowered_keywords = [kw.lower() for kw in keywords]

            # Score each candidate; only positive scores are kept
            scores = {}
            for candidate in candidates:
                candidate_lower = candidate.lower()
                if os.path.splitext(candidate)[0].lower() == target_stem:
                    points = 10  # same name apart from extension and case
                else:
                    points = sum(
                        1 for kw in lowered_keywords if kw in candidate_lower
                    )
                if points > 0:
                    scores[candidate] = points

            if not scores:
                logging.warning("模糊匹配未找到任何包含关键词的文件")
                return None, 0

            # max() returns the first candidate holding the highest score
            winner = max(scores, key=scores.get)
            top_score = scores[winner]
            logging.info(f"模糊匹配成功！匹配文件: {winner}，匹配分数: {top_score}")
            return winner, top_score

        except Exception as e:
            logging.exception(f"文件模糊匹配过程中出错: {e}")
            return None, 0
    
    def _get_style_content(self, style_name):
        """Return the content of a Style file, or None when it cannot be found.

        Lookup order:
        1. exact key in the style cache;
        2. fuzzy match of the name (without ".txt") against the cache keys;
        3. ``<prompts_dir>/Style/<name>.txt`` on disk;
        4. fuzzy match against the files of ``<prompts_dir>/Style``.

        Content loaded from disk is written back to the cache.

        Args:
            style_name: Style name, with or without a ".txt" extension.
        """
        # 1. Direct cache hit
        try:
            return self._style_cache[style_name]
        except KeyError:
            pass

        # Derive both the bare name and the ".txt" file name
        if style_name.lower().endswith('.txt'):
            file_name = style_name
            stem = os.path.splitext(style_name)[0]
        else:
            file_name = f"{style_name}.txt"
            stem = style_name

        # 2. Fuzzy match against whatever is already cached
        cached_key, cached_score = self.find_directory_fuzzy_match(
            stem, files=list(self._style_cache.keys())
        )
        if cached_key and cached_score > 0:
            return self._style_cache[cached_key]

        if self.prompts_dir:
            style_dir = os.path.join(self.prompts_dir, "Style")
            exact_path = os.path.join(style_dir, file_name)

            # 3. Exact file on disk
            if os.path.exists(exact_path):
                text = ResourceLoader.load_file_content(exact_path)
                if text:
                    self._style_cache[stem] = text
                    self._style_cache[file_name] = text
                    return text

            # 4. Fuzzy match against the directory listing
            if os.path.isdir(style_dir):
                disk_name, disk_score = self.find_directory_fuzzy_match(
                    stem, directory=style_dir
                )
                if disk_name and disk_score > 0:
                    text = ResourceLoader.load_file_content(
                        os.path.join(style_dir, disk_name)
                    )
                    if text:
                        # Cache under the requested name and both forms of the match
                        self._style_cache[stem] = text
                        self._style_cache[disk_name] = text
                        self._style_cache[os.path.splitext(disk_name)[0]] = text
                        return text

        logging.warning(f"未能找到Style文件: '{stem}'，尝试过以下位置: 缓存, {self.prompts_dir}/Style/")
        return None
    
    def _get_demand_content(self, demand_name):
        """Return the content of a Demand file, or None when it cannot be found.

        Lookup order:
        1. exact key in the demand cache;
        2. fuzzy match of the name (without ".txt") against the cache keys;
        3. ``<prompts_dir>/Demand/<name>.txt`` on disk (legacy layout);
        4. fuzzy match against the files of ``<prompts_dir>/Demand``.

        Content loaded from disk is written back to the cache.

        Args:
            demand_name: Demand name, with or without a ".txt" extension.
        """
        # 1. Direct cache hit
        try:
            return self._demand_cache[demand_name]
        except KeyError:
            pass

        # Derive both the bare name and the ".txt" file name
        if demand_name.lower().endswith('.txt'):
            file_name = demand_name
            stem = os.path.splitext(demand_name)[0]
        else:
            file_name = f"{demand_name}.txt"
            stem = demand_name

        # 2. Fuzzy match against whatever is already cached
        cached_key, cached_score = self.find_directory_fuzzy_match(
            stem, files=list(self._demand_cache.keys())
        )
        if cached_key and cached_score > 0:
            return self._demand_cache[cached_key]

        if self.prompts_dir:
            demand_dir = os.path.join(self.prompts_dir, "Demand")
            exact_path = os.path.join(demand_dir, file_name)

            # 3. Exact file on disk
            if os.path.exists(exact_path):
                text = ResourceLoader.load_file_content(exact_path)
                if text:
                    self._demand_cache[stem] = text
                    self._demand_cache[file_name] = text
                    return text

            # 4. Fuzzy match against the directory listing
            if os.path.isdir(demand_dir):
                disk_name, disk_score = self.find_directory_fuzzy_match(
                    stem, directory=demand_dir
                )
                if disk_name and disk_score > 0:
                    text = ResourceLoader.load_file_content(
                        os.path.join(demand_dir, disk_name)
                    )
                    if text:
                        # Cache under the requested name and both forms of the match
                        self._demand_cache[stem] = text
                        self._demand_cache[disk_name] = text
                        self._demand_cache[os.path.splitext(disk_name)[0]] = text
                        return text

        # Every lookup failed
        logging.warning(f"未能找到Demand文件: '{stem}'，尝试过以下位置: 缓存, {self.prompts_dir}/Demand/")
        return None
    
    def _get_refer_content(self, refer_dir):
        """Read every regular file directly inside a Refer directory.

        JSON files (by extension) are read as UTF-8 text with a leading BOM
        removed; their content is only validated as JSON, the raw text is what
        gets returned, even when validation fails. Other files are read with
        ``ResourceLoader.load_file_content``. Files whose content is empty or
        could not be read are left out.

        Args:
            refer_dir: Path of the Refer directory.

        Returns:
            dict: File name -> text content. Empty when ``refer_dir`` is not a
            directory.
        """
        if not os.path.isdir(refer_dir):
            return {}

        loaded = {}
        for entry in os.listdir(refer_dir):
            entry_path = os.path.join(refer_dir, entry)
            if not os.path.isfile(entry_path):
                continue

            if entry.lower().endswith('.json'):
                try:
                    with open(entry_path, 'r', encoding='utf-8') as handle:
                        text = handle.read()
                    if text.startswith('\ufeff'):
                        text = text[1:]  # drop a leading BOM

                    # Validation only: the parsed value is discarded
                    try:
                        json.loads(text)
                    except json.JSONDecodeError as err:
                        logging.warning(f"文件{entry}内容不是有效的JSON: {err}")
                    else:
                        logging.info(f"成功验证JSON文件: {entry}")
                except Exception as err:
                    logging.error(f"读取JSON文件{entry}时出错: {err}")
                    text = None
            else:
                text = ResourceLoader.load_file_content(entry_path)

            if text:
                loaded[entry] = text

        return loaded
    
    def _load_and_parse_json(self, file_path):
        """Read a UTF-8 JSON file and return the parsed value.

        A single leading BOM character is removed before parsing. Any failure
        (missing file, decoding error, invalid JSON, ...) is logged and turned
        into a None result instead of an exception.

        Args:
            file_path: Path of the JSON file.

        Returns:
            The parsed JSON value, or None when loading or parsing failed.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as handle:
                raw = handle.read()
            # Drop a leading BOM so json.loads does not reject the text
            text = raw[1:] if raw.startswith('\ufeff') else raw
            return json.loads(text)
        except Exception as err:
            logging.error(f"加载和解析JSON文件{file_path}时出错: {err}")
            return None
         
    def _get_all_refer_contents(self, random_sample=True):
        """Render every cached Refer file into one text block.

        When the refer cache is empty and ``prompts_dir`` is set, the files of
        ``<prompts_dir>/Refer`` are loaded first (JSON files parsed, falling
        back to raw text when parsing fails).

        Rendering per cached file, always preceded by a header line:
        - a dict holding both "title" and "examples": title, description and
          a numbered list of the examples' "content" values (entries that are
          not dicts are printed as they are);
        - a string: the text itself;
        - anything else: a single line naming the unexpected type.

        With ``random_sample`` enabled, example lists and texts longer than 10
        items/lines are reduced to ``max(10, int(count * self._sample_rate))``
        randomly chosen items (never more than are available). Sampled text
        lines keep their original relative order; sampled JSON examples are
        listed in the order ``random.sample`` returns them.

        Args:
            random_sample: Whether long contents are randomly sampled.
                Defaults to True.

        Returns:
            str: The combined refer content, "" when no Refer file is available.
        """
        # Lazily populate the cache from the legacy directory layout
        if not self._refer_cache and self.prompts_dir:
            refer_dir = os.path.join(self.prompts_dir, "Refer")
            if os.path.isdir(refer_dir):
                refer_files = [
                    f for f in os.listdir(refer_dir)
                    if os.path.isfile(os.path.join(refer_dir, f))
                ]
                for refer_file in refer_files:
                    file_path = os.path.join(refer_dir, refer_file)

                    if refer_file.lower().endswith('.json'):
                        try:
                            with open(file_path, 'r', encoding='utf-8') as f:
                                content = f.read()
                            # Drop a leading BOM before parsing
                            if content.startswith('\ufeff'):
                                content = content[1:]
                            self._refer_cache[refer_file] = json.loads(content)
                            logging.info(f"加载并解析JSON参考文件: {refer_file}")
                        except Exception as e:
                            logging.error(f"加载JSON文件{refer_file}失败: {str(e)}")
                            # Fall back to loading the file as plain text
                            content = ResourceLoader.load_file_content(file_path)
                            if content:
                                self._refer_cache[refer_file] = content
                    else:
                        content = ResourceLoader.load_file_content(file_path)
                        if content:
                            self._refer_cache[refer_file] = content

        if not self._refer_cache:
            logging.warning("没有找到任何Refer文件")
            return ""

        logging.info(f"找到{len(self._refer_cache)}个Refer文件")

        # Collect the pieces and join once instead of repeated string +=
        parts = []
        for filename, content in self._refer_cache.items():
            parts.append(f"--- Refer File: {filename} ---\n")

            if isinstance(content, dict) and 'title' in content and 'examples' in content:
                # Parsed JSON reference
                title = content.get("title", "未命名参考资料")
                description = content.get("description", "")
                examples = content.get("examples", [])

                parts.append(f"标题: {title}\n")
                parts.append(f"描述: {description}\n\n")

                if examples:
                    shown_examples = examples
                    if random_sample and len(examples) > 10:
                        # Clamp so a sample rate above 1 cannot ask for more
                        # items than exist (random.sample would raise)
                        sample_size = min(
                            len(examples),
                            max(10, int(len(examples) * self._sample_rate)),
                        )
                        shown_examples = random.sample(examples, sample_size)
                        logging.info(f"从文件{filename}的JSON中随机抽样了{sample_size}/{len(examples)}个示例")

                    parts.append("示例:\n")
                    for idx, example in enumerate(shown_examples, 1):
                        # Tolerate examples given as plain values instead of dicts
                        if isinstance(example, dict):
                            content_text = example.get("content", "")
                        else:
                            content_text = example
                        parts.append(f"{idx}. {content_text}\n")
            elif isinstance(content, str):
                lines = content.split('\n') if random_sample else []
                if len(lines) > 10:  # only long texts are sampled
                    sample_size = min(
                        len(lines), max(10, int(len(lines) * self._sample_rate))
                    )
                    # Sample positions rather than line texts: sorting the
                    # positions restores the original order even when the same
                    # line occurs several times, and avoids a quadratic
                    # lines.index() lookup per sampled line.
                    kept_positions = sorted(
                        random.sample(range(len(lines)), sample_size)
                    )
                    sampled_content = '\n'.join(lines[pos] for pos in kept_positions)
                    logging.info(f"从文件{filename}中随机抽样了{sample_size}/{len(lines)}行内容")
                    parts.append(f"{sampled_content}\n")
                else:
                    # Sampling disabled or text short enough: use it unchanged
                    parts.append(f"{content}\n")
            else:
                # Unexpected content type: only report it
                parts.append(f"未知内容类型: {type(content)}\n")

            # Blank line between files
            parts.append("\n")

        return "".join(parts)

    def get_topic_prompts(self):
        """Build the (system_prompt, user_prompt) pair for topic generation.

        The system prompt and the base user prompt come from the caches, or
        are loaded from their configured paths (and then cached). The user
        prompt is assembled as:

        1. an introduction line;
        2. the file lists of ``prompts_config`` (only entries with files), or,
           when ``prompts_config`` is empty, the folder/file listing of the
           legacy ``prompts_dir``;
        3. the content of every file named in ``resource_dir_config``;
        4. the dateline text, when available;
        5. the base user prompt;
        6. the topic count and topic date.

        Returns:
            tuple: (system_prompt, user_prompt), or (None, None) when a
            required prompt is missing or any error occurs.
        """
        logging.info("Constructing prompts for topic generation...")
        try:
            # --- System prompt ---
            system_prompt = self._system_prompt_cache.get("topic")
            if not system_prompt:
                if not self.topic_system_prompt_path:
                    logging.error("Topic system prompt path not provided during PromptManager initialization.")
                    return None, None
                system_prompt = ResourceLoader.load_file_content(self.topic_system_prompt_path)
                if not system_prompt:
                    logging.error(f"Failed to load topic system prompt from '{self.topic_system_prompt_path}'.")
                    return None, None
                self._system_prompt_cache["topic"] = system_prompt

            # --- Base user prompt ---
            base_user_prompt = self._user_prompt_cache.get("topic")
            if not base_user_prompt:
                if not self.topic_user_prompt_path:
                    logging.error("Topic user prompt path not provided during PromptManager initialization.")
                    return None, None
                base_user_prompt = ResourceLoader.load_file_content(self.topic_user_prompt_path)
                if not base_user_prompt:
                    logging.error(f"Failed to load base topic user prompt from '{self.topic_user_prompt_path}'.")
                    return None, None
                self._user_prompt_cache["topic"] = base_user_prompt

            # --- Dynamic part of the user prompt, collected piece by piece ---
            pieces = ["你拥有的创作资料如下：\n"]

            if self.prompts_config:
                # Newer configuration: list the configured file names per type
                for entry in self.prompts_config:
                    kind = entry.get("type", "").lower()
                    listed_paths = entry.get("file_path", [])
                    if not listed_paths:
                        continue
                    pieces.append(f"{kind.capitalize()}文件列表:\n")
                    for listed_path in listed_paths:
                        pieces.append(f"- {os.path.basename(listed_path)}\n")
                    pieces.append("\n")
            elif self.prompts_dir and os.path.isdir(self.prompts_dir):
                # Legacy configuration: list each sub-folder with its files
                try:
                    for folder in os.listdir(self.prompts_dir):
                        folder_path = os.path.join(self.prompts_dir, folder)
                        if not os.path.isdir(folder_path):
                            continue
                        try:
                            folder_files = [
                                item for item in os.listdir(folder_path)
                                if os.path.isfile(os.path.join(folder_path, item))
                            ]
                            pieces.append(f"{folder}\n{folder_files}\n")
                        except OSError as err:
                            logging.warning(f"Could not list directory {folder_path}: {err}")
                except OSError as err:
                    logging.warning(f"Could not list base prompts directory {self.prompts_dir}: {err}")
            else:
                logging.warning("Neither prompts_config nor prompts_dir provided or valid.")

            # Contents of the configured resource files
            for resource in self.resource_dir_config:
                resource_type = resource.get("type", "UnknownType")
                for resource_path in resource.get("file_path", []):
                    resource_text = ResourceLoader.load_file_content(resource_path)
                    if not resource_text:
                        logging.warning(f"Could not load resource file {resource_path}")
                        continue
                    pieces.append(
                        f"{resource_type}信息:\n{os.path.basename(resource_path)}\n{resource_text}\n\n"
                    )

            # Optional dateline information
            if self._dateline_cache:
                pieces.append(f"\n{self._dateline_cache}")
            else:
                dateline_path = os.path.join(
                    os.path.dirname(self.topic_user_prompt_path),
                    "2025各月节日宣传节点时间表.md",  # Consider making this configurable
                )
                if os.path.exists(dateline_path):
                    dateline_text = ResourceLoader.load_file_content(dateline_path)
                    if dateline_text:
                        self._dateline_cache = dateline_text
                        pieces.append(f"\n{dateline_text}")

            # Dynamic part + base template + generation parameters
            user_prompt = "".join(pieces) + base_user_prompt
            user_prompt += f"\n选题数量：{self.topic_gen_num}\n选题日期：{self.topic_gen_date}\n"

            logging.info(f"Topic prompts constructed. System: {len(system_prompt)} chars, User: {len(user_prompt)} chars.")
            return system_prompt, user_prompt

        except Exception:
            logging.exception("Error constructing topic prompts:")
            return None, None

    def get_content_prompts(self, topic_item):
        """Build the (system_prompt, user_prompt) pair for content generation.

        The system prompt comes from the cache or is loaded from
        ``content_system_prompt_path``. The user prompt combines, in order:

        - the Style section, looked up by ``topic_item["style"]``;
        - the Demand section, looked up by ``topic_item["target_audience"]``;
        - the Reference section built from all Refer files (randomly sampled);
        - the Object section: a resource file of type "Object" or
          "Description" matching ``topic_item["object"]``;
        - the Product section: a resource file of type "Product" matching
          ``topic_item["product"]``.

        Sections whose source cannot be found are simply left out.

        Args:
            topic_item: Dict describing the topic. The keys read are "style",
                "target_audience", "object" and "product"; all are optional.

        Returns:
            tuple: (system_prompt, user_prompt), or (None, None) when the
            system prompt is unavailable or any error occurs.
        """
        logging.info("Constructing prompts for content generation...")

        try:
            # --- System prompt ---
            system_prompt = self._system_prompt_cache.get("content")
            if not system_prompt:
                if not self.content_system_prompt_path:
                    logging.error("Content system prompt path not provided.")
                    return None, None

                system_prompt = ResourceLoader.load_file_content(self.content_system_prompt_path)
                if not system_prompt:
                    logging.error(f"Failed to load content system prompt from '{self.content_system_prompt_path}'.")
                    return None, None
                self._system_prompt_cache["content"] = system_prompt

            # --- User prompt sections ---
            style = ""
            style_name = topic_item.get("style")
            if style_name:
                style_content = self._get_style_content(style_name)
                if style_content:
                    style = f"Style: {style_name}\n{style_content}\n\n"
                else:
                    logging.warning(f"Style content for '{style_name}' not found.")

            # The demand file is selected by the topic's target audience
            demand = ""
            demand_name = topic_item.get("target_audience")
            if demand_name:
                demand_content = self._get_demand_content(demand_name)
                if demand_content:
                    demand = f"Demand: {demand_name}\n{demand_content}\n\n"
                else:
                    logging.warning(f"Demand content for '{demand_name}' not found.")

            # Reference material, randomly sampled
            refers = ""
            refers_content = self._get_all_refer_contents(random_sample=True)
            if refers_content:
                refers = f"Reference:\n{refers_content}\n\n"

            # Object description, followed by the product description if any
            object_info = ""
            object_name = topic_item.get("object", "")
            if object_name:
                object_info += self._find_resource_info(
                    object_name, ("Object", "Description"), "Object", "对象"
                )

            product_name = topic_item.get("product", "")
            if product_name:
                object_info += self._find_resource_info(
                    product_name, ("Product",), "Product", "产品"
                )

            # --- Final user prompt ---
            user_prompt = (
                "请为我创建一个旅游文案。\n\n"
                f"{style}{demand}{refers}{object_info}\n\n"
                f"请考虑以上所有信息，创作一篇{topic_item.get('target_audience', '')}文旅内容。"
            )

            return system_prompt, user_prompt
        except Exception as e:
            # Same reporting style as get_topic_prompts: the traceback goes
            # through the logging system instead of being printed to stderr.
            logging.exception(f"Error creating content prompts: {e}")
            return None, None

    def _find_resource_info(self, name, resource_types, prefix, label):
        """Find and format the resource file describing ``name``.

        Candidate files are the "file_path" entries of every
        ``resource_dir_config`` item whose "type" is in ``resource_types``.
        A candidate whose file name contains ``name`` is preferred; otherwise
        the best fuzzy match (``find_directory_fuzzy_match``) is used.
        Candidates whose content cannot be loaded are skipped.

        Args:
            name: Object or product name to look for.
            resource_types: Resource "type" values to search.
            prefix: Section label written in front of the name ("Object",
                "Product").
            label: Word used for the entity in the log messages.

        Returns:
            str: "<prefix>: <name>" followed by the file content and a blank
            line, or "" when no matching file with content was found.
        """
        candidates = []
        for dir_info in self.resource_dir_config:
            if dir_info.get("type") in resource_types:
                candidates.extend(dir_info.get("file_path", []))

        # 1. File name containing the requested name
        for file_path in candidates:
            if name in os.path.basename(file_path):
                info = ResourceLoader.load_file_content(file_path)
                if info:
                    logging.info(f"找到{label}'{name}'的精确匹配资源文件: {file_path}")
                    return f"{prefix}: {name}\n{info}\n\n"

        # 2. Fuzzy match on the file names
        if candidates:
            logging.info(f"尝试模糊匹配{label}'{name}'的资源文件")
            file_names = [os.path.basename(f) for f in candidates]
            matched_filename, score = self.find_directory_fuzzy_match(name, files=file_names)

            if matched_filename and score > 0:
                # Map the matched file name back to its full path
                for file_path in candidates:
                    if os.path.basename(file_path) == matched_filename:
                        info = ResourceLoader.load_file_content(file_path)
                        if info:
                            logging.info(f"模糊匹配找到{label}'{name}'的资源文件: {file_path}，匹配分数: {score}")
                            return f"{prefix}: {name}\n{info}\n\n"

        logging.warning(f"未找到{label}'{name}'的任何匹配资源文件")
        return ""