# TravelContentCreator/utils/tweet_generator.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import time
import random
import argparse
import json
from datetime import datetime
import sys
# 2025-04-22 14:16:29 +08:00
import traceback
# 2025-04-22 17:36:29 +08:00
import logging # Add logging
# 2025-04-27 10:38:30 +08:00
import re
# sys.path.append('/root/autodl-tmp') # No longer needed if running as a module or if path is set correctly
# 从本地模块导入
# from TravelContentCreator.core.ai_agent import AI_Agent # Remove project name prefix
# from TravelContentCreator.core.topic_parser import TopicParser # Remove project name prefix
# 2025-04-22 14:19:21 +08:00
# ResourceLoader is now used implicitly via PromptManager
# from TravelContentCreator.utils.resource_loader import ResourceLoader
# from TravelContentCreator.utils.prompt_manager import PromptManager # Remove project name prefix
# from ..core import contentGen as core_contentGen # Change to absolute import
# from ..core import posterGen as core_posterGen # Change to absolute import
# from ..core import simple_collage as core_simple_collage # Change to absolute import
from core.ai_agent import AI_Agent
from core.topic_parser import TopicParser
from utils.prompt_manager import PromptManager # Keep this as it's importing from the same level package 'utils'
from utils import content_generator as core_contentGen
# 2025-04-25 10:11:45 +08:00
from core import poster_gen as core_posterGen
from core import simple_collage as core_simple_collage
from .output_handler import OutputHandler # <-- 添加导入
# 2025-05-10 20:12:49 +08:00
from utils.content_judger import ContentJudger # <-- 添加ContentJudger导入
class tweetTopic:
    """Plain record describing one generated tweet topic.

    Every constructor argument is stored on the instance under the
    same name; the class carries topic metadata only and has no logic.
    """

    def __init__(self, index, date, logic, object, product, product_logic, style, style_logic, target_audience, target_audience_logic):
        # Copy all constructor arguments onto the instance in one shot.
        fields = dict(locals())
        del fields["self"]
        vars(self).update(fields)
class tweetTopicRecord:
    """Couples a parsed topic batch with the prompts and run id that produced it."""

    def __init__(self, topics_list, system_prompt, user_prompt, run_id):
        # Keep everything needed to reproduce or audit a topic-generation run.
        self.run_id = run_id
        self.user_prompt = user_prompt
        self.system_prompt = system_prompt
        self.topics_list = topics_list
class tweetContent:
    """Holds one generated content variant and its parsed JSON payload.

    The raw AI ``result`` string is parsed into ``self.json_data``. On any
    parsing failure a placeholder dict with ``error=True`` is stored instead,
    so callers can always rely on the same keys being present.
    """

    def __init__(self, result, prompt, run_id, article_index, variant_index):
        self.result = result
        self.prompt = prompt
        self.run_id = run_id
        self.article_index = article_index
        self.variant_index = variant_index
        try:
            self.json_data = self.split_content(result)
        except Exception as e:
            # split_content already guards itself, but keep this belt-and-braces
            # handler so __init__ can never raise on malformed AI output.
            logging.error(f"Failed to parse AI result for {article_index}_{variant_index}: {e}")
            logging.debug(f"Raw result: {result[:500]}...")  # Log partial raw result
            self.json_data = {
                "title": "",
                "content": "",
                "tag": "",
                "error": True,
                # Bug fix: store the message, not the exception object, so the
                # dict stays JSON-serializable and matches split_content's shape.
                "raw_result": str(e),
                "judge_success": False,
            }

    def split_content(self, result):
        """Parse the raw model output into a JSON dict.

        Strips an optional ``</think>`` reasoning prefix and optional markdown
        code fences before decoding. The returned dict always contains the
        ``error``, ``raw_result`` and ``judge_success`` keys.
        """
        try:
            processed_result = result
            if "</think>" in result:
                # Keep only the text after the reasoning block.
                processed_result = result.split("</think>", 1)[1]
            # Tolerate ```json ... ``` fences around the JSON payload.
            processed_result = re.sub(r"^```(?:json)?\s*|\s*```$", "", processed_result.strip())
            json_data = json.loads(processed_result)
            json_data["error"] = False
            json_data["raw_result"] = None
            # Guarantee the judge_success field exists for downstream consumers.
            if "judge_success" not in json_data:
                json_data["judge_success"] = None
            return json_data
        except Exception as e:
            logging.warning(f"解析内容时出错: {e}, 使用默认空内容")
            # Build a fresh placeholder dict instead of referencing undefined state.
            return {
                "title": "",
                "content": "",
                "error": True,
                "raw_result": str(e),
                "judge_success": False,
            }

    def get_json_data(self):
        """Returns the generated JSON data dictionary."""
        return self.json_data

    def get_prompt(self):
        """Returns the user prompt used to generate this content."""
        return self.prompt

    def get_content(self):
        """Return the parsed content body (empty string if parsing failed)."""
        # Bug fix: self.content never existed; read from the parsed dict instead.
        return self.json_data.get("content", "")

    def get_title(self):
        """Return the parsed title (empty string if parsing failed)."""
        # Bug fix: self.title never existed; read from the parsed dict instead.
        return self.json_data.get("title", "")
def generate_topics(ai_agent, system_prompt, user_prompt, run_id, temperature=0.2, top_p=0.5, presence_penalty=1.5):
    """Generate the topic list for a run.

    Calls the AI agent once with the topic prompts and delegates parsing of
    the raw output to TopicParser. Persistence is the caller's (OutputHandler)
    responsibility; this function only returns the parsed list.
    """
    logging.info("Starting topic generation...")
    started_at = time.time()
    # Non-streaming call; work() returns (result, tokens, time_cost).
    result, tokens, time_cost = ai_agent.work(
        system_prompt, user_prompt, "", temperature, top_p, presence_penalty
    )
    logging.info(f"Topic generation API call completed in {time_cost:.2f}s. Estimated tokens: {tokens}")
    # Parse the raw model output into structured topics.
    parsed_topics = TopicParser.parse_topics(result, run_id)
    if not parsed_topics:
        logging.warning("Topic parsing resulted in an empty list.")
    # Return the parsed list directly; saving raw output is handled elsewhere.
    return parsed_topics
def generate_single_content(ai_agent, system_prompt, user_prompt, item, run_id,
                            article_index, variant_index, temperature=0.3, top_p=0.4, presence_penalty=1.5):
    """Generates single content variant data. Returns (content_json, user_prompt) or (None, None)."""
    logging.info(f"Generating content for topic {article_index}, variant {variant_index}")
    try:
        # Guard clause: both prompts are mandatory.
        if not system_prompt or not user_prompt:
            logging.error("System or User prompt is empty. Cannot generate content.")
            return None, None
        logging.debug(f"Using pre-constructed prompts. User prompt length: {len(user_prompt)}")
        # Small random delay to avoid hammering the API in tight loops.
        time.sleep(random.random() * 0.5)
        # Non-streaming call; work() returns (result, tokens, time_cost).
        result, tokens, time_cost = ai_agent.work(
            system_prompt, user_prompt, "", temperature, top_p, presence_penalty
        )
        if result is None:
            # The AI call itself failed; hand back a placeholder payload so the
            # caller can still pass something to the output handler.
            logging.error(f"AI agent work failed for {article_index}_{variant_index}. No result returned.")
            return {"title": "", "content": "", "error": True, "judge_success": False}, user_prompt
        logging.info(f"Content generation for {article_index}_{variant_index} completed in {time_cost:.2f}s. Estimated tokens: {tokens}")
        # tweetContent performs the JSON parsing and error wrapping; saving is
        # the OutputHandler's responsibility, so we only return the data pair.
        variant = tweetContent(result, user_prompt, run_id, article_index, variant_index)
        return variant.get_json_data(), variant.get_prompt()
    except Exception:
        logging.exception(f"Error generating single content for {article_index}_{variant_index}:")
        return {"title": "", "content": "", "error": True, "judge_success": False}, user_prompt
2025-04-17 16:14:41 +08:00
def generate_content(ai_agent, system_prompt, topics, output_dir, run_id, prompts_dir, resource_dir,
                     variants=2, temperature=0.3, start_index=0, end_index=None):
    """Generate content variants for a slice of the topic list (legacy batch entry point).

    NOTE(review): this function looks legacy/unused — the active pipeline goes
    through generate_content_for_topic(). The call below does NOT match
    generate_single_content's current signature (no user_prompt argument is
    passed, so `item` lands in the user_prompt slot and the index arguments
    shift by one position). Confirm before relying on this function.
    """
    if not topics:
        logging.warning("没有选题,无法生成内容")
        return
    # Clamp the processing window [start_index, end_index) to the topic list.
    if end_index is None or end_index > len(topics):
        end_index = len(topics)
    topics_to_process = topics[start_index:end_index]
    logging.info(f"准备处理{len(topics_to_process)}个选题...")
    # Summary-file bookkeeping was removed when OutputHandler took over persistence.
    # summary_file = ResourceLoader.create_summary_file(output_dir, run_id, len(topics_to_process))
    # Process each topic in the window.
    processed_results = []
    for i, item in enumerate(topics_to_process):
        logging.info(f"处理第 {i+1}/{len(topics_to_process)} 篇文章")
        # Generate `variants` variants per topic.
        for j in range(variants):
            logging.info(f"正在生成变体 {j+1}/{variants}")
            # NOTE(review): argument order is wrong here — generate_single_content
            # expects (ai_agent, system_prompt, user_prompt, item, run_id,
            # article_index, variant_index, temperature, ...). As written, `item`
            # is consumed as the user prompt, run_id as the item, i+1 as run_id,
            # j+1 as article_index and temperature as variant_index.
            tweet_content, result = generate_single_content(
                ai_agent, system_prompt, item, run_id, i+1, j+1, temperature
            )
            if tweet_content:
                processed_results.append(tweet_content)
            # # Summary update for the first variant only (disabled legacy code):
            # if j == 0:
            #     ResourceLoader.update_summary(summary_file, i+1, user_prompt, result)
    logging.info(f"完成{len(processed_results)}篇文章生成")
    return processed_results
def prepare_topic_generation(prompt_manager: PromptManager,
                             api_url: str,
                             model_name: str,
                             api_key: str,
                             timeout: int,
                             max_retries: int):
    """Prepare the AI agent and prompts for topic generation.

    Args:
        prompt_manager: An initialized PromptManager instance.
        api_url, model_name, api_key, timeout, max_retries: AI_Agent parameters.

    Returns:
        (ai_agent, system_prompt, user_prompt), or (None, None, None) when the
        prompts are missing or the agent cannot be initialized.
    """
    logging.info("Preparing for topic generation (using provided PromptManager)...")
    # Fetch the topic prompts from the provided PromptManager.
    system_prompt, user_prompt = prompt_manager.get_topic_prompts()
    if not (system_prompt and user_prompt):
        logging.error("Failed to get topic generation prompts from PromptManager.")
        return None, None, None
    # Build the AI agent from the passed-in connection parameters.
    try:
        logging.info("Initializing AI Agent for topic generation...")
        agent = AI_Agent(
            api_url,
            model_name,
            api_key,
            timeout=timeout,
            max_retries=max_retries,
        )
    except Exception:
        logging.exception("Error initializing AI Agent for topic generation:")
        return None, None, None
    return agent, system_prompt, user_prompt
2025-04-22 15:39:35 +08:00
def run_topic_generation_pipeline(config, run_id=None):
    """
    Runs the complete topic generation pipeline based on the configuration.

    Builds a PromptManager from `config`, prepares the AI agent, generates the
    topic list and always closes the agent afterwards.

    Returns:
        (run_id, topics_list, system_prompt, user_prompt) on success, or
        (None, None, None, None) on failure.
    """
    logging.info("Starting Step 1: Topic Generation Pipeline...")
    if run_id is None:
        logging.info("No run_id provided, generating one based on timestamp.")
        run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    else:
        logging.info(f"Using provided run_id: {run_id}")
    ai_agent, system_prompt, user_prompt = None, None, None  # Initialize
    topics_list = None
    prompt_manager = None
    try:
        # --- PromptManager construction parameters ---
        topic_sys_prompt_path = config.get("topic_system_prompt")
        topic_user_prompt_path = config.get("topic_user_prompt")
        content_sys_prompt_path = config.get("content_system_prompt")  # unused here, but PromptManager may need it
        prompts_dir_path = config.get("prompts_dir")
        prompts_config = config.get("prompts_config")
        resource_config = config.get("resource_dir", [])
        topic_num = config.get("num", 1)
        topic_date = config.get("date", "")
        prompt_manager = PromptManager(
            topic_system_prompt_path=topic_sys_prompt_path,
            topic_user_prompt_path=topic_user_prompt_path,
            content_system_prompt_path=content_sys_prompt_path,
            prompts_dir=prompts_dir_path,
            prompts_config=prompts_config,
            resource_dir_config=resource_config,
            topic_gen_num=topic_num,
            topic_gen_date=topic_date
        )
        logging.info("PromptManager instance created.")
        # --- AI Agent parameters ---
        ai_api_url = config.get("api_url")
        ai_model = config.get("model")
        ai_api_key = config.get("api_key")
        ai_timeout = config.get("request_timeout", 30)
        ai_max_retries = config.get("max_retries", 3)
        # Fail fast when any mandatory AI parameter is missing.
        if not all([ai_api_url, ai_model, ai_api_key]):
            raise ValueError("Missing required AI configuration (api_url, model, api_key) in config.")
        ai_agent, system_prompt, user_prompt = prepare_topic_generation(
            prompt_manager,
            ai_api_url,
            ai_model,
            ai_api_key,
            ai_timeout,
            ai_max_retries
        )
        if not ai_agent or not system_prompt or not user_prompt:
            raise ValueError("Failed to prepare topic generation (agent or prompts missing).")
        # --- Generate topics ---
        topics_list = generate_topics(
            ai_agent, system_prompt, user_prompt,
            run_id,
            config.get("topic_temperature", 0.2),
            config.get("topic_top_p", 0.5),
            config.get("topic_presence_penalty", 1.5)
        )
    except Exception:
        logging.exception("Error during topic generation pipeline execution:")
        # Bug fix: do NOT close the agent here as well — returning from this
        # except still runs the finally block below, so the old code closed the
        # agent twice on the failure path.
        return None, None, None, None  # Signal failure
    finally:
        # Always release the AI agent, on both success and failure paths.
        if ai_agent:
            logging.info("Closing topic generation AI Agent...")
            ai_agent.close()
    if topics_list is None:  # Defensive: generate_topics currently returns a list
        logging.error("Topic generation failed (generate_topics returned None or an error occurred).")
        return None, None, None, None
    elif not topics_list:
        logging.warning(f"Topic generation completed for run {run_id}, but the resulting topic list is empty.")
        # Return the empty list and prompts anyway; the caller decides what to do.
    logging.info(f"Topic generation pipeline completed successfully. Run ID: {run_id}")
    # Return the raw data needed by the OutputHandler
    return run_id, topics_list, system_prompt, user_prompt
2025-04-22 14:16:29 +08:00
# --- Decoupled Functional Units (Moved from main.py) ---
def generate_content_for_topic(ai_agent: AI_Agent,
                               prompt_manager: PromptManager,
                               topic_item: dict,
                               run_id: str,
                               topic_index: int,
                               output_handler: OutputHandler,
                               variants: int,
                               temperature: float,
                               top_p: float,
                               presence_penalty: float,
                               enable_content_judge: bool):
    """Generates all content variants for a single topic item and uses OutputHandler.
    Args:
        ai_agent: Initialized AI_Agent instance.
        prompt_manager: Initialized PromptManager instance.
        topic_item: Dictionary representing the topic.
        run_id: Current run ID.
        topic_index: 1-based index of the topic.
        output_handler: An instance of OutputHandler to process results.
        variants: Number of variants to generate.
        temperature, top_p, presence_penalty: AI generation parameters.
        enable_content_judge: Whether to enable content judging (review/rewrite).
    Returns:
        bool: True if at least one variant was successfully generated and handled, False otherwise.
    """
    logging.info(f"Generating content for Topic {topic_index} (Object: {topic_item.get('object', 'N/A')})...")
    success_flag = False  # Track if any variant succeeded
    # `variants` now arrives as an explicit parameter instead of via config.
    # variants = config.get("variants", 1)
    # When content judging is enabled, gather the product/object reference
    # material the judger needs before generating any variants.
    product_info = None
    content_judger = None
    if enable_content_judge:
        logging.info(f"内容审核功能已启用,准备获取产品资料...")
        # Pull the product and object names off the topic item.
        product_name = topic_item.get("product", "")
        object_name = topic_item.get("object", "")
        # Accumulate both object and product material into one string.
        product_info = ""
        if object_name:
            # Mirror of the object-info lookup in PromptManager.get_content_prompts:
            # scan configured resource dirs for a file whose basename contains the object name.
            found_object_info = False
            all_description_files = []
            # Collect candidate files from all Object/Description resource dirs.
            for dir_info in prompt_manager.resource_dir_config:
                if dir_info.get("type") in ["Object", "Description"]:
                    all_description_files.extend(dir_info.get("file_path", []))
            # Exact (substring) match on the file basename.
            for file_path in all_description_files:
                if object_name in os.path.basename(file_path):
                    from utils.resource_loader import ResourceLoader
                    info = ResourceLoader.load_file_content(file_path)
                    if info:
                        product_info += f"Object: {object_name}\n{info}\n\n"
                        logging.info(f"为内容审核找到对象'{object_name}'的资源文件: {file_path}")
                        found_object_info = True
                        break
            # Missing object material is logged but not fatal.
            if not found_object_info:
                logging.warning(f"未能为内容审核找到对象'{object_name}'的资源文件")
        # Same lookup for product material.
        if product_name:
            found_product_info = False
            all_product_files = []
            for dir_info in prompt_manager.resource_dir_config:
                if dir_info.get("type") == "Product":
                    all_product_files.extend(dir_info.get("file_path", []))
            for file_path in all_product_files:
                if product_name in os.path.basename(file_path):
                    from utils.resource_loader import ResourceLoader
                    info = ResourceLoader.load_file_content(file_path)
                    if info:
                        product_info += f"Product: {product_name}\n{info}\n\n"
                        logging.info(f"为内容审核找到产品'{product_name}'的资源文件: {file_path}")
                        found_product_info = True
                        break
            # Missing product material is logged but not fatal.
            if not found_product_info:
                logging.warning(f"未能为内容审核找到产品'{product_name}'的资源文件")
        # Initialize the ContentJudger only if we found any reference material.
        if product_info:
            logging.info("成功获取产品资料初始化ContentJudger...")
            # Reuses the main ai_agent for judging to avoid a second model client.
            # NOTE(review): reaches into PromptManager's private cache — confirm
            # the "judger_system_prompt" key is actually populated there.
            content_judger_system_prompt_path = prompt_manager._system_prompt_cache.get("judger_system_prompt")
            content_judger = ContentJudger(ai_agent, system_prompt_path=content_judger_system_prompt_path)
        else:
            logging.warning("未能获取产品资料,内容审核功能将被跳过")
            enable_content_judge = False
    for j in range(variants):
        variant_index = j + 1
        logging.info(f" Generating Variant {variant_index}/{variants}...")
        # Prompts are rebuilt per variant from the shared PromptManager.
        content_system_prompt, content_user_prompt = prompt_manager.get_content_prompts(topic_item)
        if not content_system_prompt or not content_user_prompt:
            logging.warning(f" Skipping Variant {variant_index} due to missing content prompts.")
            continue
        time.sleep(random.random() * 0.5)
        try:
            # Call generate_single_content with the passed-in generation parameters.
            content_json, prompt_data = generate_single_content(
                ai_agent, content_system_prompt, content_user_prompt, topic_item,
                run_id, topic_index, variant_index,
                temperature,
                top_p,
                presence_penalty
            )
            # Any non-None payload is handled — even empty title/content counts
            # as a valid (if degraded) result.
            if content_json is not None:
                # Run the content judge when enabled and properly initialized.
                if enable_content_judge and content_judger and product_info:
                    logging.info(f" 对Topic {topic_index}, Variant {variant_index}进行内容审核...")
                    # Flatten title + content into the judger's expected text format.
                    content_to_judge = f"""title: {content_json.get('title', '')}
content: {content_json.get('content', '')}
"""
                    try:
                        judged_result = content_judger.judge_content(product_info, content_to_judge)
                        if judged_result and isinstance(judged_result, dict):
                            if "title" in judged_result and "content" in judged_result:
                                # Keep the pre-judge title/content for traceability.
                                content_json["original_title"] = content_json.get("title", "")
                                content_json["original_content"] = content_json.get("content", "")
                                # Preserve original tags ("tags" preferred over legacy "tag").
                                original_tags = content_json.get("tags", content_json.get("tag", ""))
                                content_json["original_tags"] = original_tags
                                # Swap in the judged title/content.
                                content_json["title"] = judged_result["title"]
                                content_json["content"] = judged_result["content"]
                                # Keep the original tags and drop the legacy duplicate key.
                                content_json["tags"] = original_tags
                                if "tag" in content_json:
                                    del content_json["tag"]
                                # Mark that judging ran and record its outcome.
                                content_json["judged"] = True
                                content_json["judge_success"] = judged_result.get("judge_success", False)
                                # Optionally keep the judge's analysis text
                                # (key is the judger's Chinese field name).
                                if "不良内容分析" in judged_result:
                                    content_json["judge_analysis"] = judged_result["不良内容分析"]
                            else:
                                logging.warning(f" 审核结果缺少title或content字段保留原内容")
                                content_json["judge_success"] = False
                        else:
                            logging.warning(f" 内容审核返回无效结果,保留原内容")
                            content_json["judge_success"] = False
                    except Exception as judge_err:
                        # Judging failures never discard the generated content.
                        logging.exception(f" 内容审核过程出错: {judge_err},保留原内容")
                        content_json["judge_success"] = False
                else:
                    # Judging disabled (or unavailable): tag the payload accordingly.
                    content_json["judged"] = False
                    content_json["judge_success"] = None
                # Persist/handle the variant through the OutputHandler.
                output_handler.handle_content_variant(
                    run_id, topic_index, variant_index, content_json, prompt_data or ""
                )
                success_flag = True  # Mark success for this topic
                # A payload flagged error=True means the AI responded but its
                # output could not be parsed; it is still handled with empty values.
                if content_json.get("error"):
                    logging.warning(f" Content generation for Topic {topic_index}, Variant {variant_index} succeeded but response parsing had issues. Using empty content values.")
                else:
                    logging.info(f" Successfully generated and handled content for Topic {topic_index}, Variant {variant_index}.")
            else:
                logging.error(f" Content generation failed for Topic {topic_index}, Variant {variant_index}. Skipping handling.")
        except Exception as e:
            logging.exception(f" Error during content generation call or handling for Topic {topic_index}, Variant {variant_index}:")
    # Return the success flag for this topic
    return success_flag
def generate_posters_for_topic(topic_item: dict,
output_dir: str,
run_id: str,
topic_index: int,
output_handler: OutputHandler, # 添加 handler
variants: int,
model_name: str,
base_url: str,
api_key: str,
poster_assets_base_dir: str,
image_base_dir: str,
2025-04-24 19:09:50 +08:00
img_frame_possibility: float,
text_bg_possibility: float,
2025-04-25 17:35:43 +08:00
title_possibility: float,
text_possibility: float,
resource_dir_config: list,
poster_target_size: tuple,
output_collage_subdir: str,
output_poster_subdir: str,
output_poster_filename: str,
system_prompt: str,
collage_style: str,
timeout: int
):
"""Generates all posters for a single topic item, handling image data via OutputHandler.
Args:
topic_item: The dictionary representing a single topic.
output_dir: The base output directory for the entire run (e.g., ./result).
run_id: The ID for the current run.
topic_index: The 1-based index of the current topic.
variants: Number of variants.
poster_assets_base_dir: Path to poster assets (fonts, frames etc.).
image_base_dir: Base path for source images.
img_frame_possibility: Probability of adding image frame.
text_bg_possibility: Probability of adding text background.
resource_dir_config: Configuration for resource directories (used for Description).
poster_target_size: Target size tuple (width, height) for the poster.
text_possibility: Probability of adding secondary text.
output_collage_subdir: Subdirectory name for saving collages.
output_poster_subdir: Subdirectory name for saving posters.
output_poster_filename: Filename for the final poster.
2025-04-24 18:57:05 +08:00
system_prompt: System prompt for content generation.
output_handler: An instance of OutputHandler to process results.
timeout: Timeout for content generation.
Returns:
True if poster generation was attempted (regardless of individual variant success),
False if setup failed before attempting variants.
"""
2025-04-22 17:36:29 +08:00
logging.info(f"Generating posters for Topic {topic_index} (Object: {topic_item.get('object', 'N/A')})...")
# --- Load content data from files ---
loaded_content_list = []
logging.info(f"Attempting to load content data for {variants} variants for topic {topic_index}...")
for j in range(variants):
variant_index = j + 1
variant_dir = os.path.join(output_dir, run_id, f"{topic_index}_{variant_index}")
content_path = os.path.join(variant_dir, "article.json")
try:
if os.path.exists(content_path):
with open(content_path, 'r', encoding='utf-8') as f_content:
content_data = json.load(f_content)
if isinstance(content_data, dict) and 'title' in content_data and 'content' in content_data:
loaded_content_list.append(content_data)
logging.debug(f" Successfully loaded content from: {content_path}")
else:
logging.warning(f" Content file {content_path} has invalid format. Skipping.")
else:
logging.warning(f" Content file not found for variant {variant_index}: {content_path}. Skipping.")
except json.JSONDecodeError:
logging.error(f" Error decoding JSON from content file: {content_path}. Skipping.")
except Exception as e:
logging.exception(f" Error loading content file {content_path}: {e}")
if not loaded_content_list:
logging.error(f"No valid content data loaded for topic {topic_index}. Cannot generate posters.")
return False
logging.info(f"Successfully loaded content data for {len(loaded_content_list)} variants.")
# --- End Load content data ---
# Initialize generators using parameters
try:
content_gen_instance = core_contentGen.ContentGenerator(model_name=model_name, base_url=base_url, api_key=api_key)
if not poster_assets_base_dir:
logging.error("Error: 'poster_assets_base_dir' not provided. Cannot generate posters.")
return False
poster_gen_instance = core_posterGen.PosterGenerator(base_dir=poster_assets_base_dir)
2025-04-24 19:09:50 +08:00
poster_gen_instance.set_img_frame_possibility(img_frame_possibility)
poster_gen_instance.set_text_bg_possibility(text_bg_possibility)
except Exception as e:
2025-04-22 17:36:29 +08:00
logging.exception("Error initializing generators for poster creation:")
2025-05-10 20:12:49 +08:00
return False
# --- Setup: Paths and Object Name ---
object_name = topic_item.get("object", "")
if not object_name:
2025-04-22 17:36:29 +08:00
logging.warning("Warning: Topic object name is missing. Cannot generate posters.")
return False
# Clean object name
try:
object_name_cleaned = object_name.split(".")[0].replace("景点信息-", "").strip()
if not object_name_cleaned:
2025-04-22 17:36:29 +08:00
logging.warning(f"Warning: Object name '{object_name}' resulted in empty string after cleaning. Skipping posters.")
return False
object_name = object_name_cleaned
except Exception as e:
2025-04-22 17:36:29 +08:00
logging.warning(f"Warning: Could not fully clean object name '{object_name}': {e}. Skipping posters.")
return False
# Construct and check INPUT image paths
input_img_dir_path = os.path.join(image_base_dir, object_name)
if not os.path.exists(input_img_dir_path) or not os.path.isdir(input_img_dir_path):
2025-04-27 10:38:30 +08:00
# 模糊匹配:如果找不到完全匹配的目录,尝试查找包含关键词的目录
logging.info(f"尝试对图片目录进行模糊匹配: {object_name}")
found_dir = None
# 1. 尝试获取image_base_dir下的所有目录
try:
all_dirs = [d for d in os.listdir(image_base_dir)
if os.path.isdir(os.path.join(image_base_dir, d))]
logging.info(f"找到 {len(all_dirs)} 个图片目录可用于模糊匹配")
# 2. 提取对象名称中的关键词
# 例如:"美的鹭湖鹭栖台酒店+盈香心动乐园" -> ["美的", "鹭湖", "酒店", "乐园"]
# 首先通过常见分隔符分割(+、空格、_、-等)
parts = re.split(r'[+\s_\-]', object_name)
keywords = []
for part in parts:
# 只保留长度大于1的有意义关键词
if len(part) > 1:
keywords.append(part)
# 尝试匹配更短的语义单元例如中文的2-3个字的词语
# 对于中文名称可以尝试提取2-3个字的短语
for i in range(len(object_name) - 1):
keyword = object_name[i:i+2] # 提取2个字符
if len(keyword) == 2 and all('\u4e00' <= c <= '\u9fff' for c in keyword):
keywords.append(keyword)
# 3. 对每个目录进行评分
dir_scores = {}
for directory in all_dirs:
score = 0
dir_lower = directory.lower()
# 为每个匹配的关键词增加分数
for keyword in keywords:
if keyword.lower() in dir_lower:
score += 1
# 如果得分大于0至少匹配一个关键词记录该目录
if score > 0:
dir_scores[directory] = score
# 4. 选择得分最高的目录
if dir_scores:
best_match = max(dir_scores.items(), key=lambda x: x[1])
found_dir = best_match[0]
logging.info(f"模糊匹配成功!匹配目录: {found_dir},匹配分数: {best_match[1]}")
# 更新图片目录路径
input_img_dir_path = os.path.join(image_base_dir, found_dir)
logging.info(f"使用模糊匹配的图片目录: {input_img_dir_path}")
else:
logging.warning(f"模糊匹配未找到任何包含关键词的目录")
except Exception as e:
logging.warning(f"模糊匹配过程中出错: {e}")
# 如果仍然无法找到有效目录,则返回错误
if not found_dir or not os.path.exists(input_img_dir_path) or not os.path.isdir(input_img_dir_path):
logging.warning(f"Warning: 即使通过模糊匹配也无法找到图片目录: '{input_img_dir_path}'. Skipping posters for this topic.")
return False
# Locate Description File using resource_dir_config parameter
info_directory = []
description_file_path = None
found_description = False
2025-04-27 10:38:30 +08:00
# 准备关键词列表用于模糊匹配
# 与上面图片目录匹配类似,提取对象名称的关键词
parts = re.split(r'[+\s_\-]', object_name)
keywords = []
for part in parts:
if len(part) > 1:
keywords.append(part)
# 尝试提取中文短语作为关键词
for i in range(len(object_name) - 1):
keyword = object_name[i:i+2]
if len(keyword) == 2 and all('\u4e00' <= c <= '\u9fff' for c in keyword):
keywords.append(keyword)
logging.info(f"用于描述文件模糊匹配的关键词: {keywords}")
# 尝试精确匹配
for dir_info in resource_dir_config:
if dir_info.get("type") == "Description":
for file_path in dir_info.get("file_path", []):
if object_name in os.path.basename(file_path):
description_file_path = file_path
if os.path.exists(description_file_path):
info_directory = [description_file_path]
2025-04-27 10:38:30 +08:00
logging.info(f"找到并使用精确匹配的描述文件: {description_file_path}")
found_description = True
else:
2025-04-27 10:38:30 +08:00
logging.warning(f"Warning: 配置中指定的描述文件未找到: {description_file_path}")
break
if found_description:
2025-04-27 10:38:30 +08:00
break
# 如果精确匹配失败,尝试模糊匹配
if not found_description:
logging.info(f"未找到'{object_name}'的精确匹配描述文件,尝试模糊匹配...")
best_score = 0
best_file = None
for dir_info in resource_dir_config:
if dir_info.get("type") == "Description":
for file_path in dir_info.get("file_path", []):
file_name = os.path.basename(file_path)
score = 0
# 计算关键词匹配分数
for keyword in keywords:
if keyword.lower() in file_name.lower():
score += 1
# 如果当前文件得分更高,更新最佳匹配
if score > best_score and os.path.exists(file_path):
best_score = score
best_file = file_path
# 如果找到了最佳匹配文件
if best_file:
description_file_path = best_file
info_directory = [description_file_path]
logging.info(f"模糊匹配找到描述文件: {description_file_path},匹配分数: {best_score}")
found_description = True
if not found_description:
2025-04-27 10:38:30 +08:00
logging.warning(f"未找到对象'{object_name}'的匹配描述文件。")
# Generate Text Configurations for All Variants
try:
poster_text_configs_raw = content_gen_instance.run(info_directory, variants, loaded_content_list, system_prompt, timeout=timeout)
if not poster_text_configs_raw:
2025-04-22 17:36:29 +08:00
logging.warning("Warning: ContentGenerator returned empty configuration data. Skipping posters.")
return False
2025-04-22 17:46:21 +08:00
# --- 使用 OutputHandler 保存 Poster Config ---
output_handler.handle_poster_configs(run_id, topic_index, poster_text_configs_raw)
2025-04-22 21:26:56 +08:00
# --- 结束使用 Handler 保存 ---
# 打印原始配置数据以进行调试
logging.info(f"生成的海报配置数据: {poster_text_configs_raw}")
2025-04-22 17:46:21 +08:00
2025-04-22 21:26:56 +08:00
# 直接使用配置数据,避免通过文件读取
if isinstance(poster_text_configs_raw, list):
poster_configs = poster_text_configs_raw
logging.info(f"直接使用生成的配置列表,包含 {len(poster_configs)} 个配置项")
else:
# 如果不是列表尝试转换或使用PosterConfig类解析
logging.info("生成的配置数据不是列表使用PosterConfig类进行处理")
poster_config_summary = core_posterGen.PosterConfig(poster_text_configs_raw)
poster_configs = poster_config_summary.get_config()
except Exception as e:
logging.exception("Error running ContentGenerator or parsing poster configs:")
traceback.print_exc()
return False
# Poster Generation Loop for each variant
poster_num = min(variants, len(poster_configs)) if isinstance(poster_configs, list) else variants
logging.info(f"计划生成 {poster_num} 个海报变体")
any_poster_attempted = False
for j_index in range(poster_num):
variant_index = j_index + 1
logging.info(f"Generating Poster {variant_index}/{poster_num}...")
any_poster_attempted = True
collage_img = None # To store the generated collage PIL Image
poster_img = None # To store the final poster PIL Image
try:
# 获取当前变体的配置
if isinstance(poster_configs, list) and j_index < len(poster_configs):
poster_config = poster_configs[j_index]
logging.info(f"使用配置数据项 {j_index+1}: {poster_config}")
else:
# 回退方案使用PosterConfig类
poster_config = poster_config_summary.get_config_by_index(j_index)
logging.info(f"使用PosterConfig类获取配置项 {j_index+1}")
if not poster_config:
logging.warning(f"Warning: Could not get poster config for index {j_index}. Skipping.")
continue
# --- Image Collage ---
logging.info(f"Generating collage from: {input_img_dir_path}")
collage_images, used_image_filenames = core_simple_collage.process_directory(
input_img_dir_path,
style=collage_style,
target_size=poster_target_size,
output_count=1
)
if not collage_images: # 检查列表是否为空
2025-04-22 17:36:29 +08:00
logging.warning(f"Warning: Failed to generate collage image for Variant {variant_index}. Skipping poster.")
continue
collage_img = collage_images[0] # 获取第一个 PIL Image
used_image_files = used_image_filenames[0] if used_image_filenames else [] # 获取使用的图片文件名
logging.info(f"Collage image generated successfully (in memory). Used images: {used_image_files}")
logging.info(f"拼贴图使用的图片文件: {used_image_files}")
# --- 使用 Handler 保存 Collage 图片和使用的图片文件信息 ---
output_handler.handle_generated_image(
run_id, topic_index, variant_index,
image_type='collage',
image_data=collage_img,
output_filename='collage.png', # 或者其他期望的文件名
metadata={'used_images': used_image_files} # 添加图片文件信息到元数据
)
# --- 结束保存 Collage ---
# --- Create Poster ---
if random.random() > title_possibility:
text_data = {
"title": poster_config.get('main_title', ''),
"subtitle": "",
"additional_texts": []
}
texts = poster_config.get('texts', [])
if texts:
# 确保文本不为空
if random.random() > text_possibility:
text_data["additional_texts"].append({"text": texts[0], "position": "middle", "size_factor": 0.8})
# for text in texts:
# if random.random() < text_possibility:
# text_data["additional_texts"].append({"text": text, "position": "middle", "size_factor": 0.8})
else:
text_data = {
"title": "",
"subtitle": "",
"additional_texts": []
}
# 打印要发送的文本数据
logging.info(f"文本数据: {text_data}")
# 调用修改后的 create_poster, 接收 PIL Image
poster_img = poster_gen_instance.create_poster(collage_img, text_data)
if poster_img:
logging.info(f"Poster image generated successfully (in memory).")
# --- 使用 Handler 保存 Poster 图片 ---
output_handler.handle_generated_image(
run_id, topic_index, variant_index,
image_type='poster',
image_data=poster_img,
output_filename=output_poster_filename, # 使用参数中的文件名
metadata={'used_collage': True, 'collage_images': used_image_files}
)
# --- 结束保存 Poster ---
else:
logging.warning(f"Warning: Poster generation function returned None for variant {variant_index}.")
except Exception as e:
logging.exception(f"Error during poster generation for Variant {variant_index}:")
traceback.print_exc()
continue
return any_poster_attempted