TravelContentCreator/main.py

import os
import time
from datetime import datetime
import argparse
import sys
import traceback
import json
import logging

from core.ai_agent import AI_Agent
# from core.topic_parser import TopicParser # No longer needed directly in main?
import utils.content_generator as contentGen
import core.poster_gen as poster_gen
import core.simple_collage as simple_collage
from utils.resource_loader import ResourceLoader
from utils.tweet_generator import ( # Import the moved functions
    run_topic_generation_pipeline,
    generate_content_for_topic,
    generate_posters_for_topic
)
from utils.prompt_manager import PromptManager # Import PromptManager
import random
# Import Output Handlers
from utils.output_handler import FileSystemOutputHandler, OutputHandler
from core.topic_parser import TopicParser
from utils.tweet_generator import tweetTopicRecord # Needed only if loading old topics files?
# 导入额外配图选择函数
from utils.poster_notes_creator import select_additional_images

def load_config(config_path="poster_gen_config.json"):
    """Loads configuration from a JSON file."""
    if not os.path.exists(config_path):
        print(f"Error: Configuration file '{config_path}' not found.")
        print("Please copy 'example_config.json' to 'poster_gen_config.json' and customize it.")
        sys.exit(1)
    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
        # Basic validation (can be expanded)
        required_keys = ["api_url", "model", "api_key", "resource_dir", "output_dir", "num", "variants", "topic_system_prompt", "topic_user_prompt", "content_system_prompt", "image_base_dir"]
        if not all(key in config for key in required_keys):
             missing_keys = [key for key in required_keys if key not in config]
             print(f"Error: Config file '{config_path}' is missing required keys: {missing_keys}")
             sys.exit(1)

        # 验证prompts_dir或prompts_config至少有一个存在
        if not ("prompts_dir" in config
                or "prompts_config" in config):
            print(f"Error: Config file '{config_path}' must contain either 'prompts_dir' or 'prompts_config'")
            sys.exit(1)

        # Resolve relative paths based on config location or a defined base path if necessary
        # For simplicity, assuming paths in config are relative to project root or absolute
        return config
    except json.JSONDecodeError:
        print(f"Error: Could not decode JSON from '{config_path}'. Check the file format.")
        sys.exit(1)
    except Exception as e:
        print(f"Error loading configuration from '{config_path}': {e}")
        sys.exit(1)

# Removed generate_topics_step function definition from here
# Its logic is now in utils.tweet_generator.run_topic_generation_pipeline

# --- Main Orchestration Step (Remains in main.py) ---

def generate_content_and_posters_step(config, run_id, topics_list, output_handler):
    """
    Step 2: Generates content and posters for each topic in the record.
    Returns True if successful (at least partially), False otherwise.
    """
    if not topics_list or not topics_list:
        # print("Skipping content/poster generation: No valid topics found in the record.")
        logging.warning("Skipping content/poster generation: No valid topics found in the record.")
        return False

    # print(f"\n--- Starting Step 2: Content and Poster Generation for run_id: {run_id} ---")
    logging.info(f"Starting Step 2: Content and Poster Generation for run_id: {run_id}")
    # print(f"Processing {len(topics_list)} topics...")
    logging.info(f"Processing {len(topics_list)} topics...")

    success_flag = False
    # --- 创建 PromptManager 实例 (传入具体参数) ---
    try:
        prompt_manager = PromptManager(
            topic_system_prompt_path=config.get("topic_system_prompt"),
            topic_user_prompt_path=config.get("topic_user_prompt"),
            content_system_prompt_path=config.get("content_system_prompt"),
            prompts_config=config.get("prompts_config"),  # 新的配置方式
            resource_dir_config=config.get("resource_dir", []),
            topic_gen_num=config.get("num", 1), # Topic gen num/date used by topic prompts
            topic_gen_date=config.get("date", ""),
            content_judger_system_prompt_path=config.get("content_judger_system_prompt") # 添加内容审核系统提示词路径
        )
        logging.info("PromptManager instance created for Step 2.")
    except KeyError as e:
         logging.error(f"Configuration error creating PromptManager: Missing key '{e}'. Cannot proceed with content generation.")
         return False
    # --- 结束创建 PromptManager ---

    ai_agent = None
    try:
        # --- Initialize AI Agent for Content Generation ---
        request_timeout = config.get("request_timeout", 180) # Default 180 seconds
        max_retries = config.get("max_retries", 3) # Default 3 retries
        stream_chunk_timeout = config.get("stream_chunk_timeout", 30) # Default 30 seconds for stream chunk
        ai_agent = AI_Agent(
            config.get("api_url"),
            config.get("model"),
            config.get("api_key"),
            timeout=request_timeout,
            max_retries=max_retries,
            stream_chunk_timeout=stream_chunk_timeout,
        )
        logging.info("AI Agent for content generation initialized.")

        # --- Iterate through Topics ---
        for i, topic_item in enumerate(topics_list):
            topic_index = topic_item.get('index', i + 1) # Use parsed index if available
            logging.info(f"--- Processing Topic {topic_index}/{len(topics_list)}: {topic_item.get('object', 'N/A')} ---") # Make it stand out

            # --- Generate Content Variants ---
            # 读取内容生成需要的参数
            content_variants = config.get("variants", 1)
            content_temp = config.get("content_temperature", 0.3)
            content_top_p = config.get("content_top_p", 0.4)
            content_presence_penalty = config.get("content_presence_penalty", 1.5)

            # 调用修改后的 generate_content_for_topic
            content_success = generate_content_for_topic(
                ai_agent,
                prompt_manager, # Pass PromptManager instance
                topic_item,
                run_id,
                topic_index,
                output_handler, # Pass OutputHandler instance
                # 传递具体参数
                variants=content_variants,
                temperature=content_temp,
                top_p=content_top_p,
                presence_penalty=content_presence_penalty,
                enable_content_judge=config.get("enable_content_judge", False)
            )

            # if tweet_content_list: # generate_content_for_topic 现在返回 bool
            if content_success:
                 logging.info(f"Content generation successful for Topic {topic_index}.")
                 # --- Generate Posters ---
                 # TODO: 重构 generate_posters_for_topic 以移除 config 依赖
                 # TODO: 需要确定如何将 content 数据传递给 poster 生成步骤 (已解决：函数内部读取)
                 # 临时方案：可能需要在这里读取由 output_handler 保存的 content 文件
                 # 或者修改 generate_content_for_topic 以返回收集到的 content 数据列表 (选项1)
                 # 暂时跳过 poster 生成的调用，直到确定方案

                 # --- 重新启用 poster 生成调用 ---
                 logging.info(f"Proceeding to poster generation for Topic {topic_index}...")

                 # --- 读取 Poster 生成所需参数 ---
                 poster_variants = config.get("variants", 1) # 通常与 content variants 相同
                 poster_assets_dir = config.get("poster_assets_base_dir")
                 img_base_dir = config.get("image_base_dir")
                 res_dir_config = config.get("resource_dir", [])
                 poster_size = tuple(config.get("poster_target_size", [900, 1200]))
                 txt_possibility = config.get("text_possibility", 0.3)
                 img_frame_possibility = config.get("img_frame_possibility", 0.7)
                 text_bg_possibility = config.get("text_bg_possibility", 0)
                 collage_subdir = config.get("output_collage_subdir", "collage_img")
                 poster_subdir = config.get("output_poster_subdir", "poster")
                 poster_filename = config.get("output_poster_filename", "poster.jpg")
                 poster_content_system_prompt = config.get("poster_content_system_prompt", None)
                 collage_style = config.get("collage_style", None)
                 title_possibility = config.get("title_possibility", 0.3)
                 # 检查关键路径是否存在
                 if not poster_assets_dir or not img_base_dir:
                     logging.error(f"Missing critical paths for poster generation (poster_assets_base_dir or image_base_dir) in config. Skipping posters for topic {topic_index}.")
                     continue # 跳过此主题的海报生成
                 # --- 结束读取参数 ---

                 with open(poster_content_system_prompt, "r", encoding="utf-8") as f:
                     poster_content_system_prompt = f.read()

                 posters_attempted = generate_posters_for_topic(
                     topic_item=topic_item,
                     output_dir=config["output_dir"], # Base output dir is still needed
                     run_id=run_id,
                     topic_index=topic_index,
                     output_handler=output_handler, # <--- 传递 Output Handler
                     # 传递具体参数
                     model_name=config["model"],
                     base_url=config["api_url"],
                     api_key=config["api_key"],
                     variants=poster_variants,
                     title_possibility=title_possibility,
                     poster_assets_base_dir=poster_assets_dir,
                     image_base_dir=img_base_dir,
                     resource_dir_config=res_dir_config,
                     poster_target_size=poster_size,
                     text_possibility=txt_possibility,
                     img_frame_possibility=img_frame_possibility,
                     text_bg_possibility=text_bg_possibility,
                     output_collage_subdir=collage_subdir,
                     output_poster_subdir=poster_subdir,
                     output_poster_filename=poster_filename,
                     system_prompt=poster_content_system_prompt,
                     collage_style=collage_style,
                     timeout=request_timeout
                 )
                 if posters_attempted:
                     logging.info(f"Poster generation process completed for Topic {topic_index}.")

                     # --- 为每个变体添加额外配图 ---
                     logging.info(f"开始为主题 {topic_index} 添加额外配图...")
                     additional_images_count = config.get("additional_images_count", 3)

                     # 循环处理每个变体
                     for j in range(poster_variants):
                         variant_index = j + 1
                         variant_dir = os.path.join(config["output_dir"], run_id, f"{topic_index}_{variant_index}")

                         # 获取海报元数据路径
                         poster_dir = os.path.join(variant_dir, poster_subdir)

                         # 添加目录存在检查
                         if not os.path.exists(poster_dir):
                             logging.warning(f"海报目录不存在: {poster_dir}, 跳过此变体的额外配图处理")
                             continue

                         try:
                             metadata_files = [f for f in os.listdir(poster_dir)
                                              if f.endswith("_metadata.json") and os.path.isfile(os.path.join(poster_dir, f))]
                         except Exception as e:
                             logging.warning(f"访问海报目录时出错: {e}, 跳过此变体的额外配图处理")
                             continue

                         if metadata_files:
                             poster_metadata_path = os.path.join(poster_dir, metadata_files[0])
                             logging.info(f"为变体 {variant_index} 选择额外配图，使用元数据: {poster_metadata_path}")

                             # 调用额外配图选择函数
                             try:
                                 object_name = topic_item.get("object", "").split(".")[0].replace("景点信息-", "").strip()
                                 source_image_dir = os.path.join(img_base_dir, object_name)

                                 if os.path.exists(source_image_dir) and os.path.isdir(source_image_dir):
                                     # 获取图像选择配置参数
                                     image_selection_config = config.get("image_selection", {})
                                     variation_strength = image_selection_config.get("variation_strength", "medium")
                                     extra_effects = image_selection_config.get("extra_effects", True)

                                     logging.info(f"图像处理配置: 变化强度={variation_strength}, 额外效果={'启用' if extra_effects else '禁用'}")

                                     additional_paths = select_additional_images(
                                         run_id=run_id,
                                         topic_index=topic_index,
                                         variant_index=variant_index,
                                         poster_metadata_path=poster_metadata_path,
                                         source_image_dir=source_image_dir,
                                         num_additional_images=additional_images_count,
                                         output_handler=output_handler,
                                         variation_strength=variation_strength,
                                         extra_effects=extra_effects
                                     )

                                     if additional_paths:
                                         logging.info(f"已为变体 {variant_index} 选择 {len(additional_paths)} 张额外配图")
                                     else:
                                         logging.warning(f"未能为变体 {variant_index} 选择任何额外配图")
                                 else:
                                     logging.warning(f"源图像目录不存在: {source_image_dir}")
                             except Exception as e:
                                 logging.exception(f"选择额外配图时发生错误: {e}")
                         else:
                             logging.warning(f"未找到变体 {variant_index} 的海报元数据文件，跳过额外配图选择")

                     # --- 结束添加额外配图 ---

                     success_flag = True # Mark overall success if content AND poster attempts were made
                 else:
                     logging.warning(f"Poster generation skipped or failed early for Topic {topic_index}.")
                     # 即使海报失败，只要内容成功了，也算部分成功？根据需求决定 success_flag
                     # success_flag = True # 取决于是否认为内容成功就足够
                 # logging.warning(f"Skipping poster generation for Topic {topic_index} pending refactor and data passing strategy.")
                 # Mark overall success if content generation succeeded
                 # success_flag = True
            else:
                 logging.warning(f"Content generation failed or yielded no valid results for Topic {topic_index}. Skipping posters.")
            logging.info(f"--- Finished Topic {topic_index} ---")


    except KeyError as e:
        # print(f"\nError: Configuration error during content/poster generation: Missing key '{e}'")
        logging.error(f"Configuration error during content/poster generation: Missing key '{e}'")
        traceback.print_exc()
        return False # Indicate failure due to config error
    except Exception as e:
        # print(f"\nAn unexpected error occurred during content and poster generation: {e}")
        # traceback.print_exc()
        logging.exception("An unexpected error occurred during content and poster generation:")
        return False # Indicate general failure
    finally:
        # Ensure the AI agent is closed
        if ai_agent:
            # print("Closing content generation AI Agent...")
            logging.info("Closing content generation AI Agent...")
        ai_agent.close()

    if success_flag:
         # print("\n--- Step 2: Content and Poster Generation completed (at least partially). ---")
         logging.info("Step 2: Content and Poster Generation completed (at least partially).")
    else:
         # print("\n--- Step 2: Content and Poster Generation completed, but no content/posters were successfully generated or attempted. ---")
         logging.warning("Step 2: Content and Poster Generation completed, but no content/posters were successfully generated or attempted.")
    return success_flag # Return True if at least some steps were attempted

def main():
    # --- Basic Logging Setup ---
    logging.basicConfig(
        level=logging.INFO, # Default level
        format='%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )
    # --- End Logging Setup ---

    parser = argparse.ArgumentParser(description="Travel Content Creator Pipeline")
    parser.add_argument(
        "--config",
        type=str,
        default="poster_gen_config.json",
        help="Path to the configuration file (e.g., poster_gen_config.json)"
    )
    parser.add_argument(
        "--run_id",
        type=str,
        default=None, # Default to None, let the pipeline generate it
        help="Optional specific run ID (e.g., 'my_test_run_01'). If not provided, a timestamp-based ID will be generated."
    )
    parser.add_argument(
        "--topics_file",
        type=str,
        default=None,
        help="Optional path to a pre-generated topics JSON file. If provided, skips topic generation (Step 1)."
    )
    parser.add_argument(
        "--debug",
        action='store_true', # Add debug flag
        help="Enable debug level logging."
    )
    args = parser.parse_args()

    # --- Adjust Logging Level if Debug ---
    if args.debug:
        logging.getLogger().setLevel(logging.DEBUG)
        logging.info("Debug logging enabled.")
    # --- End Debug Level Adjustment ---

    logging.info("Starting Travel Content Creator Pipeline...")
    logging.info(f"Using configuration file: {args.config}")
    if args.run_id: logging.info(f"Using specific run_id: {args.run_id}")
    if args.topics_file: logging.info(f"Using existing topics file: {args.topics_file}")

    config = load_config(args.config)
    if config is None:
        logging.critical("Failed to load configuration. Exiting.")
        sys.exit(1)

    # --- Initialize Output Handler ---
    # For now, always use FileSystemOutputHandler. Later, this could be configurable.
    output_handler: OutputHandler = FileSystemOutputHandler(config.get("output_dir", "result"))
    logging.info(f"Using Output Handler: {output_handler.__class__.__name__}")
    # --- End Output Handler Init ---

    run_id = args.run_id
    # tweet_topic_record = None # No longer the primary way to pass data
    topics_list = None
    system_prompt = None
    user_prompt = None
    pipeline_start_time = time.time()

    # --- Step 1: Topic Generation (or Load Existing) ---
    if args.topics_file:
        logging.info(f"Skipping Topic Generation (Step 1) - Loading topics from: {args.topics_file}")
        topics_list = TopicParser.load_topics_from_json(args.topics_file)
        if topics_list:
            # Try to infer run_id from filename if not provided
            if not run_id:
                try:
                    base = os.path.basename(args.topics_file)
                    # Assuming format "tweet_topic_{run_id}.json" or "tweet_topic.json"
                    if base.startswith("tweet_topic_") and base.endswith(".json"):
                        run_id = base[len("tweet_topic_"): -len(".json")]
                        # print(f"Inferred run_id from topics filename: {run_id}")
                        logging.info(f"Inferred run_id from topics filename: {run_id}")
                    elif base == "tweet_topic.json": # Handle the default name?
                        # Decide how to handle this - maybe use parent dir name or generate new?
                        # For now, let's generate a new one if run_id is still None below
                        logging.warning(f"Loaded topics from default filename '{base}'. Run ID not inferred.")
                    else:
                         # print(f"Warning: Could not infer run_id from topics filename: {base}")
                         logging.warning(f"Could not infer run_id from topics filename: {base}")
                except Exception as e:
                     # print(f"Warning: Error trying to infer run_id from topics filename: {e}")
                     logging.warning(f"Error trying to infer run_id from topics filename: {e}")

            # If run_id is still None after trying to infer, generate one
            if run_id is None:
                 run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S_loaded")
                 # print(f"Generated run_id for loaded topics: {run_id}")
                 logging.info(f"Generated run_id for loaded topics: {run_id}")

            # Prompts are missing when loading from file, handle this if needed later
            system_prompt = "" # Placeholder
            user_prompt = "" # Placeholder
            logging.info(f"Successfully loaded {len(topics_list)} topics for run_id: {run_id}. Prompts are not available.")
            # Optionally, save the loaded topics using the handler?
            # output_handler.handle_topic_results(run_id, topics_list, system_prompt, user_prompt)
        else:
            # print(f"Error: Failed to load topics from {args.topics_file}. Cannot proceed.")
            logging.error(f"Failed to load topics from {args.topics_file}. Cannot proceed.")
            sys.exit(1)
    else:
        # print("--- Executing Topic Generation (Step 1) ---")
        logging.info("Executing Topic Generation (Step 1)...")
        step1_start = time.time()
        # Call the updated function, receive raw data
        run_id, topics_list, system_prompt, user_prompt = run_topic_generation_pipeline(config, args.run_id)
        step1_end = time.time()
        if run_id is not None and topics_list is not None: # Check if step succeeded
            # print(f"Step 1 completed successfully in {step1_end - step1_start:.2f} seconds. Run ID: {run_id}")
            logging.info(f"Step 1 completed successfully in {step1_end - step1_start:.2f} seconds. Run ID: {run_id}")
            # --- Use Output Handler to save results ---
            output_handler.handle_topic_results(run_id, topics_list, system_prompt, user_prompt)
        else:
            # print("Critical: Topic Generation (Step 1) failed. Exiting.")
            logging.critical("Topic Generation (Step 1) failed. Exiting.")
            sys.exit(1)

    # --- Step 2: Content & Poster Generation ---
    if run_id is not None and topics_list is not None:
        # print("\n--- Executing Content and Poster Generation (Step 2) ---")
        logging.info("Executing Content and Poster Generation (Step 2)...")
        step2_start = time.time()
        # TODO: Refactor generate_content_and_posters_step to accept topics_list
        # and use the output_handler instead of saving files directly.
        # For now, we might need to pass topics_list and handler, or adapt it.
        # Let's tentatively adapt the call signature, assuming the function will be refactored.
        success = generate_content_and_posters_step(config, run_id, topics_list, output_handler)
        step2_end = time.time()
        if success:
            # print(f"Step 2 completed in {step2_end - step2_start:.2f} seconds.")
            logging.info(f"Step 2 completed in {step2_end - step2_start:.2f} seconds.")
        else:
            # print("Warning: Step 2 finished, but may have encountered errors or generated no output.")
            logging.warning("Step 2 finished, but may have encountered errors or generated no output.")
    else:
        # print("Error: Cannot proceed to Step 2: Invalid run_id or topics_list from Step 1.")
        logging.error("Cannot proceed to Step 2: Invalid run_id or topics_list from Step 1.")

    # --- Finalize Output ---
    if run_id:
        output_handler.finalize(run_id)
    # --- End Finalize ---

    pipeline_end_time = time.time()
    # print(f"\nPipeline finished. Total execution time: {pipeline_end_time - pipeline_start_time:.2f} seconds.")
    logging.info(f"Pipeline finished. Total execution time: {pipeline_end_time - pipeline_start_time:.2f} seconds.")
    # print(f"Results for run_id '{run_id}' are in: {os.path.join(config.get('output_dir', 'result'), run_id)}")
    logging.info(f"Results for run_id '{run_id}' are in: {os.path.join(config.get('output_dir', 'result'), run_id)}")

if __name__ == "__main__":
    main()