增加了图片目录模糊匹配

2025-04-27 10:38:30 +08:00 · 2025-04-27 10:38:30 +08:00 · 568400aa32
commit 568400aa32
parent 21f3c15a32
4 changed files with 116 additions and 10 deletions
--- a/poster_gen_config.json
+++ b/poster_gen_config.json
@@ -1,7 +1,7 @@
 {
    "date": "4月29日,4月30日, 4月28日, 5月1日",
-    "num": 10,
+    "num": 5,
-    "variants": 5,
+    "variants": 1,
    "topic_temperature": 0.2,
    "topic_top_p": 0.3,
    "topic_presence_penalty": 1.5,
@@ -42,13 +42,13 @@
        {
            "type": "Object",
            "file_path": [
-                "./resource/Object/笔架山居森林度假酒店.txt"
+                "./resource/Object/美的鹭湖鹭栖台酒店.txt"
            ]
        },
        {
            "type": "Description",
            "file_path": [
-                "./resource/Object/笔架山居森林度假酒店.txt"
+                "./resource/Object/美的鹭湖鹭栖台酒店.txt"
            ]
        },
        {
--- a/utils/pycache/content_generator.cpython-312.pyc
+++ b/utils/pycache/content_generator.cpython-312.pyc
--- a/utils/pycache/tweet_generator.cpython-312.pyc
+++ b/utils/pycache/tweet_generator.cpython-312.pyc
--- a/utils/tweet_generator.py
+++ b/utils/tweet_generator.py
@@ -10,6 +10,7 @@ from datetime import datetime
 import sys
 import traceback
 import logging # Add logging
 import re
 # sys.path.append('/root/autodl-tmp') # No longer needed if running as a module or if path is set correctly
 # 从本地模块导入
 # from TravelContentCreator.core.ai_agent import AI_Agent # Remove project name prefix
@@ -548,13 +549,88 @@ def generate_posters_for_topic(topic_item: dict,
    # Construct and check INPUT image paths
    input_img_dir_path = os.path.join(image_base_dir, object_name)
    if not os.path.exists(input_img_dir_path) or not os.path.isdir(input_img_dir_path):
-        logging.warning(f"Warning: Modify Image directory not found or not a directory: '{input_img_dir_path}'. Skipping posters for this topic.")
+        # 模糊匹配：如果找不到完全匹配的目录，尝试查找包含关键词的目录
-        return False
+        logging.info(f"尝试对图片目录进行模糊匹配: {object_name}")
        found_dir = None
        # 1. 尝试获取image_base_dir下的所有目录
        try:
            all_dirs = [d for d in os.listdir(image_base_dir) 
                       if os.path.isdir(os.path.join(image_base_dir, d))]
            logging.info(f"找到 {len(all_dirs)} 个图片目录可用于模糊匹配")
            # 2. 提取对象名称中的关键词
            # e.g. "美的鹭湖鹭栖台酒店+盈香心动乐园" is split on the separators into ["美的鹭湖鹭栖台酒店", "盈香心动乐园"]; 2-character Chinese substrings are appended afterwards
            # 首先通过常见分隔符分割（+、空格、_、-等）
            parts = re.split(r'[+\s_\-]', object_name)
            keywords = []
            for part in parts:
                # 只保留长度大于1的有意义关键词
                if len(part) > 1:
                    keywords.append(part)
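            # Also add shorter units: every 2-character window of the object name
            # whose characters are all in U+4E00–U+9FFF (only 2-character windows are extracted, no 3-character phrases)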
            for i in range(len(object_name) - 1):
                keyword = object_name[i:i+2]  # 提取2个字符
                if len(keyword) == 2 and all('\u4e00' <= c <= '\u9fff' for c in keyword):
                    keywords.append(keyword)
            # 3. 对每个目录进行评分
            dir_scores = {}
            for directory in all_dirs:
                score = 0
                dir_lower = directory.lower()
                # 为每个匹配的关键词增加分数
                for keyword in keywords:
                    if keyword.lower() in dir_lower:
                        score += 1
                # 如果得分大于0（至少匹配一个关键词），记录该目录
                if score > 0:
                    dir_scores[directory] = score
            # 4. 选择得分最高的目录
            if dir_scores:
                best_match = max(dir_scores.items(), key=lambda x: x[1])
                found_dir = best_match[0]
                logging.info(f"模糊匹配成功！匹配目录: {found_dir}，匹配分数: {best_match[1]}")
                # 更新图片目录路径
                input_img_dir_path = os.path.join(image_base_dir, found_dir)
                logging.info(f"使用模糊匹配的图片目录: {input_img_dir_path}")
            else:
                logging.warning(f"模糊匹配未找到任何包含关键词的目录")
        except Exception as e:
            logging.warning(f"模糊匹配过程中出错: {e}")
        # 如果仍然无法找到有效目录，则返回错误
        if not found_dir or not os.path.exists(input_img_dir_path) or not os.path.isdir(input_img_dir_path):
            logging.warning(f"Warning: 即使通过模糊匹配也无法找到图片目录: '{input_img_dir_path}'. Skipping posters for this topic.")
            return False
    # Locate Description File using resource_dir_config parameter
    info_directory = []
    description_file_path = None
    found_description = False
    # 准备关键词列表用于模糊匹配
    # 与上面图片目录匹配类似，提取对象名称的关键词
    parts = re.split(r'[+\s_\-]', object_name)
    keywords = []
    for part in parts:
        if len(part) > 1:
            keywords.append(part)
    # 尝试提取中文短语作为关键词
    for i in range(len(object_name) - 1):
        keyword = object_name[i:i+2]
        if len(keyword) == 2 and all('\u4e00' <= c <= '\u9fff' for c in keyword):
            keywords.append(keyword)
    logging.info(f"用于描述文件模糊匹配的关键词: {keywords}")
    # 尝试精确匹配
    for dir_info in resource_dir_config:
        if dir_info.get("type") == "Description":
            for file_path in dir_info.get("file_path", []):
@@ -562,15 +638,45 @@ def generate_posters_for_topic(topic_item: dict,
                    description_file_path = file_path
                    if os.path.exists(description_file_path):
                        info_directory = [description_file_path]
-                        logging.info(f"Found and using description file from config: {description_file_path}")
+                        logging.info(f"找到并使用精确匹配的描述文件: {description_file_path}")
                        found_description = True
                    else:
-                        logging.warning(f"Warning: Description file specified in config not found: {description_file_path}")
+                        logging.warning(f"Warning: 配置中指定的描述文件未找到: {description_file_path}")
                    break
            if found_description:
-                break 
+                break
    # 如果精确匹配失败，尝试模糊匹配
    if not found_description:
-        logging.info(f"Warning: No matching description file found for object '{object_name}' in config resource_dir (type='Description').")
+        logging.info(f"未找到'{object_name}'的精确匹配描述文件，尝试模糊匹配...")
        best_score = 0
        best_file = None
        for dir_info in resource_dir_config:
            if dir_info.get("type") == "Description":
                for file_path in dir_info.get("file_path", []):
                    file_name = os.path.basename(file_path)
                    score = 0
                    # 计算关键词匹配分数
                    for keyword in keywords:
                        if keyword.lower() in file_name.lower():
                            score += 1
                    # 如果当前文件得分更高，更新最佳匹配
                    if score > best_score and os.path.exists(file_path):
                        best_score = score
                        best_file = file_path
        # 如果找到了最佳匹配文件
        if best_file:
            description_file_path = best_file
            info_directory = [description_file_path]
            logging.info(f"模糊匹配找到描述文件: {description_file_path}，匹配分数: {best_score}")
            found_description = True
    if not found_description:
        logging.warning(f"未找到对象'{object_name}'的匹配描述文件。")
    # Generate Text Configurations for All Variants
    try: