增加了图片目录模糊匹配

This commit is contained in:
jinye_huang 2025-04-27 10:38:30 +08:00
parent 21f3c15a32
commit 568400aa32
4 changed files with 116 additions and 10 deletions

View File

@ -1,7 +1,7 @@
{ {
"date": "4月29日,4月30日, 4月28日, 5月1日", "date": "4月29日,4月30日, 4月28日, 5月1日",
"num": 10, "num": 5,
"variants": 5, "variants": 1,
"topic_temperature": 0.2, "topic_temperature": 0.2,
"topic_top_p": 0.3, "topic_top_p": 0.3,
"topic_presence_penalty": 1.5, "topic_presence_penalty": 1.5,
@ -42,13 +42,13 @@
{ {
"type": "Object", "type": "Object",
"file_path": [ "file_path": [
"./resource/Object/笔架山居森林度假酒店.txt" "./resource/Object/美的鹭湖鹭栖台酒店.txt"
] ]
}, },
{ {
"type": "Description", "type": "Description",
"file_path": [ "file_path": [
"./resource/Object/笔架山居森林度假酒店.txt" "./resource/Object/美的鹭湖鹭栖台酒店.txt"
] ]
}, },
{ {

View File

@ -10,6 +10,7 @@ from datetime import datetime
import sys import sys
import traceback import traceback
import logging # Add logging import logging # Add logging
import re
# sys.path.append('/root/autodl-tmp') # No longer needed if running as a module or if path is set correctly # sys.path.append('/root/autodl-tmp') # No longer needed if running as a module or if path is set correctly
# 从本地模块导入 # 从本地模块导入
# from TravelContentCreator.core.ai_agent import AI_Agent # Remove project name prefix # from TravelContentCreator.core.ai_agent import AI_Agent # Remove project name prefix
@ -548,13 +549,88 @@ def generate_posters_for_topic(topic_item: dict,
# Construct and check INPUT image paths # Construct and check INPUT image paths
input_img_dir_path = os.path.join(image_base_dir, object_name) input_img_dir_path = os.path.join(image_base_dir, object_name)
if not os.path.exists(input_img_dir_path) or not os.path.isdir(input_img_dir_path): if not os.path.exists(input_img_dir_path) or not os.path.isdir(input_img_dir_path):
logging.warning(f"Warning: Modify Image directory not found or not a directory: '{input_img_dir_path}'. Skipping posters for this topic.") # 模糊匹配:如果找不到完全匹配的目录,尝试查找包含关键词的目录
return False logging.info(f"尝试对图片目录进行模糊匹配: {object_name}")
found_dir = None
# 1. 尝试获取image_base_dir下的所有目录
try:
all_dirs = [d for d in os.listdir(image_base_dir)
if os.path.isdir(os.path.join(image_base_dir, d))]
logging.info(f"找到 {len(all_dirs)} 个图片目录可用于模糊匹配")
# 2. 提取对象名称中的关键词
# 例如:"美的鹭湖鹭栖台酒店+盈香心动乐园" 先按分隔符拆成 ["美的鹭湖鹭栖台酒店", "盈香心动乐园"],随后再追加所有相邻的两字中文片段(如 "美的"、"的鹭"、"鹭湖" 等)
# 首先通过常见分隔符分割(+、空格、_、-等)
parts = re.split(r'[+\s_\-]', object_name)
keywords = []
for part in parts:
# 只保留长度大于1的有意义关键词
if len(part) > 1:
keywords.append(part)
# 再补充更短的语义单元:对于中文名称,提取所有相邻的两字片段
# (仅当两个字符都是中文时才作为关键词保留)
for i in range(len(object_name) - 1):
keyword = object_name[i:i+2] # 提取2个字符
if len(keyword) == 2 and all('\u4e00' <= c <= '\u9fff' for c in keyword):
keywords.append(keyword)
# 3. 对每个目录进行评分
dir_scores = {}
for directory in all_dirs:
score = 0
dir_lower = directory.lower()
# 为每个匹配的关键词增加分数
for keyword in keywords:
if keyword.lower() in dir_lower:
score += 1
# 如果得分大于0(即至少匹配一个关键词),则记录该目录
if score > 0:
dir_scores[directory] = score
# 4. 选择得分最高的目录
if dir_scores:
best_match = max(dir_scores.items(), key=lambda x: x[1])
found_dir = best_match[0]
logging.info(f"模糊匹配成功!匹配目录: {found_dir},匹配分数: {best_match[1]}")
# 更新图片目录路径
input_img_dir_path = os.path.join(image_base_dir, found_dir)
logging.info(f"使用模糊匹配的图片目录: {input_img_dir_path}")
else:
logging.warning(f"模糊匹配未找到任何包含关键词的目录")
except Exception as e:
logging.warning(f"模糊匹配过程中出错: {e}")
# 如果仍然无法找到有效目录,则返回错误
if not found_dir or not os.path.exists(input_img_dir_path) or not os.path.isdir(input_img_dir_path):
logging.warning(f"Warning: 即使通过模糊匹配也无法找到图片目录: '{input_img_dir_path}'. Skipping posters for this topic.")
return False
# Locate Description File using resource_dir_config parameter # Locate Description File using resource_dir_config parameter
info_directory = [] info_directory = []
description_file_path = None description_file_path = None
found_description = False found_description = False
# 准备关键词列表用于模糊匹配
# 与上面图片目录匹配类似,提取对象名称的关键词
parts = re.split(r'[+\s_\-]', object_name)
keywords = []
for part in parts:
if len(part) > 1:
keywords.append(part)
# 尝试提取中文短语作为关键词
for i in range(len(object_name) - 1):
keyword = object_name[i:i+2]
if len(keyword) == 2 and all('\u4e00' <= c <= '\u9fff' for c in keyword):
keywords.append(keyword)
logging.info(f"用于描述文件模糊匹配的关键词: {keywords}")
# 尝试精确匹配
for dir_info in resource_dir_config: for dir_info in resource_dir_config:
if dir_info.get("type") == "Description": if dir_info.get("type") == "Description":
for file_path in dir_info.get("file_path", []): for file_path in dir_info.get("file_path", []):
@ -562,15 +638,45 @@ def generate_posters_for_topic(topic_item: dict,
description_file_path = file_path description_file_path = file_path
if os.path.exists(description_file_path): if os.path.exists(description_file_path):
info_directory = [description_file_path] info_directory = [description_file_path]
logging.info(f"Found and using description file from config: {description_file_path}") logging.info(f"找到并使用精确匹配的描述文件: {description_file_path}")
found_description = True found_description = True
else: else:
logging.warning(f"Warning: Description file specified in config not found: {description_file_path}") logging.warning(f"Warning: 配置中指定的描述文件未找到: {description_file_path}")
break break
if found_description: if found_description:
break break
# 如果精确匹配失败,尝试模糊匹配
if not found_description: if not found_description:
logging.info(f"Warning: No matching description file found for object '{object_name}' in config resource_dir (type='Description').") logging.info(f"未找到'{object_name}'的精确匹配描述文件,尝试模糊匹配...")
best_score = 0
best_file = None
for dir_info in resource_dir_config:
if dir_info.get("type") == "Description":
for file_path in dir_info.get("file_path", []):
file_name = os.path.basename(file_path)
score = 0
# 计算关键词匹配分数
for keyword in keywords:
if keyword.lower() in file_name.lower():
score += 1
# 如果当前文件得分更高,更新最佳匹配
if score > best_score and os.path.exists(file_path):
best_score = score
best_file = file_path
# 如果找到了最佳匹配文件
if best_file:
description_file_path = best_file
info_directory = [description_file_path]
logging.info(f"模糊匹配找到描述文件: {description_file_path},匹配分数: {best_score}")
found_description = True
if not found_description:
logging.warning(f"未找到对象'{object_name}'的匹配描述文件。")
# Generate Text Configurations for All Variants # Generate Text Configurations for All Variants
try: try: