选题管理器模块增加了模糊匹配功能

This commit is contained in:
jinye_huang 2025-04-27 15:57:59 +08:00
parent 5a3c638a3f
commit d9f37d6816
4 changed files with 267 additions and 275 deletions

View File

@ -1,7 +1,7 @@
{
"date": "4月29日,4月30日, 4月28日, 5月1日",
"num": 15,
"variants": 15,
"num": 1,
"variants": 2,
"topic_temperature": 0.2,
"topic_top_p": 0.3,
"topic_presence_penalty": 1.5,

View File

@ -1,3 +1,4 @@
主体:美的鹭湖鹭栖台酒店
【佛山·盈香心动乐园X美的鹭湖鹭栖台酒店】周末不加收浪漫四季花海萌宠乐园惬意度假~50+机动游戏项目全体验399元美的鹭栖台高级房+大阳台观景+2大2小两天无限次盈香生态园门票+童话牧场、森林剧场、无动力乐园、机动乐园畅玩加100元还可升级全新网红项目芭比飞车、飞毯上山全包
【价格】399 元r

View File

@ -7,6 +7,7 @@ Manages the construction of prompts for different AI generation tasks.
import os
import traceback
import logging # Add logging
import re # 添加正则表达式支持
from .resource_loader import ResourceLoader # Use relative import within the same package
class PromptManager:
@ -107,6 +108,89 @@ class PromptManager:
if content:
self._refer_cache[filename] = content
def find_directory_fuzzy_match(self, name, directory=None, files=None):
    """Find the file whose name best matches ``name``.

    Candidates come from ``files`` when it is non-empty; otherwise the
    regular files directly inside ``directory`` are used.

    Matching proceeds in two stages:

    1. Exact match: a candidate whose name, minus extension and ignoring
       case, equals ``name`` (minus extension) wins immediately with a
       score of 10. It is checked first so that a longer filename that
       merely contains many keywords can never outrank the exact match.
    2. Keyword match: ``name`` is split on ``+``, whitespace, ``_`` and
       ``-`` (parts longer than one character are kept) and every pair of
       adjacent CJK characters is added as an extra keyword. Each keyword
       found in a candidate's lowercase filename adds one point; the
       highest-scoring candidate wins, ties going to the earliest one.

    Args:
        name: Target name to look for.
        directory: Optional directory whose files are the candidates.
        files: Optional list of candidate file names; takes precedence
            over ``directory`` when non-empty.

    Returns:
        tuple: ``(best_matching_filename, score)``, or ``(None, 0)`` when
        there are no candidates, nothing matches, or an error occurs.
    """
    logging.info(f"尝试对文件名进行模糊匹配: {name}")

    try:
        # Build the candidate list.
        candidates = []
        if files:
            candidates = list(files)
        elif directory and os.path.isdir(directory):
            candidates = [
                entry for entry in os.listdir(directory)
                if os.path.isfile(os.path.join(directory, entry))
            ]

        if not candidates:
            logging.warning("没有可用于匹配的文件")
            return None, 0

        logging.info(f"找到 {len(candidates)} 个文件可用于模糊匹配")

        # Keywords, part 1: pieces between common separators; single
        # characters are too ambiguous to be useful.
        keywords = [part for part in re.split(r'[+\s_\-]', name) if len(part) > 1]
        # Keywords, part 2: every pair of adjacent CJK characters, which
        # approximates short Chinese words without a tokenizer.
        keywords.extend(
            name[i:i + 2]
            for i in range(len(name) - 1)
            if all('\u4e00' <= ch <= '\u9fff' for ch in name[i:i + 2])
        )

        logging.info(f"用于文件模糊匹配的关键词: {keywords}")

        # Stage 1: an exact (extension- and case-insensitive) match always
        # wins, regardless of how many keywords other candidates contain.
        exact_score = 10
        target_stem = os.path.splitext(name)[0].lower()
        for filename in candidates:
            if os.path.splitext(filename)[0].lower() == target_stem:
                logging.info(f"模糊匹配成功!匹配文件: {filename},匹配分数: {exact_score}")
                return filename, exact_score

        # Stage 2: score each candidate by the number of keyword hits.
        lowered_keywords = [keyword.lower() for keyword in keywords]
        file_scores = {}
        for filename in candidates:
            file_lower = filename.lower()
            score = sum(1 for keyword in lowered_keywords if keyword in file_lower)
            # Only candidates with at least one hit are considered.
            if score > 0:
                file_scores[filename] = score

        if not file_scores:
            logging.warning("模糊匹配未找到任何包含关键词的文件")
            return None, 0

        # max() returns the first maximal item, so ties keep candidate order.
        found_file, score = max(file_scores.items(), key=lambda item: item[1])
        logging.info(f"模糊匹配成功!匹配文件: {found_file},匹配分数: {score}")
        return found_file, score

    except Exception as e:
        # Best-effort lookup: callers treat (None, 0) as "not found".
        logging.exception(f"文件模糊匹配过程中出错: {e}")
        return None, 0
def _get_style_content(self, style_name):
"""获取Style文件内容优先从缓存获取如果不存在则尝试从目录加载"""
# 首先检查缓存
@ -121,35 +205,18 @@ class PromptManager:
style_name = os.path.splitext(style_name)[0] # 移除扩展名
# 尝试模糊匹配缓存中的文件名
for cache_key in self._style_cache.keys():
cache_key_lower = cache_key.lower()
style_name_lower = style_name.lower()
cache_files = list(self._style_cache.keys())
matched_key, score = self.find_directory_fuzzy_match(style_name, files=cache_files)
# 完全匹配
if cache_key_lower == style_name_lower:
return self._style_cache[cache_key]
# 部分匹配
# 攻略风格
if ("攻略" in style_name_lower or "干货" in style_name_lower) and "攻略" in cache_key_lower:
logging.info(f"模糊匹配 - 找到部分匹配的Style文件: '{cache_key}' 匹配 '{style_name}'")
return self._style_cache[cache_key]
# 轻奢风格
if "轻奢" in style_name_lower and "轻奢" in cache_key_lower:
logging.info(f"模糊匹配 - 找到部分匹配的Style文件: '{cache_key}' 匹配 '{style_name}'")
return self._style_cache[cache_key]
# 推荐风格
if ("推荐" in style_name_lower or "种草" in style_name_lower) and "推荐" in cache_key_lower:
logging.info(f"模糊匹配 - 找到部分匹配的Style文件: '{cache_key}' 匹配 '{style_name}'")
return self._style_cache[cache_key]
# 美食风格
if "美食" in style_name_lower and "美食" in cache_key_lower:
logging.info(f"模糊匹配 - 找到部分匹配的Style文件: '{cache_key}' 匹配 '{style_name}'")
return self._style_cache[cache_key]
if matched_key and score > 0:
return self._style_cache[matched_key]
# 如果没有在缓存中找到模糊匹配尝试从prompts_dir加载
if self.prompts_dir:
style_path = os.path.join(self.prompts_dir, "Style", style_file)
style_dir = os.path.join(self.prompts_dir, "Style")
style_path = os.path.join(style_dir, style_file)
# 精确路径匹配
if os.path.exists(style_path):
content = ResourceLoader.load_file_content(style_path)
if content:
@ -158,38 +225,21 @@ class PromptManager:
self._style_cache[style_file] = content
return content
# 如果直接加载失败,尝试列出目录中的所有文件并进行模糊匹配
style_dir = os.path.join(self.prompts_dir, "Style")
# 如果精确匹配失败,尝试目录中的模糊匹配
if os.path.isdir(style_dir):
try:
files = os.listdir(style_dir)
style_name_lower = style_name.lower()
for file in files:
file_lower = file.lower()
# 检查关键词匹配
matched = False
if ("攻略" in style_name_lower or "干货" in style_name_lower) and "攻略" in file_lower:
matched = True
elif "轻奢" in style_name_lower and "轻奢" in file_lower:
matched = True
elif ("推荐" in style_name_lower or "种草" in style_name_lower) and "推荐" in file_lower:
matched = True
elif "美食" in style_name_lower and "美食" in file_lower:
matched = True
if matched:
matched_path = os.path.join(style_dir, file)
logging.info(f"模糊匹配 - 在目录中找到部分匹配的Style文件: '{file}' 匹配 '{style_name}'")
matched_file, score = self.find_directory_fuzzy_match(style_name, directory=style_dir)
if matched_file and score > 0:
matched_path = os.path.join(style_dir, matched_file)
content = ResourceLoader.load_file_content(matched_path)
if content:
# 保存到缓存
self._style_cache[style_name] = content
self._style_cache[file] = content
self._style_cache[matched_file] = content
file_without_ext = os.path.splitext(matched_file)[0]
self._style_cache[file_without_ext] = content # 同时缓存不带扩展名的版本
return content
except Exception as e:
logging.warning(f"尝试列出Style目录内容时出错: {e}")
logging.warning(f"未能找到Style文件: '{style_name}',尝试过以下位置: 缓存, {self.prompts_dir}/Style/")
return None
def _get_demand_content(self, demand_name):
@ -206,41 +256,18 @@ class PromptManager:
demand_name = os.path.splitext(demand_name)[0] # 移除扩展名
# 尝试模糊匹配缓存中的文件名
for cache_key in self._demand_cache.keys():
cache_key_lower = cache_key.lower()
demand_name_lower = demand_name.lower()
cache_files = list(self._demand_cache.keys())
matched_key, score = self.find_directory_fuzzy_match(demand_name, files=cache_files)
# 完全匹配
if cache_key_lower == demand_name_lower:
return self._demand_cache[cache_key]
# 部分匹配:检查需求名称是否是缓存键的一部分,或者缓存键是否是需求名称的一部分
# 例如"亲子家庭文旅需求"能匹配到"亲子向文旅需求"
if "亲子" in demand_name_lower and "亲子" in cache_key_lower:
logging.info(f"模糊匹配 - 找到部分匹配的Demand文件: '{cache_key}' 匹配 '{demand_name}'")
return self._demand_cache[cache_key]
if "情侣" in demand_name_lower and "情侣" in cache_key_lower:
logging.info(f"模糊匹配 - 找到部分匹配的Demand文件: '{cache_key}' 匹配 '{demand_name}'")
return self._demand_cache[cache_key]
if "职场" in demand_name_lower and "职场" in cache_key_lower:
logging.info(f"模糊匹配 - 找到部分匹配的Demand文件: '{cache_key}' 匹配 '{demand_name}'")
return self._demand_cache[cache_key]
if "学生" in demand_name_lower and "学生" in cache_key_lower:
logging.info(f"模糊匹配 - 找到部分匹配的Demand文件: '{cache_key}' 匹配 '{demand_name}'")
return self._demand_cache[cache_key]
if "银发" in demand_name_lower and "夕阳红" in cache_key_lower:
logging.info(f"模糊匹配 - 找到部分匹配的Demand文件: '{cache_key}' 匹配 '{demand_name}'")
return self._demand_cache[cache_key]
if "夕阳红" in demand_name_lower and "银发" in cache_key_lower:
logging.info(f"模糊匹配 - 找到部分匹配的Demand文件: '{cache_key}' 匹配 '{demand_name}'")
return self._demand_cache[cache_key]
if "周边" in demand_name_lower and "周边" in cache_key_lower:
logging.info(f"模糊匹配 - 找到部分匹配的Demand文件: '{cache_key}' 匹配 '{demand_name}'")
return self._demand_cache[cache_key]
if matched_key and score > 0:
return self._demand_cache[matched_key]
# 如果没有在缓存中找到模糊匹配尝试从prompts_dir加载向后兼容
if self.prompts_dir:
demand_path = os.path.join(self.prompts_dir, "Demand", demand_file)
demand_dir = os.path.join(self.prompts_dir, "Demand")
demand_path = os.path.join(demand_dir, demand_file)
# 精确路径匹配
if os.path.exists(demand_path):
content = ResourceLoader.load_file_content(demand_path)
if content:
@ -249,41 +276,19 @@ class PromptManager:
self._demand_cache[demand_file] = content
return content
# 如果直接加载失败,尝试列出目录中的所有文件并进行模糊匹配
demand_dir = os.path.join(self.prompts_dir, "Demand")
# 如果精确匹配失败,尝试目录中的模糊匹配
if os.path.isdir(demand_dir):
try:
files = os.listdir(demand_dir)
demand_name_lower = demand_name.lower()
for file in files:
file_lower = file.lower()
# 检查关键词匹配
matched = False
if "亲子" in demand_name_lower and "亲子" in file_lower:
matched = True
elif "情侣" in demand_name_lower and "情侣" in file_lower:
matched = True
elif "职场" in demand_name_lower and "职场" in file_lower:
matched = True
elif "学生" in demand_name_lower and "学生" in file_lower:
matched = True
elif ("银发" in demand_name_lower or "夕阳红" in demand_name_lower) and ("银发" in file_lower or "夕阳红" in file_lower):
matched = True
elif "周边" in demand_name_lower and "周边" in file_lower:
matched = True
if matched:
matched_path = os.path.join(demand_dir, file)
logging.info(f"模糊匹配 - 在目录中找到部分匹配的Demand文件: '{file}' 匹配 '{demand_name}'")
matched_file, score = self.find_directory_fuzzy_match(demand_name, directory=demand_dir)
if matched_file and score > 0:
matched_path = os.path.join(demand_dir, matched_file)
content = ResourceLoader.load_file_content(matched_path)
if content:
# 保存到缓存
self._demand_cache[demand_name] = content
self._demand_cache[file] = content
self._demand_cache[matched_file] = content
file_without_ext = os.path.splitext(matched_file)[0]
self._demand_cache[file_without_ext] = content # 同时缓存不带扩展名的版本
return content
except Exception as e:
logging.warning(f"尝试列出Demand目录内容时出错: {e}")
# 如果所有尝试都失败
logging.warning(f"未能找到Demand文件: '{demand_name}',尝试过以下位置: 缓存, {self.prompts_dir}/Demand/")
@ -415,15 +420,17 @@ class PromptManager:
return None, None
def get_content_prompts(self, topic_item):
"""Constructs the system and user prompts for content generation based on a topic item."""
logging.info(f"Constructing content prompts for topic: {topic_item.get('object', 'N/A')}...")
"""Constructs the system and user prompts for content generation."""
logging.info("Constructing prompts for content generation...")
try:
# --- System Prompt ---
system_prompt = self._system_prompt_cache.get("content")
if not system_prompt:
if not self.content_system_prompt_path:
logging.error("Content system prompt path not provided during PromptManager initialization.")
logging.error("Content system prompt path not provided.")
return None, None
system_prompt = ResourceLoader.load_file_content(self.content_system_prompt_path)
if system_prompt:
self._system_prompt_cache["content"] = system_prompt
@ -432,154 +439,138 @@ class PromptManager:
return None, None
# --- User Prompt ---
user_prompt = ""
style = ""
demand = ""
refers = ""
object_info = ""
# 1. 添加Demand部分 (直接使用 topic_item['logic'] 的描述性文本)
try:
demand_description = topic_item.get('logic')
if demand_description:
user_prompt += f"Demand Logic:\n{demand_description}\n"
else:
logging.warning("Warning: 'logic' key missing or empty in topic_item for Demand prompt.")
except Exception as e:
logging.exception("Error processing Demand description:")
# 2. Object Info - 先列出所有可用文件,再注入匹配文件的内容
try:
object_name_from_topic = topic_item.get('object') # e.g., "尚书第建筑群"
object_file_basenames = []
matched_object_file_path = None
matched_object_basename = None
# 遍历查找 Object 文件
for dir_info in self.resource_dir_config:
if dir_info.get("type") == "Object":
for file_path in dir_info.get("file_path", []):
basename = os.path.basename(file_path)
object_file_basenames.append(basename)
# 尝试匹配当前 topic 的 object (仅当尚未找到匹配时)
if object_name_from_topic and not matched_object_file_path:
cleaned_resource_name = basename
if cleaned_resource_name.startswith("景点信息-"):
cleaned_resource_name = cleaned_resource_name[len("景点信息-"):]
if cleaned_resource_name.endswith(".txt"):
cleaned_resource_name = cleaned_resource_name[:-len(".txt")]
if cleaned_resource_name and cleaned_resource_name in object_name_from_topic:
matched_object_file_path = file_path
matched_object_basename = basename
# 注意:这里不 break继续收集所有文件名
# 构建提示词 - Part 1: 文件列表
if object_file_basenames:
user_prompt += "Object信息:\n"
# user_prompt += f"{object_file_basenames}\n\n" # 直接打印列表可能不够清晰
for fname in object_file_basenames:
user_prompt += f"- {fname}\n"
user_prompt += "\n" # 加一个空行
logging.info(f"Listed {len(object_file_basenames)} available object files.")
else:
logging.warning("No resource directory entry found with type 'Object', or it has no file paths.")
# 构建提示词 - Part 2: 注入匹配文件内容
if matched_object_file_path:
logging.info(f"Attempting to load content for matched object file: {matched_object_basename}")
matched_object_content = ResourceLoader.load_file_content(matched_object_file_path)
if matched_object_content:
user_prompt += f"{matched_object_basename}\n{matched_object_content}\n\n"
logging.info(f"Successfully loaded and injected content for: {matched_object_basename}")
else:
logging.warning(f"Object file matched ({matched_object_basename}) but could not be loaded or is empty.")
elif object_name_from_topic: # 只有当 topic 中指定了 object 但没找到匹配文件时才警告
logging.warning(f"Could not find a matching Object resource file to inject content for '{object_name_from_topic}'. Only the list of files was provided.")
except KeyError:
logging.warning("Warning: 'object' key potentially missing in topic_item.")
except Exception as e:
logging.exception("Error processing Object prompt section:")
# 3. 添加Product信息 (if applicable)
try:
product_name = topic_item.get('product')
product_logic_description = topic_item.get('product_logic') # Directly use this description
if product_name:
# Add Product Logic description first (if available)
if product_logic_description:
user_prompt += f"Product Logic:\n{product_logic_description}\n"
else:
logging.warning(f"Warning: 'product_logic' key missing or empty for product '{product_name}'.")
# Then, load Product Info file
product_file_path = None
for dir_info in self.resource_dir_config:
if dir_info.get("type") == "Product":
for file_path in dir_info.get("file_path", []):
if product_name in os.path.basename(file_path):
product_file_path = file_path
break
if product_file_path: break
if product_file_path:
product_content = ResourceLoader.load_file_content(product_file_path)
if product_content:
user_prompt += f"Product Info:\n{product_content}\n"
else:
logging.warning(f"Product file could not be loaded: {product_file_path}")
else:
logging.warning(f"Product file path not found in config for: {product_name}")
except KeyError:
logging.warning("Warning: Missing 'product' key in topic_item for Product prompt.")
except Exception as e:
logging.exception("Error processing Product prompt:")
# 4. 添加Style信息 (加载文件 based on topic_item['style'])
try:
style_name = topic_item.get('style')
if style_name:
# Extract style from topic
if "style" in topic_item and topic_item["style"]:
style_name = topic_item["style"]
style_content = self._get_style_content(style_name)
if style_content:
user_prompt += f"Style Info:\n{style_content}\n"
style = f"Style: {style_name}\n{style_content}\n\n"
else:
logging.warning(f"Style file not found or empty for: {style_name}")
else:
logging.warning("Warning: 'style' key missing or empty in topic_item.")
except Exception as e:
logging.exception("Error processing Style prompt:")
logging.warning(f"Style content for '{style_name}' not found.")
# 5. 添加Target Audience信息 (加载文件 based on topic_item['target_audience'])
try:
target_audience_name = topic_item.get('target_audience')
if target_audience_name:
target_audience_content = self._get_demand_content(target_audience_name)
if target_audience_content:
user_prompt += f"Target Audience Info:\n{target_audience_content}\n"
# Extract demand from topic
if "target_audience" in topic_item and topic_item["target_audience"]:
demand_name = topic_item["target_audience"]
demand_content = self._get_demand_content(demand_name)
if demand_content:
demand = f"Demand: {demand_name}\n{demand_content}\n\n"
else:
logging.warning(f"Target Audience file not found or empty for: {target_audience_name}")
else:
logging.warning("Warning: 'target_audience' key missing or empty in topic_item.")
except Exception as e:
logging.exception("Error processing Target Audience prompt:")
logging.warning(f"Demand content for '{demand_name}' not found.")
# 6. 添加Refer信息 (加载所有Refer文件的内容)
try:
refer_content_all = self._get_all_refer_contents()
if refer_content_all:
user_prompt += f"Refer Info:\n{refer_content_all}"
else:
logging.warning("No content loaded from Refer files.")
except Exception as e:
logging.exception("Error processing Refer files:")
# Add refer contents
refers_content = self._get_all_refer_contents()
if refers_content:
refers = f"Reference:\n{refers_content}\n\n"
# --- End of prompt construction logic ---
# Get object information
object_name = topic_item.get("object", "")
if object_name:
# 优化遍历resource_dir_config查找对象描述
found_object_info = False
# 1. 搜集所有可能的资源文件
all_description_files = []
for dir_info in self.resource_dir_config:
if dir_info.get("type") in ["Object", "Description"]:
all_description_files.extend(dir_info.get("file_path", []))
# 2. 尝试精确匹配
for file_path in all_description_files:
if object_name in os.path.basename(file_path):
info = ResourceLoader.load_file_content(file_path)
if info:
object_info = f"Object: {object_name}\n{info}\n\n"
logging.info(f"找到对象'{object_name}'的精确匹配资源文件: {file_path}")
found_object_info = True
break
# 3. 如果精确匹配失败,尝试模糊匹配
if not found_object_info and all_description_files:
logging.info(f"尝试模糊匹配对象'{object_name}'的资源文件")
# 提取所有文件名
file_names = [os.path.basename(f) for f in all_description_files]
# 模糊匹配
matched_filename, score = self.find_directory_fuzzy_match(object_name, files=file_names)
if matched_filename and score > 0:
# 找到匹配的完整路径
for file_path in all_description_files:
if os.path.basename(file_path) == matched_filename:
info = ResourceLoader.load_file_content(file_path)
if info:
object_info = f"Object: {object_name}\n{info}\n\n"
logging.info(f"模糊匹配找到对象'{object_name}'的资源文件: {file_path},匹配分数: {score}")
found_object_info = True
break
if not found_object_info:
logging.warning(f"未找到对象'{object_name}'的任何匹配资源文件")
# Get product information if any
product_name = topic_item.get("product", "")
if product_name:
product_info = ""
# 优化遍历resource_dir_config查找产品描述
found_product_info = False
# 搜集所有可能的产品资源文件
all_product_files = []
for dir_info in self.resource_dir_config:
if dir_info.get("type") == "Product":
all_product_files.extend(dir_info.get("file_path", []))
# 尝试精确匹配
for file_path in all_product_files:
if product_name in os.path.basename(file_path):
info = ResourceLoader.load_file_content(file_path)
if info:
product_info = f"Product: {product_name}\n{info}\n\n"
logging.info(f"找到产品'{product_name}'的精确匹配资源文件: {file_path}")
found_product_info = True
break
# 如果精确匹配失败,尝试模糊匹配
if not found_product_info and all_product_files:
logging.info(f"尝试模糊匹配产品'{product_name}'的资源文件")
# 提取所有文件名
file_names = [os.path.basename(f) for f in all_product_files]
# 模糊匹配
matched_filename, score = self.find_directory_fuzzy_match(product_name, files=file_names)
if matched_filename and score > 0:
# 找到匹配的完整路径
for file_path in all_product_files:
if os.path.basename(file_path) == matched_filename:
info = ResourceLoader.load_file_content(file_path)
if info:
product_info = f"Product: {product_name}\n{info}\n\n"
logging.info(f"模糊匹配找到产品'{product_name}'的资源文件: {file_path},匹配分数: {score}")
found_product_info = True
break
if not found_product_info:
logging.warning(f"未找到产品'{product_name}'的任何匹配资源文件")
# 添加产品信息到对象信息中
if product_info:
object_info += product_info
# Construct final user prompt
user_prompt = f"""请为我创建一个旅游文案。
{style}{demand}{refers}{object_info}
请考虑以上所有信息创作一篇{topic_item.get('target_audience','')}文旅内容"""
logging.info(f"Content prompts constructed. System: {len(system_prompt)} chars, User: {len(user_prompt)} chars.")
return system_prompt, user_prompt
except KeyError as e:
# Catch potential KeyErrors from accessing topic_item if a required key is missing early on
logging.error(f"Error constructing content prompts: Missing essential key '{e}' in topic_item: {topic_item}")
return None, None
except Exception as e:
logging.exception("Error constructing content prompts:")
traceback.print_exc()
logging.error(f"Error creating content prompts: {str(e)}")
return None, None