754 lines
35 KiB
Python
754 lines
35 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
|
||
"""
|
||
文字内容服务层
|
||
封装现有功能,提供API调用
|
||
"""
|
||
|
||
import logging
|
||
import uuid
|
||
from typing import List, Dict, Any, Optional, Tuple
|
||
from datetime import datetime
|
||
import re
|
||
from difflib import SequenceMatcher
|
||
|
||
from core.config import ConfigManager, GenerateTopicConfig, GenerateContentConfig
|
||
from core.ai import AIAgent
|
||
from utils.file_io import OutputManager
|
||
from tweet.topic_generator import TopicGenerator
|
||
from tweet.content_generator import ContentGenerator
|
||
from tweet.content_judger import ContentJudger
|
||
from api.services.prompt_builder import PromptBuilderService
|
||
from api.services.prompt_service import PromptService
|
||
from api.services.database_service import DatabaseService
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
class TopicIDMappingManager:
|
||
"""选题ID映射管理器 - 专门处理选题中的对象ID映射"""
|
||
|
||
def __init__(self):
|
||
"""初始化映射管理器"""
|
||
self.name_to_id = {} # 名称 -> ID映射
|
||
self.mapping_data = {
|
||
'styles': {},
|
||
'audiences': {},
|
||
'scenic_spots': {},
|
||
'products': {}
|
||
}
|
||
|
||
def add_objects_mapping(self,
|
||
style_objects: Optional[List[Dict[str, Any]]] = None,
|
||
audience_objects: Optional[List[Dict[str, Any]]] = None,
|
||
scenic_spot_objects: Optional[List[Dict[str, Any]]] = None,
|
||
product_objects: Optional[List[Dict[str, Any]]] = None):
|
||
"""
|
||
批量添加对象映射关系
|
||
|
||
Args:
|
||
style_objects: 风格对象列表
|
||
audience_objects: 受众对象列表
|
||
scenic_spot_objects: 景区对象列表
|
||
product_objects: 产品对象列表
|
||
"""
|
||
logger.info("开始建立选题ID映射关系")
|
||
mapping_count = 0
|
||
|
||
if style_objects:
|
||
logger.info(f"处理 {len(style_objects)} 个风格对象")
|
||
for obj in style_objects:
|
||
name = obj.get('styleName', '')
|
||
obj_id = obj.get('id')
|
||
if name and obj_id:
|
||
self.mapping_data['styles'][name] = str(obj_id)
|
||
self._add_name_variants('styles', name, str(obj_id))
|
||
mapping_count += 1
|
||
logger.info(f"添加风格映射: {name} -> ID {obj_id}")
|
||
|
||
if audience_objects:
|
||
logger.info(f"处理 {len(audience_objects)} 个受众对象")
|
||
for obj in audience_objects:
|
||
name = obj.get('audienceName', '')
|
||
obj_id = obj.get('id')
|
||
if name and obj_id:
|
||
self.mapping_data['audiences'][name] = str(obj_id)
|
||
self._add_name_variants('audiences', name, str(obj_id))
|
||
mapping_count += 1
|
||
logger.info(f"添加受众映射: {name} -> ID {obj_id}")
|
||
|
||
if scenic_spot_objects:
|
||
logger.info(f"处理 {len(scenic_spot_objects)} 个景区对象")
|
||
for obj in scenic_spot_objects:
|
||
name = obj.get('name', '')
|
||
obj_id = obj.get('id')
|
||
if name and obj_id:
|
||
self.mapping_data['scenic_spots'][name] = str(obj_id)
|
||
self._add_name_variants('scenic_spots', name, str(obj_id))
|
||
mapping_count += 1
|
||
logger.info(f"添加景区映射: {name} -> ID {obj_id}")
|
||
|
||
if product_objects:
|
||
logger.info(f"处理 {len(product_objects)} 个产品对象")
|
||
for obj in product_objects:
|
||
name = obj.get('productName', '')
|
||
obj_id = obj.get('id')
|
||
if name and obj_id:
|
||
self.mapping_data['products'][name] = str(obj_id)
|
||
self._add_name_variants('products', name, str(obj_id))
|
||
mapping_count += 1
|
||
logger.info(f"添加产品映射: {name} -> ID {obj_id}")
|
||
|
||
total_variants = len(self.name_to_id)
|
||
logger.info(f"ID映射关系建立完成: {mapping_count} 个对象,生成 {total_variants} 个名称变体")
|
||
|
||
def _add_name_variants(self, category: str, name: str, obj_id: str):
|
||
"""为名称添加各种变体以支持模糊匹配"""
|
||
variants = [name]
|
||
|
||
# 去除标点符号版本
|
||
clean_name = re.sub(r'[^\w\s]', '', name)
|
||
if clean_name != name:
|
||
variants.append(clean_name)
|
||
|
||
# 去除空格版本
|
||
no_space_name = name.replace(' ', '')
|
||
if no_space_name != name:
|
||
variants.append(no_space_name)
|
||
|
||
# 添加简化版本(去除常见后缀)
|
||
simplified = re.sub(r'(风格|类型|系列|产品|景区|公园)$', '', name)
|
||
if simplified != name and simplified:
|
||
variants.append(simplified)
|
||
|
||
# 为所有变体添加映射
|
||
for variant in variants:
|
||
self.name_to_id[variant.lower()] = {
|
||
'category': category,
|
||
'id': obj_id,
|
||
'original_name': name
|
||
}
|
||
|
||
# 记录生成的变体(仅在有多个变体时记录)
|
||
if len(variants) > 1:
|
||
variants_str = ', '.join(f"'{v}'" for v in variants)
|
||
logger.info(f" 为 '{name}' 生成 {len(variants)} 个变体: {variants_str}")
|
||
|
||
def find_ids_in_topic(self, topic: Dict[str, Any]) -> Dict[str, List[str]]:
|
||
"""
|
||
在选题中查找相关的对象ID
|
||
|
||
Args:
|
||
topic: 选题字典
|
||
|
||
Returns:
|
||
按类型分组的ID列表
|
||
"""
|
||
topic_index = topic.get('index', 'N/A')
|
||
|
||
found_ids = {
|
||
'style_ids': [],
|
||
'audience_ids': [],
|
||
'scenic_spot_ids': [],
|
||
'product_ids': []
|
||
}
|
||
|
||
# 提取选题中的所有文本
|
||
topic_text = self._extract_topic_text(topic)
|
||
topic_text_lower = topic_text.lower()
|
||
|
||
logger.info(f"开始为选题 {topic_index} 进行ID匹配")
|
||
logger.info(f"选题 {topic_index} 提取的文本内容: '{topic_text}'")
|
||
logger.info(f"当前映射库包含 {len(self.name_to_id)} 个名称变体")
|
||
|
||
# 显示映射库内容
|
||
logger.info("映射库内容:")
|
||
for variant, info in self.name_to_id.items():
|
||
logger.info(f" '{variant}' -> {info['original_name']} ({info['category']}, ID: {info['id']})")
|
||
|
||
# 记录匹配过程
|
||
match_details = []
|
||
|
||
# 在文本中查找匹配的名称
|
||
for name_variant, mapping_info in self.name_to_id.items():
|
||
if name_variant in topic_text_lower:
|
||
category = mapping_info['category']
|
||
obj_id = mapping_info['id']
|
||
original_name = mapping_info['original_name']
|
||
|
||
# 记录匹配详情
|
||
match_details.append({
|
||
'variant': name_variant,
|
||
'original_name': original_name,
|
||
'category': category,
|
||
'id': obj_id
|
||
})
|
||
|
||
# 映射到返回格式
|
||
if category == 'styles' and obj_id not in found_ids['style_ids']:
|
||
found_ids['style_ids'].append(obj_id)
|
||
logger.info(f"选题 {topic_index} 匹配到风格: '{name_variant}' -> {original_name} (ID: {obj_id})")
|
||
elif category == 'audiences' and obj_id not in found_ids['audience_ids']:
|
||
found_ids['audience_ids'].append(obj_id)
|
||
logger.info(f"选题 {topic_index} 匹配到受众: '{name_variant}' -> {original_name} (ID: {obj_id})")
|
||
elif category == 'scenic_spots' and obj_id not in found_ids['scenic_spot_ids']:
|
||
found_ids['scenic_spot_ids'].append(obj_id)
|
||
logger.info(f"选题 {topic_index} 匹配到景区: '{name_variant}' -> {original_name} (ID: {obj_id})")
|
||
elif category == 'products' and obj_id not in found_ids['product_ids']:
|
||
found_ids['product_ids'].append(obj_id)
|
||
logger.info(f"选题 {topic_index} 匹配到产品: '{name_variant}' -> {original_name} (ID: {obj_id})")
|
||
|
||
# 统计匹配结果
|
||
total_found = sum(len(ids) for ids in found_ids.values())
|
||
total_available = len(set(info['category'] + '_' + info['id'] for info in self.name_to_id.values()))
|
||
match_rate = (total_found / total_available * 100) if total_available > 0 else 0
|
||
|
||
# 输出详细的匹配结果
|
||
if total_found > 0:
|
||
logger.info(f"选题 {topic_index} ID匹配完成: 找到 {total_found} 个相关对象,匹配率: {match_rate:.1f}%")
|
||
logger.info(f"选题 {topic_index} 匹配详情: {match_details}")
|
||
|
||
# 按类别显示匹配结果
|
||
for category, ids in found_ids.items():
|
||
if ids:
|
||
category_name = {
|
||
'style_ids': '风格',
|
||
'audience_ids': '受众',
|
||
'scenic_spot_ids': '景区',
|
||
'product_ids': '产品'
|
||
}.get(category, category)
|
||
|
||
# 获取匹配的原始名称
|
||
matched_names = []
|
||
# 建立正确的分类映射关系
|
||
category_mapping = {
|
||
'style_ids': 'styles',
|
||
'audience_ids': 'audiences',
|
||
'scenic_spot_ids': 'scenic_spots',
|
||
'product_ids': 'products'
|
||
}
|
||
target_category = category_mapping.get(category)
|
||
|
||
logger.info(f"选题 {topic_index} 处理分类 {category} -> {target_category}, IDs: {ids}")
|
||
|
||
for detail in match_details:
|
||
if detail['id'] in ids and detail['category'] == target_category:
|
||
matched_names.append(f"{detail['original_name']}({detail['id']})")
|
||
|
||
# 显示分类结果
|
||
names_str = ', '.join(matched_names) if matched_names else '(无匹配名称)'
|
||
logger.info(f" {category_name}: {names_str}")
|
||
|
||
# 如果没有匹配名称,输出调试信息
|
||
if not matched_names:
|
||
logger.info(f"选题 {topic_index} {category_name}无匹配名称,详情检查:")
|
||
logger.info(f" IDs: {ids}")
|
||
logger.info(f" 目标分类: {target_category}")
|
||
for detail in match_details:
|
||
if detail['id'] in ids:
|
||
logger.info(f" 详情: {detail}")
|
||
else:
|
||
logger.info(f"选题 {topic_index} 未匹配到任何相关对象ID")
|
||
|
||
return found_ids
|
||
|
||
def _extract_topic_text(self, topic: Dict[str, Any]) -> str:
|
||
"""提取选题中的所有文本内容"""
|
||
text_parts = []
|
||
|
||
# 提取主要字段的文本
|
||
text_fields = ['object', 'style', 'targetAudience', 'product', 'logic',
|
||
'productLogic', 'styleLogic', 'targetAudienceLogic']
|
||
|
||
for field in text_fields:
|
||
if field in topic and topic[field]:
|
||
text_parts.append(str(topic[field]))
|
||
|
||
return ' '.join(text_parts)
|
||
|
||
|
||
|
||
class TweetService:
|
||
"""文字内容服务类"""
|
||
|
||
def __init__(self, ai_agent: AIAgent, config_manager: ConfigManager, output_manager: OutputManager):
|
||
"""
|
||
初始化文字内容服务
|
||
|
||
Args:
|
||
ai_agent: AI代理
|
||
config_manager: 配置管理器
|
||
output_manager: 输出管理器
|
||
"""
|
||
self.ai_agent = ai_agent
|
||
self.config_manager = config_manager
|
||
self.output_manager = output_manager
|
||
|
||
# 初始化各个组件
|
||
self.topic_generator = TopicGenerator(ai_agent, config_manager, output_manager)
|
||
self.content_generator = ContentGenerator(ai_agent, config_manager, output_manager)
|
||
self.content_judger = ContentJudger(ai_agent, config_manager, output_manager)
|
||
|
||
# 初始化提示词服务和构建器
|
||
self.prompt_service = PromptService(config_manager)
|
||
self.prompt_builder = PromptBuilderService(config_manager, self.prompt_service)
|
||
|
||
# 初始化选题ID映射管理器
|
||
self.topic_id_mapping_manager = TopicIDMappingManager()
|
||
|
||
async def generate_topics(self, dates: Optional[str] = None, numTopics: int = 5,
|
||
styles: Optional[List[str]] = None,
|
||
audiences: Optional[List[str]] = None,
|
||
scenic_spots: Optional[List[str]] = None,
|
||
products: Optional[List[str]] = None,
|
||
style_objects: Optional[List[Dict[str, Any]]] = None,
|
||
audience_objects: Optional[List[Dict[str, Any]]] = None,
|
||
scenic_spot_objects: Optional[List[Dict[str, Any]]] = None,
|
||
product_objects: Optional[List[Dict[str, Any]]] = None) -> Tuple[str, List[Dict[str, Any]]]:
|
||
"""
|
||
生成选题
|
||
|
||
Args:
|
||
dates: 日期字符串,可能为单个日期、多个日期用逗号分隔或范围
|
||
numTopics: 要生成的选题数量
|
||
styles: 风格列表
|
||
audiences: 受众列表
|
||
scenic_spots: 景区列表
|
||
products: 产品列表
|
||
style_objects: 风格对象列表
|
||
audience_objects: 受众对象列表
|
||
scenic_spot_objects: 景区对象列表
|
||
product_objects: 产品对象列表
|
||
|
||
Returns:
|
||
请求ID和生成的选题列表
|
||
"""
|
||
logger.info(f"开始生成选题,日期: {dates}, 数量: {numTopics}")
|
||
|
||
# 获取并更新配置
|
||
topic_config = self.config_manager.get_config('topic_gen', GenerateTopicConfig)
|
||
if dates:
|
||
topic_config.topic.date = dates
|
||
topic_config.topic.num = numTopics
|
||
|
||
# 建立ID映射关系(用于后续的ID反向映射)
|
||
self.topic_id_mapping_manager.add_objects_mapping(
|
||
style_objects=style_objects,
|
||
audience_objects=audience_objects,
|
||
scenic_spot_objects=scenic_spot_objects,
|
||
product_objects=product_objects
|
||
)
|
||
|
||
# 使用PromptBuilderService构建提示词
|
||
system_prompt, user_prompt = self.prompt_builder.build_topic_prompt(
|
||
products=products,
|
||
scenic_spots=scenic_spots,
|
||
styles=styles,
|
||
audiences=audiences,
|
||
dates=dates,
|
||
numTopics=numTopics,
|
||
style_objects=style_objects,
|
||
audience_objects=audience_objects,
|
||
scenic_spot_objects=scenic_spot_objects,
|
||
product_objects=product_objects
|
||
)
|
||
|
||
# 使用预构建的提示词生成选题
|
||
topics = await self.topic_generator.generate_topics_with_prompt(system_prompt, user_prompt)
|
||
if not topics:
|
||
logger.error("未能生成任何选题")
|
||
return str(uuid.uuid4()), []
|
||
|
||
# 为每个选题添加相关的对象ID
|
||
logger.info(f"开始为 {len(topics)} 个选题进行ID映射分析")
|
||
enhanced_topics = []
|
||
total_mapped_topics = 0
|
||
|
||
for topic in topics:
|
||
enhanced_topic = topic.copy()
|
||
|
||
# 查找选题中涉及的对象ID
|
||
related_object_ids = self.topic_id_mapping_manager.find_ids_in_topic(topic)
|
||
|
||
# 只有当找到相关ID时才添加字段
|
||
if any(related_object_ids.values()):
|
||
enhanced_topic['related_object_ids'] = related_object_ids
|
||
total_mapped_topics += 1
|
||
logger.info(f"选题 {topic.get('index', 'N/A')} 找到相关ID: {related_object_ids}")
|
||
|
||
enhanced_topics.append(enhanced_topic)
|
||
|
||
# 统计总体映射情况
|
||
mapping_coverage = (total_mapped_topics / len(topics) * 100) if topics else 0
|
||
logger.info(f"选题ID映射完成: {total_mapped_topics}/{len(topics)} 个选题包含相关对象ID,覆盖率: {mapping_coverage:.1f}%")
|
||
|
||
# 生成请求ID
|
||
requestId = f"topic-{datetime.now().strftime('%Y%m%d-%H%M%S')}-{str(uuid.uuid4())[:8]}"
|
||
|
||
logger.info(f"选题生成完成,请求ID: {requestId}, 数量: {len(enhanced_topics)}")
|
||
return requestId, enhanced_topics
|
||
|
||
async def generate_content(self, topic: Optional[Dict[str, Any]] = None, autoJudge: bool = False,
|
||
style_objects: Optional[List[Dict[str, Any]]] = None,
|
||
audience_objects: Optional[List[Dict[str, Any]]] = None,
|
||
scenic_spot_objects: Optional[List[Dict[str, Any]]] = None,
|
||
product_objects: Optional[List[Dict[str, Any]]] = None) -> Tuple[str, str, Dict[str, Any]]:
|
||
"""
|
||
为单个选题生成内容
|
||
|
||
Args:
|
||
topic: 选题信息(可能包含ID字段)
|
||
autoJudge: 是否进行内嵌审核
|
||
style_objects: 风格对象列表(可选,用于兼容)
|
||
audience_objects: 受众对象列表(可选,用于兼容)
|
||
scenic_spot_objects: 景区对象列表(可选,用于兼容)
|
||
product_objects: 产品对象列表(可选,用于兼容)
|
||
|
||
Returns:
|
||
请求ID、选题索引和生成的内容(包含judgeSuccess状态)
|
||
"""
|
||
if not topic:
|
||
topic = {"index": "1", "date": "2024-07-01"}
|
||
|
||
topicIndex = topic.get('index', 'N/A')
|
||
logger.info(f"开始为选题 {topicIndex} 生成内容{'(含审核)' if autoJudge else ''}")
|
||
|
||
# 增强版的topic处理:优先使用ID获取最新数据
|
||
enhanced_topic = await self._enhance_topic_with_database_data(topic)
|
||
|
||
# 如果没有通过ID获取到数据,使用传入的对象参数作为兜底
|
||
if style_objects and not enhanced_topic.get('style_object'):
|
||
enhanced_topic['style_object'] = style_objects[0]
|
||
enhanced_topic['style'] = style_objects[0].get('styleName')
|
||
if audience_objects and not enhanced_topic.get('audience_object'):
|
||
enhanced_topic['audience_object'] = audience_objects[0]
|
||
enhanced_topic['targetAudience'] = audience_objects[0].get('audienceName')
|
||
if scenic_spot_objects and not enhanced_topic.get('scenic_spot_object'):
|
||
enhanced_topic['scenic_spot_object'] = scenic_spot_objects[0]
|
||
enhanced_topic['object'] = scenic_spot_objects[0].get('name')
|
||
if product_objects and not enhanced_topic.get('product_object'):
|
||
enhanced_topic['product_object'] = product_objects[0]
|
||
enhanced_topic['product'] = product_objects[0].get('productName')
|
||
|
||
# 使用PromptBuilderService构建提示词
|
||
system_prompt, user_prompt = self.prompt_builder.build_content_prompt(enhanced_topic, "content")
|
||
|
||
# 使用预构建的提示词生成内容
|
||
content = await self.content_generator.generate_content_with_prompt(enhanced_topic, system_prompt, user_prompt)
|
||
if not content:
|
||
logger.error(f"未能为选题 {topicIndex} 生成内容")
|
||
return str(uuid.uuid4()), topicIndex, {}
|
||
|
||
# 如果启用自动审核,进行内嵌审核
|
||
if autoJudge:
|
||
try:
|
||
logger.info(f"开始对选题 {topicIndex} 的内容进行内嵌审核")
|
||
|
||
# 使用PromptBuilderService构建审核提示词
|
||
judge_system_prompt, judge_user_prompt = self.prompt_builder.build_judge_prompt(enhanced_topic, content)
|
||
|
||
# 进行内容审核
|
||
judged_content = await self.content_judger.judge_content_with_prompt(content, enhanced_topic, judge_system_prompt, judge_user_prompt)
|
||
|
||
# 统一输出格式:始终包含judgeSuccess状态
|
||
# content_judger返回的是judge_success字段(下划线命名)
|
||
judge_success = judged_content.get('judge_success', False)
|
||
if judge_success:
|
||
logger.info(f"选题 {topicIndex} 内容审核成功")
|
||
# 审核成功:使用审核后的内容,但移除judge_success,添加统一的judgeSuccess
|
||
content = {k: v for k, v in judged_content.items() if k != 'judge_success'}
|
||
content['judgeSuccess'] = True
|
||
else:
|
||
logger.warning(f"选题 {topicIndex} 内容审核未通过")
|
||
# 审核失败:使用原始内容,添加judgeSuccess状态
|
||
content['judgeSuccess'] = False
|
||
|
||
except Exception as e:
|
||
logger.error(f"选题 {topicIndex} 内容审核过程中发生错误: {e}", exc_info=True)
|
||
# 审核出错:使用原始内容,标记审核失败
|
||
content['judgeSuccess'] = False
|
||
|
||
# 生成请求ID
|
||
requestId = f"content-{datetime.now().strftime('%Y%m%d-%H%M%S')}-{str(uuid.uuid4())[:8]}"
|
||
|
||
logger.info(f"选题 {topicIndex} 内容生成完成,请求ID: {requestId}")
|
||
return requestId, topicIndex, content
|
||
|
||
async def _enhance_topic_with_database_data(self, topic: Dict[str, Any]) -> Dict[str, Any]:
|
||
"""
|
||
使用数据库数据增强选题信息,优先使用related_object_ids中的反射ID
|
||
|
||
Args:
|
||
topic: 原始选题数据(可能包含related_object_ids字段)
|
||
|
||
Returns:
|
||
增强后的选题数据
|
||
"""
|
||
enhanced_topic = topic.copy()
|
||
|
||
try:
|
||
# 通过数据库服务获取详细信息
|
||
db_service = DatabaseService(self.config_manager)
|
||
|
||
if not db_service.is_available():
|
||
logger.warning("数据库服务不可用,无法增强选题数据")
|
||
return enhanced_topic
|
||
|
||
# 优先使用related_object_ids中的反射ID(更精确)
|
||
related_ids = topic.get('related_object_ids', {})
|
||
if related_ids:
|
||
logger.info(f"选题包含反射ID信息,优先使用: {related_ids}")
|
||
enhanced_topic = await self._enhance_with_related_ids(enhanced_topic, db_service, related_ids)
|
||
return enhanced_topic
|
||
|
||
# 处理风格ID
|
||
if 'styleIds' in topic and topic['styleIds']:
|
||
style_id = topic['styleIds'][0] if isinstance(topic['styleIds'], list) else topic['styleIds']
|
||
style_data = db_service.get_style_by_id(style_id)
|
||
if style_data:
|
||
style_name = style_data.get('styleName')
|
||
enhanced_topic['style_object'] = style_data
|
||
enhanced_topic['style'] = style_name
|
||
logger.info(f"从数据库加载风格数据: {style_name} (ID: {style_id})")
|
||
|
||
# 处理受众ID
|
||
if 'audienceIds' in topic and topic['audienceIds']:
|
||
audience_id = topic['audienceIds'][0] if isinstance(topic['audienceIds'], list) else topic['audienceIds']
|
||
audience_data = db_service.get_audience_by_id(audience_id)
|
||
if audience_data:
|
||
audience_name = audience_data.get('audienceName')
|
||
enhanced_topic['audience_object'] = audience_data
|
||
enhanced_topic['targetAudience'] = audience_name
|
||
logger.info(f"从数据库加载受众数据: {audience_name} (ID: {audience_id})")
|
||
|
||
# 处理景区ID
|
||
if 'scenicSpotIds' in topic and topic['scenicSpotIds']:
|
||
spot_id = topic['scenicSpotIds'][0] if isinstance(topic['scenicSpotIds'], list) else topic['scenicSpotIds']
|
||
spot_data = db_service.get_scenic_spot_by_id(spot_id)
|
||
if spot_data:
|
||
spot_name = spot_data.get('name')
|
||
enhanced_topic['scenic_spot_object'] = spot_data
|
||
enhanced_topic['object'] = spot_name
|
||
logger.info(f"从数据库加载景区数据: {spot_name} (ID: {spot_id})")
|
||
|
||
# 处理产品ID
|
||
if 'productIds' in topic and topic['productIds']:
|
||
product_id = topic['productIds'][0] if isinstance(topic['productIds'], list) else topic['productIds']
|
||
product_data = db_service.get_product_by_id(product_id)
|
||
if product_data:
|
||
product_name = product_data.get('productName')
|
||
enhanced_topic['product_object'] = product_data
|
||
enhanced_topic['product'] = product_name
|
||
logger.info(f"从数据库加载产品数据: {product_name} (ID: {product_id})")
|
||
|
||
except Exception as e:
|
||
logger.error(f"增强选题数据时发生错误: {e}", exc_info=True)
|
||
|
||
return enhanced_topic
|
||
|
||
async def _enhance_with_related_ids(self, enhanced_topic: Dict[str, Any], db_service, related_ids: Dict[str, List[str]]) -> Dict[str, Any]:
|
||
"""
|
||
使用related_object_ids中的反射ID进行数据库查询和增强
|
||
|
||
Args:
|
||
enhanced_topic: 待增强的选题数据
|
||
db_service: 数据库服务实例
|
||
related_ids: 反射的ID字典
|
||
|
||
Returns:
|
||
增强后的选题数据
|
||
"""
|
||
logger.info("开始使用反射ID进行选题数据增强")
|
||
|
||
try:
|
||
# 处理风格ID
|
||
style_ids = related_ids.get('style_ids', [])
|
||
if style_ids:
|
||
style_id = int(style_ids[0]) # 取第一个ID
|
||
style_data = db_service.get_style_by_id(style_id)
|
||
if style_data:
|
||
enhanced_topic['style_object'] = style_data
|
||
enhanced_topic['style'] = style_data.get('styleName')
|
||
logger.info(f"通过反射ID加载风格数据: {style_data.get('styleName')} (ID: {style_id})")
|
||
|
||
# 处理受众ID
|
||
audience_ids = related_ids.get('audience_ids', [])
|
||
if audience_ids:
|
||
audience_id = int(audience_ids[0]) # 取第一个ID
|
||
audience_data = db_service.get_audience_by_id(audience_id)
|
||
if audience_data:
|
||
enhanced_topic['audience_object'] = audience_data
|
||
enhanced_topic['targetAudience'] = audience_data.get('audienceName')
|
||
logger.info(f"通过反射ID加载受众数据: {audience_data.get('audienceName')} (ID: {audience_id})")
|
||
|
||
# 处理景区ID
|
||
scenic_spot_ids = related_ids.get('scenic_spot_ids', [])
|
||
if scenic_spot_ids:
|
||
spot_id = int(scenic_spot_ids[0]) # 取第一个ID
|
||
spot_data = db_service.get_scenic_spot_by_id(spot_id)
|
||
if spot_data:
|
||
enhanced_topic['scenic_spot_object'] = spot_data
|
||
enhanced_topic['object'] = spot_data.get('name')
|
||
logger.info(f"通过反射ID加载景区数据: {spot_data.get('name')} (ID: {spot_id})")
|
||
|
||
# 处理产品ID
|
||
product_ids = related_ids.get('product_ids', [])
|
||
if product_ids:
|
||
product_id = int(product_ids[0]) # 取第一个ID
|
||
product_data = db_service.get_product_by_id(product_id)
|
||
if product_data:
|
||
enhanced_topic['product_object'] = product_data
|
||
enhanced_topic['product'] = product_data.get('productName')
|
||
logger.info(f"通过反射ID加载产品数据: {product_data.get('productName')} (ID: {product_id})")
|
||
|
||
logger.info("反射ID数据增强完成")
|
||
|
||
except Exception as e:
|
||
logger.error(f"使用反射ID增强选题数据时发生错误: {e}", exc_info=True)
|
||
|
||
return enhanced_topic
|
||
|
||
async def generate_content_with_prompt(self, topic: Dict[str, Any], system_prompt: str, user_prompt: str) -> Tuple[str, str, Dict[str, Any]]:
|
||
"""
|
||
使用预构建的提示词为选题生成内容
|
||
|
||
Args:
|
||
topic: 选题信息
|
||
system_prompt: 系统提示词
|
||
user_prompt: 用户提示词
|
||
|
||
Returns:
|
||
请求ID、选题索引和生成的内容
|
||
"""
|
||
topicIndex = topic.get('index', 'unknown')
|
||
logger.info(f"开始使用预构建提示词为选题 {topicIndex} 生成内容")
|
||
|
||
# 直接使用预构建的提示词生成内容
|
||
content = await self.content_generator.generate_content_with_prompt(topic, system_prompt, user_prompt)
|
||
|
||
# 生成请求ID
|
||
requestId = f"content-{datetime.now().strftime('%Y%m%d-%H%M%S')}-{str(uuid.uuid4())[:8]}"
|
||
|
||
logger.info(f"内容生成完成,请求ID: {requestId}, 选题索引: {topicIndex}")
|
||
return requestId, topicIndex, content
|
||
|
||
async def judge_content(self, topic: Optional[Dict[str, Any]] = None, content: Dict[str, Any] = {},
|
||
style_objects: Optional[List[Dict[str, Any]]] = None,
|
||
audience_objects: Optional[List[Dict[str, Any]]] = None,
|
||
scenic_spot_objects: Optional[List[Dict[str, Any]]] = None,
|
||
product_objects: Optional[List[Dict[str, Any]]] = None) -> Tuple[str, str, Dict[str, Any], bool]:
|
||
"""
|
||
审核内容 (已重构)
|
||
"""
|
||
if not topic:
|
||
topic = {"index": "1", "date": "2024-07-01"}
|
||
if not content:
|
||
content = {"title": "未提供内容", "content": "未提供内容"}
|
||
|
||
topicIndex = topic.get('index', 'unknown')
|
||
logger.info(f"开始审核选题 {topicIndex} 的内容")
|
||
|
||
# 构建包含所有预取信息的enhanced_topic
|
||
enhanced_topic = topic.copy()
|
||
if style_objects:
|
||
enhanced_topic['style_object'] = style_objects[0]
|
||
if audience_objects:
|
||
enhanced_topic['audience_object'] = audience_objects[0]
|
||
if scenic_spot_objects:
|
||
enhanced_topic['scenic_spot_object'] = scenic_spot_objects[0]
|
||
if product_objects:
|
||
enhanced_topic['product_object'] = product_objects[0]
|
||
|
||
system_prompt, user_prompt = self.prompt_builder.build_judge_prompt(enhanced_topic, content)
|
||
|
||
judged_data = await self.content_judger.judge_content_with_prompt(content, enhanced_topic, system_prompt, user_prompt)
|
||
|
||
judgeSuccess = judged_data.get('judge_success', False)
|
||
|
||
if 'judge_success' in judged_data:
|
||
judged_data = {k: v for k, v in judged_data.items() if k != 'judge_success'}
|
||
judged_data['judgeSuccess'] = judgeSuccess
|
||
|
||
requestId = f"judge-{datetime.now().strftime('%Y%m%d-%H%M%S')}-{str(uuid.uuid4())[:8]}"
|
||
|
||
logger.info(f"内容审核完成,请求ID: {requestId}, 选题索引: {topicIndex}, 审核结果: {judgeSuccess}")
|
||
return requestId, topicIndex, judged_data, judgeSuccess
|
||
|
||
async def run_pipeline(self, dates: Optional[str] = None, numTopics: int = 5,
|
||
styles: Optional[List[str]] = None,
|
||
audiences: Optional[List[str]] = None,
|
||
scenic_spots: Optional[List[str]] = None,
|
||
products: Optional[List[str]] = None,
|
||
skipJudge: bool = False,
|
||
autoJudge: bool = False) -> Tuple[str, List[Dict[str, Any]], Dict[str, Dict[str, Any]], Dict[str, Dict[str, Any]]]:
|
||
"""
|
||
运行完整的内容生成流水线:生成选题 → 生成内容 → 审核内容
|
||
|
||
Args:
|
||
dates: 日期字符串,可能为单个日期、多个日期用逗号分隔或范围
|
||
numTopics: 要生成的选题数量
|
||
styles: 风格列表
|
||
audiences: 受众列表
|
||
scenic_spots: 景区列表
|
||
products: 产品列表
|
||
skipJudge: 是否跳过内容审核步骤(与autoJudge互斥)
|
||
autoJudge: 是否在内容生成时进行内嵌审核
|
||
|
||
Returns:
|
||
请求ID、选题列表、内容字典和审核后内容字典
|
||
"""
|
||
logger.info(f"开始运行完整流水线,日期: {dates}, 数量: {numTopics}, 内嵌审核: {autoJudge}")
|
||
|
||
# 生成请求ID
|
||
requestId = f"pipeline-{datetime.now().strftime('%Y%m%d-%H%M%S')}-{str(uuid.uuid4())[:8]}"
|
||
|
||
# 1. 生成选题
|
||
_, topics = await self.generate_topics(dates, numTopics, styles, audiences, scenic_spots, products)
|
||
if not topics:
|
||
logger.error("未能生成任何选题")
|
||
return requestId, [], {}, {}
|
||
|
||
# 2. 为每个选题生成内容
|
||
contents = {}
|
||
judgedContents = {}
|
||
|
||
for topic in topics:
|
||
topicIndex = topic.get('index', 'unknown')
|
||
# 直接传递带有ID的选题数据,不再需要传递额外的对象参数
|
||
_, _, content = await self.generate_content(topic, autoJudge=autoJudge)
|
||
|
||
if autoJudge:
|
||
# 内嵌审核模式:content已包含审核结果和judgeSuccess状态
|
||
# 创建原始内容副本(移除judgeSuccess状态,但保留其他字段)
|
||
original_content = {k: v for k, v in content.items() if k != 'judgeSuccess'}
|
||
contents[topicIndex] = original_content
|
||
judgedContents[topicIndex] = content # 包含审核结果和judgeSuccess状态
|
||
else:
|
||
# 无审核模式:直接保存内容
|
||
contents[topicIndex] = content
|
||
|
||
# 如果使用内嵌审核或跳过审核,直接返回结果
|
||
if autoJudge or skipJudge:
|
||
logger.info(f"{'使用内嵌审核' if autoJudge else '跳过内容审核步骤'},流水线完成,请求ID: {requestId}")
|
||
if autoJudge:
|
||
return requestId, topics, contents, judgedContents
|
||
else:
|
||
return requestId, topics, contents, contents
|
||
|
||
# 3. 对每个内容进行审核
|
||
judgedContents = {}
|
||
for topicIndex, content in contents.items():
|
||
topic = next((t for t in topics if t.get('index') == topicIndex), None)
|
||
if not topic:
|
||
logger.warning(f"找不到选题 {topicIndex} 的原始数据,跳过审核")
|
||
continue
|
||
|
||
try:
|
||
_, _, judged_data, _ = await self.judge_content(topic, content)
|
||
judgedContents[topicIndex] = judged_data
|
||
except Exception as e:
|
||
logger.critical(f"为选题 {topicIndex} 处理内容审核时发生意外错误: {e}", exc_info=True)
|
||
|
||
logger.info(f"流水线完成,请求ID: {requestId}")
|
||
return requestId, topics, contents, judgedContents |