Merge branch 'reinstruct' of http://8.134.70.224:8300/jinye_huang/TravelContentCreator into reinstruct
This commit is contained in:
commit
5fc2c1cd08
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -49,15 +49,40 @@ async def integrate_content(request: ContentIntegrationRequest) -> ContentIntegr
|
|||||||
try:
|
try:
|
||||||
# 创建临时文件处理base64文档
|
# 创建临时文件处理base64文档
|
||||||
if request.documents:
|
if request.documents:
|
||||||
temp_files = []
|
|
||||||
for doc in request.documents:
|
for doc in request.documents:
|
||||||
try:
|
try:
|
||||||
|
# 从base64内容中提取实际内容(跳过data:image/jpeg;base64,这样的前缀)
|
||||||
|
content = doc.content
|
||||||
|
if ',' in content:
|
||||||
|
content = content.split(',', 1)[1]
|
||||||
|
|
||||||
# 创建临时文件
|
# 创建临时文件
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(doc.filename)[1]) as temp_file:
|
suffix = os.path.splitext(doc.filename)[1]
|
||||||
|
if not suffix:
|
||||||
|
# 根据MIME类型推断后缀
|
||||||
|
mime_to_ext = {
|
||||||
|
'text/plain': '.txt',
|
||||||
|
'application/pdf': '.pdf',
|
||||||
|
'application/msword': '.doc',
|
||||||
|
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
|
||||||
|
'image/jpeg': '.jpg',
|
||||||
|
'image/png': '.png'
|
||||||
|
}
|
||||||
|
suffix = mime_to_ext.get(doc.mime_type, '.bin')
|
||||||
|
|
||||||
|
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
|
||||||
# 解码base64内容并写入临时文件
|
# 解码base64内容并写入临时文件
|
||||||
content = base64.b64decode(doc.content)
|
try:
|
||||||
temp_file.write(content)
|
decoded_content = base64.b64decode(content)
|
||||||
|
temp_file.write(decoded_content)
|
||||||
temp_files.append(temp_file.name)
|
temp_files.append(temp_file.name)
|
||||||
|
logger.info(f"成功保存临时文件: {temp_file.name}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Base64解码失败: {e}")
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail=f"文档 {doc.filename} 的Base64内容无效: {str(e)}"
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"处理文档 {doc.filename} 失败: {e}")
|
logger.error(f"处理文档 {doc.filename} 失败: {e}")
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
@ -70,8 +95,8 @@ async def integrate_content(request: ContentIntegrationRequest) -> ContentIntegr
|
|||||||
# 调用服务层处理
|
# 调用服务层处理
|
||||||
result = await integration_service.integrate_content(
|
result = await integration_service.integrate_content(
|
||||||
document_paths=temp_files,
|
document_paths=temp_files,
|
||||||
keywords=request.keywords,
|
keywords=request.keywords or [],
|
||||||
cookies=request.cookies,
|
cookies=request.cookies or "",
|
||||||
sort_type=request.sort_type,
|
sort_type=request.sort_type,
|
||||||
note_type=request.note_type,
|
note_type=request.note_type,
|
||||||
note_time=request.note_time,
|
note_time=request.note_time,
|
||||||
@ -80,36 +105,7 @@ async def integrate_content(request: ContentIntegrationRequest) -> ContentIntegr
|
|||||||
query_num=request.query_num
|
query_num=request.query_num
|
||||||
)
|
)
|
||||||
|
|
||||||
# 转换为响应模型
|
return result
|
||||||
if result["success"]:
|
|
||||||
response = ContentIntegrationResponse(
|
|
||||||
success=True,
|
|
||||||
timestamp=result["timestamp"],
|
|
||||||
processing_time=result["processing_time"],
|
|
||||||
input_summary=result["input_summary"],
|
|
||||||
document_info=result["document_info"],
|
|
||||||
xhs_info=result["xhs_info"],
|
|
||||||
integrated_content=result["integrated_content"],
|
|
||||||
search_config=result["search_config"],
|
|
||||||
error_message=None # 成功时无错误信息
|
|
||||||
)
|
|
||||||
logger.info(f"内容整合成功,处理时间:{result['processing_time']}")
|
|
||||||
else:
|
|
||||||
from datetime import datetime
|
|
||||||
response = ContentIntegrationResponse(
|
|
||||||
success=False,
|
|
||||||
timestamp=result.get("timestamp", datetime.now().strftime("%Y%m%d_%H%M%S")),
|
|
||||||
processing_time=result.get("processing_time", "0秒"),
|
|
||||||
input_summary=result.get("input_summary"),
|
|
||||||
document_info=result.get("document_info"),
|
|
||||||
xhs_info=result.get("xhs_info"),
|
|
||||||
integrated_content=result.get("integrated_content"),
|
|
||||||
search_config=result.get("search_config"),
|
|
||||||
error_message=result.get("error_message")
|
|
||||||
)
|
|
||||||
logger.error(f"内容整合失败:{result['error_message']}")
|
|
||||||
|
|
||||||
return response
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"内容整合接口异常:{e}", exc_info=True)
|
logger.error(f"内容整合接口异常:{e}", exc_info=True)
|
||||||
|
|||||||
@ -125,38 +125,96 @@ def _add_ids_to_topics(topics: List[Dict[str, Any]], id_name_mappings: Dict[str,
|
|||||||
Returns:
|
Returns:
|
||||||
包含ID字段的选题列表
|
包含ID字段的选题列表
|
||||||
"""
|
"""
|
||||||
|
def find_best_match(target_name: str, mapping: Dict[str, int]) -> Optional[int]:
    """Resolve a display name to its ID, tolerating small naming differences.

    Matching is attempted from strictest to loosest:

    1. exact key lookup;
    2. equality after removing spaces and trimming surrounding whitespace;
    3. substring containment in either direction (the first hit in the
       mapping's iteration order wins).

    Args:
        target_name: Name to look up.
        mapping: Name -> ID mapping to search.

    Returns:
        The matched ID, or None when the name is empty/blank, the mapping is
        empty, or nothing matches.
    """
    if not target_name or not mapping:
        return None

    # 1. Exact match.
    if target_name in mapping:
        return mapping[target_name]

    target_clean = target_name.replace(" ", "").strip()
    if not target_clean:
        # An empty string is a substring of every key, so a blank name would
        # otherwise "match" whichever mapping entry happens to come first.
        return None

    # Normalise every key once, the same way as the target, so that both
    # fuzzy steps compare like with like. Keys that normalise to an empty
    # string are dropped for the same reason blank targets are rejected.
    candidates = []
    for name, id_val in mapping.items():
        name_clean = name.replace(" ", "").strip()
        if name_clean:
            candidates.append((name, name_clean, id_val))

    # 2. Fuzzy match: equal once whitespace is ignored.
    for name, name_clean, id_val in candidates:
        if name_clean == target_clean:
            logger.info(f"模糊匹配成功: '{target_name}' -> '{name}' (ID: {id_val})")
            return id_val

    # 3. Containment match: either name contains the other.
    for name, name_clean, id_val in candidates:
        if target_clean in name_clean or name_clean in target_clean:
            logger.info(f"包含匹配成功: '{target_name}' -> '{name}' (ID: {id_val})")
            return id_val

    # 4. Nothing matched.
    logger.warning(f"未找到匹配的ID: '{target_name}', 可用选项: {list(mapping.keys())}")
    return None
|
||||||
|
|
||||||
enriched_topics = []
|
enriched_topics = []
|
||||||
|
|
||||||
for topic in topics:
|
for topic in topics:
|
||||||
# 复制原topic
|
# 复制原topic
|
||||||
enriched_topic = topic.copy()
|
enriched_topic = topic.copy()
|
||||||
|
|
||||||
# 添加ID字段
|
# 初始化ID字段
|
||||||
enriched_topic['styleIds'] = []
|
enriched_topic['styleIds'] = []
|
||||||
enriched_topic['audienceIds'] = []
|
enriched_topic['audienceIds'] = []
|
||||||
enriched_topic['scenicSpotIds'] = []
|
enriched_topic['scenicSpotIds'] = []
|
||||||
enriched_topic['productIds'] = []
|
enriched_topic['productIds'] = []
|
||||||
|
|
||||||
|
# 记录匹配结果
|
||||||
|
match_results = {
|
||||||
|
'style_matched': False,
|
||||||
|
'audience_matched': False,
|
||||||
|
'scenic_spot_matched': False,
|
||||||
|
'product_matched': False
|
||||||
|
}
|
||||||
|
|
||||||
# 根据topic中的name查找对应的ID
|
# 根据topic中的name查找对应的ID
|
||||||
if 'style' in topic and topic['style']:
|
if 'style' in topic and topic['style']:
|
||||||
style_name = topic['style']
|
style_id = find_best_match(topic['style'], id_name_mappings['style_mapping'])
|
||||||
if style_name in id_name_mappings['style_mapping']:
|
if style_id:
|
||||||
enriched_topic['styleIds'] = [id_name_mappings['style_mapping'][style_name]]
|
enriched_topic['styleIds'] = [style_id]
|
||||||
|
match_results['style_matched'] = True
|
||||||
|
|
||||||
if 'targetAudience' in topic and topic['targetAudience']:
|
if 'targetAudience' in topic and topic['targetAudience']:
|
||||||
audience_name = topic['targetAudience']
|
audience_id = find_best_match(topic['targetAudience'], id_name_mappings['audience_mapping'])
|
||||||
if audience_name in id_name_mappings['audience_mapping']:
|
if audience_id:
|
||||||
enriched_topic['audienceIds'] = [id_name_mappings['audience_mapping'][audience_name]]
|
enriched_topic['audienceIds'] = [audience_id]
|
||||||
|
match_results['audience_matched'] = True
|
||||||
|
|
||||||
if 'object' in topic and topic['object']:
|
if 'object' in topic and topic['object']:
|
||||||
spot_name = topic['object']
|
spot_id = find_best_match(topic['object'], id_name_mappings['scenic_spot_mapping'])
|
||||||
if spot_name in id_name_mappings['scenic_spot_mapping']:
|
if spot_id:
|
||||||
enriched_topic['scenicSpotIds'] = [id_name_mappings['scenic_spot_mapping'][spot_name]]
|
enriched_topic['scenicSpotIds'] = [spot_id]
|
||||||
|
match_results['scenic_spot_matched'] = True
|
||||||
|
|
||||||
if 'product' in topic and topic['product']:
|
if 'product' in topic and topic['product']:
|
||||||
product_name = topic['product']
|
product_id = find_best_match(topic['product'], id_name_mappings['product_mapping'])
|
||||||
if product_name in id_name_mappings['product_mapping']:
|
if product_id:
|
||||||
enriched_topic['productIds'] = [id_name_mappings['product_mapping'][product_name]]
|
enriched_topic['productIds'] = [product_id]
|
||||||
|
match_results['product_matched'] = True
|
||||||
|
|
||||||
|
# 记录匹配情况
|
||||||
|
total_fields = sum(1 for key in ['style', 'targetAudience', 'object', 'product'] if key in topic and topic[key])
|
||||||
|
matched_fields = sum(match_results.values())
|
||||||
|
|
||||||
|
if total_fields > 0:
|
||||||
|
match_rate = matched_fields / total_fields * 100
|
||||||
|
logger.info(f"选题 {topic.get('index', 'N/A')} ID匹配率: {match_rate:.1f}% ({matched_fields}/{total_fields})")
|
||||||
|
|
||||||
|
# 如果匹配率低于50%,记录警告
|
||||||
|
if match_rate < 50:
|
||||||
|
logger.warning(f"选题 {topic.get('index', 'N/A')} ID匹配率较低: {match_rate:.1f}%")
|
||||||
|
|
||||||
|
# 添加匹配元数据
|
||||||
|
# enriched_topic['_id_match_metadata'] = {
|
||||||
|
# 'match_results': match_results,
|
||||||
|
# 'match_rate': matched_fields / max(total_fields, 1) * 100 if total_fields > 0 else 0
|
||||||
|
# }
|
||||||
|
|
||||||
enriched_topics.append(enriched_topic)
|
enriched_topics.append(enriched_topic)
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -78,7 +78,8 @@ class ContentIntegrationService:
|
|||||||
整合结果字典
|
整合结果字典
|
||||||
"""
|
"""
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
logger.info(f"开始整合任务:文档数量 {len(document_paths)}, 关键词数量 {len(keywords)}")
|
|
||||||
|
logger.info(f"开始整合任务:文档数量 {len(document_paths)}, 关键词数量 {len(keywords) if keywords else 0}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 确保输出目录存在
|
# 确保输出目录存在
|
||||||
|
|||||||
@ -55,10 +55,23 @@ class DatabaseService:
|
|||||||
logger.info(f"尝试连接数据库 ({attempt['desc']}): {connection_info}")
|
logger.info(f"尝试连接数据库 ({attempt['desc']}): {connection_info}")
|
||||||
|
|
||||||
# 创建连接池
|
# 创建连接池
|
||||||
|
# 从配置中分离MySQL连接池支持的参数和不支持的参数
|
||||||
|
config = attempt["config"].copy()
|
||||||
|
|
||||||
|
# MySQL连接池不支持的参数,需要移除
|
||||||
|
unsupported_params = [
|
||||||
|
'max_retry_attempts', 'query_timeout', 'soft_delete_field', 'active_record_value'
|
||||||
|
]
|
||||||
|
for param in unsupported_params:
|
||||||
|
config.pop(param, None)
|
||||||
|
|
||||||
|
# 设置连接池参数,使用配置文件中的值或默认值
|
||||||
|
pool_size = config.pop('pool_size', 10)
|
||||||
|
|
||||||
pool = pooling.MySQLConnectionPool(
|
pool = pooling.MySQLConnectionPool(
|
||||||
pool_name=f"database_service_pool_{int(time.time())}",
|
pool_name=f"database_service_pool_{int(time.time())}",
|
||||||
pool_size=10,
|
pool_size=pool_size,
|
||||||
**attempt["config"]
|
**config
|
||||||
)
|
)
|
||||||
|
|
||||||
# 测试连接
|
# 测试连接
|
||||||
@ -210,7 +223,8 @@ class DatabaseService:
|
|||||||
景区信息列表
|
景区信息列表
|
||||||
"""
|
"""
|
||||||
if not self.db_pool or not spot_ids:
|
if not self.db_pool or not spot_ids:
|
||||||
return []
|
logger.warning("数据库连接池未初始化或景区ID列表为空")
|
||||||
|
return self._get_fallback_scenic_spots(spot_ids)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with self.db_pool.get_connection() as conn:
|
with self.db_pool.get_connection() as conn:
|
||||||
@ -220,12 +234,42 @@ class DatabaseService:
|
|||||||
query = f"SELECT * FROM scenicSpot WHERE id IN ({placeholders}) AND isDelete = 0"
|
query = f"SELECT * FROM scenicSpot WHERE id IN ({placeholders}) AND isDelete = 0"
|
||||||
cursor.execute(query, spot_ids)
|
cursor.execute(query, spot_ids)
|
||||||
results = cursor.fetchall()
|
results = cursor.fetchall()
|
||||||
logger.info(f"批量查询景区信息: 请求{len(spot_ids)}个,找到{len(results)}个")
|
|
||||||
|
# 检查是否所有ID都找到了对应的记录
|
||||||
|
found_ids = {result['id'] for result in results}
|
||||||
|
missing_ids = set(spot_ids) - found_ids
|
||||||
|
|
||||||
|
if missing_ids:
|
||||||
|
logger.warning(f"部分景区ID未找到: {missing_ids}")
|
||||||
|
# 添加兜底数据
|
||||||
|
fallback_spots = self._get_fallback_scenic_spots(list(missing_ids))
|
||||||
|
results.extend(fallback_spots)
|
||||||
|
|
||||||
|
logger.info(f"批量查询景区信息: 请求{len(spot_ids)}个,找到{len([r for r in results if r['id'] in spot_ids])}个")
|
||||||
return results
|
return results
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"批量查询景区信息失败: {e}")
|
logger.error(f"批量查询景区信息失败: {e}")
|
||||||
return []
|
return self._get_fallback_scenic_spots(spot_ids)
|
||||||
|
|
||||||
|
def _get_fallback_scenic_spots(self, spot_ids: List[int]) -> List[Dict[str, Any]]:
    """Build placeholder scenic-spot records for the given IDs.

    Each placeholder carries generic default text and is tagged with
    ``_is_fallback: True`` so it can be told apart from a database row.

    Args:
        spot_ids: IDs that need a stand-in record.

    Returns:
        One placeholder dict per ID, in the same order as ``spot_ids``.
    """
    placeholders = [
        {
            'id': sid,
            'name': f'景区{sid}',
            'address': '默认地址',
            'advantage': '优美的自然风光',
            'highlight': '值得一游的景点',
            'isPublic': 1,
            'isDelete': 0,
            '_is_fallback': True,
        }
        for sid in spot_ids
    ]
    logger.info(f"使用兜底景区数据: {len(placeholders)}个")
    return placeholders
|
||||||
|
|
||||||
def get_products_by_ids(self, productIds: List[int]) -> List[Dict[str, Any]]:
|
def get_products_by_ids(self, productIds: List[int]) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
@ -238,7 +282,8 @@ class DatabaseService:
|
|||||||
产品信息列表
|
产品信息列表
|
||||||
"""
|
"""
|
||||||
if not self.db_pool or not productIds:
|
if not self.db_pool or not productIds:
|
||||||
return []
|
logger.warning("数据库连接池未初始化或产品ID列表为空")
|
||||||
|
return self._get_fallback_products(productIds)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with self.db_pool.get_connection() as conn:
|
with self.db_pool.get_connection() as conn:
|
||||||
@ -248,12 +293,42 @@ class DatabaseService:
|
|||||||
query = f"SELECT * FROM product WHERE id IN ({placeholders}) AND isDelete = 0"
|
query = f"SELECT * FROM product WHERE id IN ({placeholders}) AND isDelete = 0"
|
||||||
cursor.execute(query, productIds)
|
cursor.execute(query, productIds)
|
||||||
results = cursor.fetchall()
|
results = cursor.fetchall()
|
||||||
logger.info(f"批量查询产品信息: 请求{len(productIds)}个,找到{len(results)}个")
|
|
||||||
|
# 检查是否所有ID都找到了对应的记录
|
||||||
|
found_ids = {result['id'] for result in results}
|
||||||
|
missing_ids = set(productIds) - found_ids
|
||||||
|
|
||||||
|
if missing_ids:
|
||||||
|
logger.warning(f"部分产品ID未找到: {missing_ids}")
|
||||||
|
# 添加兜底数据
|
||||||
|
fallback_products = self._get_fallback_products(list(missing_ids))
|
||||||
|
results.extend(fallback_products)
|
||||||
|
|
||||||
|
logger.info(f"批量查询产品信息: 请求{len(productIds)}个,找到{len([r for r in results if r['id'] in productIds])}个")
|
||||||
return results
|
return results
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"批量查询产品信息失败: {e}")
|
logger.error(f"批量查询产品信息失败: {e}")
|
||||||
return []
|
return self._get_fallback_products(productIds)
|
||||||
|
|
||||||
|
def _get_fallback_products(self, productIds: List[int]) -> List[Dict[str, Any]]:
    """Build placeholder product records for the given IDs.

    Each placeholder carries fixed default prices and text and is tagged with
    ``_is_fallback: True`` so it can be told apart from a database row.

    Args:
        productIds: IDs that need a stand-in record.

    Returns:
        One placeholder dict per ID, in the same order as ``productIds``.
    """
    placeholders = [
        {
            'id': pid,
            'productName': f'产品{pid}',
            'originPrice': 999,
            'realPrice': 888,
            'packageInfo': '包含景区门票和导游服务',
            'advantage': '性价比高,服务优质',
            'isDelete': 0,
            '_is_fallback': True,
        }
        for pid in productIds
    ]
    logger.info(f"使用兜底产品数据: {len(placeholders)}个")
    return placeholders
|
||||||
|
|
||||||
def get_styles_by_ids(self, styleIds: List[int]) -> List[Dict[str, Any]]:
|
def get_styles_by_ids(self, styleIds: List[int]) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
@ -266,7 +341,8 @@ class DatabaseService:
|
|||||||
风格信息列表
|
风格信息列表
|
||||||
"""
|
"""
|
||||||
if not self.db_pool or not styleIds:
|
if not self.db_pool or not styleIds:
|
||||||
return []
|
logger.warning("数据库连接池未初始化或风格ID列表为空")
|
||||||
|
return self._get_fallback_styles(styleIds)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with self.db_pool.get_connection() as conn:
|
with self.db_pool.get_connection() as conn:
|
||||||
@ -276,12 +352,39 @@ class DatabaseService:
|
|||||||
query = f"SELECT * FROM contentStyle WHERE id IN ({placeholders}) AND isDelete = 0"
|
query = f"SELECT * FROM contentStyle WHERE id IN ({placeholders}) AND isDelete = 0"
|
||||||
cursor.execute(query, styleIds)
|
cursor.execute(query, styleIds)
|
||||||
results = cursor.fetchall()
|
results = cursor.fetchall()
|
||||||
logger.info(f"批量查询风格信息: 请求{len(styleIds)}个,找到{len(results)}个")
|
|
||||||
|
# 检查是否所有ID都找到了对应的记录
|
||||||
|
found_ids = {result['id'] for result in results}
|
||||||
|
missing_ids = set(styleIds) - found_ids
|
||||||
|
|
||||||
|
if missing_ids:
|
||||||
|
logger.warning(f"部分风格ID未找到: {missing_ids}")
|
||||||
|
# 添加兜底数据
|
||||||
|
fallback_styles = self._get_fallback_styles(list(missing_ids))
|
||||||
|
results.extend(fallback_styles)
|
||||||
|
|
||||||
|
logger.info(f"批量查询风格信息: 请求{len(styleIds)}个,找到{len([r for r in results if r['id'] in styleIds])}个")
|
||||||
return results
|
return results
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"批量查询风格信息失败: {e}")
|
logger.error(f"批量查询风格信息失败: {e}")
|
||||||
return []
|
return self._get_fallback_styles(styleIds)
|
||||||
|
|
||||||
|
def _get_fallback_styles(self, styleIds: List[int]) -> List[Dict[str, Any]]:
    """Build placeholder style records for the given IDs.

    Each placeholder carries a generic description and is tagged with
    ``_is_fallback: True`` so it can be told apart from a database row.

    Args:
        styleIds: IDs that need a stand-in record.

    Returns:
        One placeholder dict per ID, in the same order as ``styleIds``.
    """
    placeholders = [
        {
            'id': sid,
            'styleName': f'风格{sid}',
            'description': '默认风格描述',
            'isDelete': 0,
            '_is_fallback': True,
        }
        for sid in styleIds
    ]
    logger.info(f"使用兜底风格数据: {len(placeholders)}个")
    return placeholders
|
||||||
|
|
||||||
def get_audiences_by_ids(self, audienceIds: List[int]) -> List[Dict[str, Any]]:
|
def get_audiences_by_ids(self, audienceIds: List[int]) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
@ -294,7 +397,8 @@ class DatabaseService:
|
|||||||
受众信息列表
|
受众信息列表
|
||||||
"""
|
"""
|
||||||
if not self.db_pool or not audienceIds:
|
if not self.db_pool or not audienceIds:
|
||||||
return []
|
logger.warning("数据库连接池未初始化或受众ID列表为空")
|
||||||
|
return self._get_fallback_audiences(audienceIds)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with self.db_pool.get_connection() as conn:
|
with self.db_pool.get_connection() as conn:
|
||||||
@ -304,12 +408,39 @@ class DatabaseService:
|
|||||||
query = f"SELECT * FROM targetAudience WHERE id IN ({placeholders}) AND isDelete = 0"
|
query = f"SELECT * FROM targetAudience WHERE id IN ({placeholders}) AND isDelete = 0"
|
||||||
cursor.execute(query, audienceIds)
|
cursor.execute(query, audienceIds)
|
||||||
results = cursor.fetchall()
|
results = cursor.fetchall()
|
||||||
logger.info(f"批量查询受众信息: 请求{len(audienceIds)}个,找到{len(results)}个")
|
|
||||||
|
# 检查是否所有ID都找到了对应的记录
|
||||||
|
found_ids = {result['id'] for result in results}
|
||||||
|
missing_ids = set(audienceIds) - found_ids
|
||||||
|
|
||||||
|
if missing_ids:
|
||||||
|
logger.warning(f"部分受众ID未找到: {missing_ids}")
|
||||||
|
# 添加兜底数据
|
||||||
|
fallback_audiences = self._get_fallback_audiences(list(missing_ids))
|
||||||
|
results.extend(fallback_audiences)
|
||||||
|
|
||||||
|
logger.info(f"批量查询受众信息: 请求{len(audienceIds)}个,找到{len([r for r in results if r['id'] in audienceIds])}个")
|
||||||
return results
|
return results
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"批量查询受众信息失败: {e}")
|
logger.error(f"批量查询受众信息失败: {e}")
|
||||||
return []
|
return self._get_fallback_audiences(audienceIds)
|
||||||
|
|
||||||
|
def _get_fallback_audiences(self, audienceIds: List[int]) -> List[Dict[str, Any]]:
    """Build placeholder audience records for the given IDs.

    Each placeholder carries a generic description and is tagged with
    ``_is_fallback: True`` so it can be told apart from a database row.

    Args:
        audienceIds: IDs that need a stand-in record.

    Returns:
        One placeholder dict per ID, in the same order as ``audienceIds``.
    """
    placeholders = [
        {
            'id': aid,
            'audienceName': f'受众{aid}',
            'description': '默认受众描述',
            'isDelete': 0,
            '_is_fallback': True,
        }
        for aid in audienceIds
    ]
    logger.info(f"使用兜底受众数据: {len(placeholders)}个")
    return placeholders
|
||||||
|
|
||||||
def list_all_scenic_spots(self, user_id: Optional[int] = None, is_public: Optional[bool] = None) -> List[Dict[str, Any]]:
|
def list_all_scenic_spots(self, user_id: Optional[int] = None, is_public: Optional[bool] = None) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
|
|||||||
@ -114,10 +114,23 @@ class PromptService:
|
|||||||
logger.info(f"尝试连接数据库 ({attempt['desc']}): {connection_info}")
|
logger.info(f"尝试连接数据库 ({attempt['desc']}): {connection_info}")
|
||||||
|
|
||||||
# 创建连接池
|
# 创建连接池
|
||||||
|
# 从配置中分离MySQL连接池支持的参数和不支持的参数
|
||||||
|
config = attempt["config"].copy()
|
||||||
|
|
||||||
|
# MySQL连接池不支持的参数,需要移除
|
||||||
|
unsupported_params = [
|
||||||
|
'max_retry_attempts', 'query_timeout', 'soft_delete_field', 'active_record_value'
|
||||||
|
]
|
||||||
|
for param in unsupported_params:
|
||||||
|
config.pop(param, None)
|
||||||
|
|
||||||
|
# 设置连接池参数,使用配置文件中的值或默认值
|
||||||
|
pool_size = config.pop('pool_size', 5)
|
||||||
|
|
||||||
pool = pooling.MySQLConnectionPool(
|
pool = pooling.MySQLConnectionPool(
|
||||||
pool_name=f"prompt_service_pool_{int(time.time())}",
|
pool_name=f"prompt_service_pool_{int(time.time())}",
|
||||||
pool_size=5,
|
pool_size=pool_size,
|
||||||
**attempt["config"]
|
**config
|
||||||
)
|
)
|
||||||
|
|
||||||
# 测试连接
|
# 测试连接
|
||||||
|
|||||||
@ -19,6 +19,7 @@ from tweet.content_generator import ContentGenerator
|
|||||||
from tweet.content_judger import ContentJudger
|
from tweet.content_judger import ContentJudger
|
||||||
from api.services.prompt_builder import PromptBuilderService
|
from api.services.prompt_builder import PromptBuilderService
|
||||||
from api.services.prompt_service import PromptService
|
from api.services.prompt_service import PromptService
|
||||||
|
from api.services.database_service import DatabaseService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -109,22 +110,21 @@ class TweetService:
|
|||||||
logger.info(f"选题生成完成,请求ID: {requestId}, 数量: {len(topics)}")
|
logger.info(f"选题生成完成,请求ID: {requestId}, 数量: {len(topics)}")
|
||||||
return requestId, topics
|
return requestId, topics
|
||||||
|
|
||||||
async def generate_content(self, topic: Optional[Dict[str, Any]] = None,
|
async def generate_content(self, topic: Optional[Dict[str, Any]] = None, autoJudge: bool = False,
|
||||||
style_objects: Optional[List[Dict[str, Any]]] = None,
|
style_objects: Optional[List[Dict[str, Any]]] = None,
|
||||||
audience_objects: Optional[List[Dict[str, Any]]] = None,
|
audience_objects: Optional[List[Dict[str, Any]]] = None,
|
||||||
scenic_spot_objects: Optional[List[Dict[str, Any]]] = None,
|
scenic_spot_objects: Optional[List[Dict[str, Any]]] = None,
|
||||||
product_objects: Optional[List[Dict[str, Any]]] = None,
|
product_objects: Optional[List[Dict[str, Any]]] = None) -> Tuple[str, str, Dict[str, Any]]:
|
||||||
autoJudge: bool = False) -> Tuple[str, str, Dict[str, Any]]:
|
|
||||||
"""
|
"""
|
||||||
为选题生成内容
|
为单个选题生成内容
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
topic: 选题信息
|
topic: 选题信息(可能包含ID字段)
|
||||||
styles: 风格列表
|
autoJudge: 是否进行内嵌审核
|
||||||
audiences: 受众列表
|
style_objects: 风格对象列表(可选,用于兼容)
|
||||||
scenic_spots: 景区列表
|
audience_objects: 受众对象列表(可选,用于兼容)
|
||||||
products: 产品列表
|
scenic_spot_objects: 景区对象列表(可选,用于兼容)
|
||||||
autoJudge: 是否自动进行内容审核
|
product_objects: 产品对象列表(可选,用于兼容)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
请求ID、选题索引和生成的内容(包含judgeSuccess状态)
|
请求ID、选题索引和生成的内容(包含judgeSuccess状态)
|
||||||
@ -135,22 +135,24 @@ class TweetService:
|
|||||||
topicIndex = topic.get('index', 'N/A')
|
topicIndex = topic.get('index', 'N/A')
|
||||||
logger.info(f"开始为选题 {topicIndex} 生成内容{'(含审核)' if autoJudge else ''}")
|
logger.info(f"开始为选题 {topicIndex} 生成内容{'(含审核)' if autoJudge else ''}")
|
||||||
|
|
||||||
# 核心修改:创建一个增强版的topic,将所有需要的信息预先填充好
|
# 增强版的topic处理:优先使用ID获取最新数据
|
||||||
enhanced_topic = topic.copy()
|
enhanced_topic = await self._enhance_topic_with_database_data(topic)
|
||||||
if style_objects:
|
|
||||||
|
# 如果没有通过ID获取到数据,使用传入的对象参数作为兜底
|
||||||
|
if style_objects and not enhanced_topic.get('style_object'):
|
||||||
enhanced_topic['style_object'] = style_objects[0]
|
enhanced_topic['style_object'] = style_objects[0]
|
||||||
enhanced_topic['style'] = style_objects[0].get('styleName')
|
enhanced_topic['style'] = style_objects[0].get('styleName')
|
||||||
if audience_objects:
|
if audience_objects and not enhanced_topic.get('audience_object'):
|
||||||
enhanced_topic['audience_object'] = audience_objects[0]
|
enhanced_topic['audience_object'] = audience_objects[0]
|
||||||
enhanced_topic['targetAudience'] = audience_objects[0].get('audienceName')
|
enhanced_topic['targetAudience'] = audience_objects[0].get('audienceName')
|
||||||
if scenic_spot_objects:
|
if scenic_spot_objects and not enhanced_topic.get('scenic_spot_object'):
|
||||||
enhanced_topic['scenic_spot_object'] = scenic_spot_objects[0]
|
enhanced_topic['scenic_spot_object'] = scenic_spot_objects[0]
|
||||||
enhanced_topic['object'] = scenic_spot_objects[0].get('name')
|
enhanced_topic['object'] = scenic_spot_objects[0].get('name')
|
||||||
if product_objects:
|
if product_objects and not enhanced_topic.get('product_object'):
|
||||||
enhanced_topic['product_object'] = product_objects[0]
|
enhanced_topic['product_object'] = product_objects[0]
|
||||||
enhanced_topic['product'] = product_objects[0].get('productName')
|
enhanced_topic['product'] = product_objects[0].get('productName')
|
||||||
|
|
||||||
# 使用PromptBuilderService构建提示词,现在它只需要enhanced_topic
|
# 使用PromptBuilderService构建提示词
|
||||||
system_prompt, user_prompt = self.prompt_builder.build_content_prompt(enhanced_topic, "content")
|
system_prompt, user_prompt = self.prompt_builder.build_content_prompt(enhanced_topic, "content")
|
||||||
|
|
||||||
# 使用预构建的提示词生成内容
|
# 使用预构建的提示词生成内容
|
||||||
@ -179,24 +181,82 @@ class TweetService:
|
|||||||
content = {k: v for k, v in judged_content.items() if k != 'judge_success'}
|
content = {k: v for k, v in judged_content.items() if k != 'judge_success'}
|
||||||
content['judgeSuccess'] = True
|
content['judgeSuccess'] = True
|
||||||
else:
|
else:
|
||||||
logger.warning(f"选题 {topicIndex} 内容审核失败,保持原始内容")
|
logger.warning(f"选题 {topicIndex} 内容审核未通过")
|
||||||
# 审核失败:保持原始内容,添加judgeSuccess=False标记
|
# 审核失败:使用原始内容,添加judgeSuccess状态
|
||||||
content['judgeSuccess'] = False
|
content['judgeSuccess'] = False
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"选题 {topicIndex} 内嵌审核失败: {e},保持原始内容")
|
logger.error(f"选题 {topicIndex} 内容审核过程中发生错误: {e}", exc_info=True)
|
||||||
# 审核异常:保持原始内容,添加judgeSuccess=False标记
|
# 审核出错:使用原始内容,标记审核失败
|
||||||
content['judgeSuccess'] = False
|
content['judgeSuccess'] = False
|
||||||
else:
|
|
||||||
# 未启用审核:添加judgeSuccess=None标记,表示未进行审核
|
|
||||||
content['judgeSuccess'] = None
|
|
||||||
|
|
||||||
# 生成请求ID
|
# 生成请求ID
|
||||||
requestId = f"content-{datetime.now().strftime('%Y%m%d-%H%M%S')}-{str(uuid.uuid4())[:8]}"
|
requestId = f"content-{datetime.now().strftime('%Y%m%d-%H%M%S')}-{str(uuid.uuid4())[:8]}"
|
||||||
|
|
||||||
logger.info(f"内容生成完成,请求ID: {requestId}, 选题索引: {topicIndex}")
|
logger.info(f"选题 {topicIndex} 内容生成完成,请求ID: {requestId}")
|
||||||
return requestId, topicIndex, content
|
return requestId, topicIndex, content
|
||||||
|
|
||||||
|
async def _enhance_topic_with_database_data(self, topic: Dict[str, Any]) -> Dict[str, Any]:
    """Enrich a topic with records looked up through its ID fields.

    For each of ``styleIds``, ``audienceIds``, ``scenicSpotIds`` and
    ``productIds`` that is present and non-empty, the first ID (or the value
    itself when it is not a list) is looked up through ``DatabaseService``.
    When a record is returned it is stored under the matching ``*_object``
    key and the topic's display-name field is overwritten with the record's
    name.

    Any exception is logged and swallowed: the topic is returned with
    whatever enrichment succeeded before the failure.

    Args:
        topic: Original topic data; it is shallow-copied, not mutated.

    Returns:
        The enriched copy of ``topic``.
    """
    enhanced_topic = topic.copy()

    # (ID field on the topic, DatabaseService getter, key for the full record,
    #  topic field holding the display name, record field holding the name,
    #  label interpolated into the log line)
    lookups = (
        ('styleIds', 'get_style_by_id', 'style_object', 'style', 'styleName', '风格'),
        ('audienceIds', 'get_audience_by_id', 'audience_object', 'targetAudience', 'audienceName', '受众'),
        ('scenicSpotIds', 'get_scenic_spot_by_id', 'scenic_spot_object', 'object', 'name', '景区'),
        ('productIds', 'get_product_by_id', 'product_object', 'product', 'productName', '产品'),
    )

    try:
        # Reuse one healthy DatabaseService instead of constructing a new one
        # per topic. NOTE(review): construction presumably opens a MySQL
        # connection pool — confirm against DatabaseService.__init__.
        # An unavailable service is never cached, so a later call retries.
        db_service = getattr(self, '_db_service', None)
        if db_service is None or not db_service.is_available():
            db_service = DatabaseService(self.config_manager)
            if not db_service.is_available():
                logger.warning("数据库服务不可用,无法增强选题数据")
                return enhanced_topic
            self._db_service = db_service

        for ids_key, getter_name, object_key, name_key, name_field, label in lookups:
            ids = topic.get(ids_key)
            if not ids:
                continue

            # Only the first ID is used; a scalar value is accepted as-is.
            entity_id = ids[0] if isinstance(ids, list) else ids
            record = getattr(db_service, getter_name)(entity_id)
            if record:
                enhanced_topic[object_key] = record
                enhanced_topic[name_key] = record.get(name_field)
                logger.info(f"从数据库加载{label}数据: {record.get(name_field)} (ID: {entity_id})")

    except Exception as e:
        # Best-effort enrichment: never let a lookup failure abort generation.
        logger.error(f"增强选题数据时发生错误: {e}", exc_info=True)

    return enhanced_topic
|
||||||
|
|
||||||
async def generate_content_with_prompt(self, topic: Dict[str, Any], system_prompt: str, user_prompt: str) -> Tuple[str, str, Dict[str, Any]]:
|
async def generate_content_with_prompt(self, topic: Dict[str, Any], system_prompt: str, user_prompt: str) -> Tuple[str, str, Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
使用预构建的提示词为选题生成内容
|
使用预构建的提示词为选题生成内容
|
||||||
@ -303,6 +363,7 @@ class TweetService:
|
|||||||
|
|
||||||
for topic in topics:
|
for topic in topics:
|
||||||
topicIndex = topic.get('index', 'unknown')
|
topicIndex = topic.get('index', 'unknown')
|
||||||
|
# 直接传递带有ID的选题数据,不再需要传递额外的对象参数
|
||||||
_, _, content = await self.generate_content(topic, autoJudge=autoJudge)
|
_, _, content = await self.generate_content(topic, autoJudge=autoJudge)
|
||||||
|
|
||||||
if autoJudge:
|
if autoJudge:
|
||||||
|
|||||||
@ -4,5 +4,10 @@
|
|||||||
"password": "civmek-rezTed-0hovre",
|
"password": "civmek-rezTed-0hovre",
|
||||||
"database": "travel_content",
|
"database": "travel_content",
|
||||||
"port": 3306,
|
"port": 3306,
|
||||||
"charset": "utf8mb4"
|
"charset": "utf8mb4",
|
||||||
|
"pool_size": 10,
|
||||||
|
"max_retry_attempts": 3,
|
||||||
|
"query_timeout": 30,
|
||||||
|
"soft_delete_field": "isDelete",
|
||||||
|
"active_record_value": 0
|
||||||
}
|
}
|
||||||
8
core/algorithms/__init__.py
Normal file
8
core/algorithms/__init__.py
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
"""
|
||||||
|
Tweet模块 - 负责文字内容的生成、审核和管理
|
||||||
|
"""
|
||||||
|
|
||||||
|
__version__ = '1.0.0'
|
||||||
BIN
core/algorithms/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
core/algorithms/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
core/algorithms/__pycache__/topic_generator.cpython-312.pyc
Normal file
BIN
core/algorithms/__pycache__/topic_generator.cpython-312.pyc
Normal file
Binary file not shown.
BIN
core/algorithms/__pycache__/topic_parser.cpython-312.pyc
Normal file
BIN
core/algorithms/__pycache__/topic_parser.cpython-312.pyc
Normal file
Binary file not shown.
142
core/algorithms/content_generator.py
Normal file
142
core/algorithms/content_generator.py
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
"""
|
||||||
|
内容生成模块
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import json
|
||||||
|
from typing import Dict, Any, Tuple, Optional
|
||||||
|
|
||||||
|
from core.ai import AIAgent
|
||||||
|
from core.config import ConfigManager, GenerateTopicConfig, GenerateContentConfig
|
||||||
|
from utils.prompts import ContentPromptBuilder
|
||||||
|
from utils.file_io import OutputManager, process_llm_json_text
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class ContentGenerator:
    """Generate article content for a single topic.

    The generator builds (or receives) a system/user prompt pair, sends it to
    the AI agent, parses the reply as JSON and stores every intermediate
    artifact (prompts, raw response, parsed article) through the output
    manager, inside the directory that belongs to the topic.
    """

    # Sampling options that are forwarded to the AI agent when configured.
    _MODEL_PARAM_KEYS = ('temperature', 'top_p', 'presence_penalty')

    def __init__(self, ai_agent: AIAgent, config_manager: ConfigManager, output_manager: OutputManager):
        """Store the collaborators and load the generation configs.

        Args:
            ai_agent: Agent whose ``generate_text`` coroutine performs the AI call.
            config_manager: Source of the 'topic_gen' and 'content_gen' configs;
                also handed to the prompt builder.
            output_manager: Used to persist prompts, raw responses and results.
        """
        self.ai_agent = ai_agent
        self.config_manager = config_manager
        self.topic_config = config_manager.get_config('topic_gen', GenerateTopicConfig)
        self.content_config = config_manager.get_config('content_gen', GenerateContentConfig)
        self.output_manager = output_manager
        self.prompt_builder = ContentPromptBuilder(config_manager)

    async def generate_content_for_topic(self, topic: Dict[str, Any]) -> Dict[str, Any]:
        """Build the prompts for ``topic`` and generate its content.

        Args:
            topic: Topic dictionary; its 'index' entry (default 'N/A') selects
                the output directory and is used in log messages.

        Returns:
            The parsed content on success. On failure a dictionary with an
            'error' entry: the exception text when the AI call failed, or
            'JSONDecodeError' plus the raw reply under 'raw_content' when the
            reply could not be parsed.
        """
        topic_index = topic.get('index', 'N/A')
        logger.info(f"开始为选题 {topic_index} 生成内容...")

        # The prompt builder produces the system and user prompts separately.
        system_prompt = self.prompt_builder.get_system_prompt()
        user_prompt = self.prompt_builder.build_user_prompt(topic=topic)

        return await self._generate_and_save(topic_index, system_prompt, user_prompt)

    async def generate_content_with_prompt(self, topic: Dict[str, Any], system_prompt: str, user_prompt: str) -> Dict[str, Any]:
        """Generate content for ``topic`` from prompts built by the caller.

        Args:
            topic: Topic dictionary; its 'index' entry (default 'N/A') selects
                the output directory and is used in log messages.
            system_prompt: Ready-to-use system prompt.
            user_prompt: Ready-to-use user prompt.

        Returns:
            Same contract as :meth:`generate_content_for_topic`.
        """
        topic_index = topic.get('index', 'N/A')
        logger.info(f"使用预构建提示词为选题 {topic_index} 生成内容...")

        return await self._generate_and_save(topic_index, system_prompt, user_prompt)

    def _build_model_params(self) -> Dict[str, Any]:
        """Return the configured sampling options, omitting unset (None) values."""
        model_cfg = getattr(self.content_config, 'model', None)
        if not isinstance(model_cfg, dict):
            return {}
        configured = ((key, model_cfg.get(key)) for key in self._MODEL_PARAM_KEYS)
        return {key: value for key, value in configured if value is not None}

    async def _generate_and_save(self, topic_index: Any, system_prompt: str, user_prompt: str) -> Dict[str, Any]:
        """Save the prompts, call the AI, then parse and persist the reply."""
        # Everything for this topic is written below the topic's own directory.
        subdir = self.output_manager.get_topic_dir(topic_index).name

        # Keep the prompts on disk so a run can be debugged afterwards.
        self.output_manager.save_text(system_prompt, "content_system_prompt.txt", subdir=subdir)
        self.output_manager.save_text(user_prompt, "content_user_prompt.txt", subdir=subdir)

        model_params = self._build_model_params()

        try:
            # Only the first element of the returned 4-tuple (the raw text) is used.
            raw_result, _, _, _ = await self.ai_agent.generate_text(
                system_prompt=system_prompt,
                user_prompt=user_prompt,
                use_stream=True,
                stage="内容生成",
                **model_params
            )
            self.output_manager.save_text(raw_result, "content_raw_response.txt", subdir=subdir)
        except Exception as e:
            # Deliberate best-effort: report the failure to the caller as data
            # instead of raising, so one failing topic does not abort a batch.
            logger.critical(f"为选题 {topic_index} 生成内容时AI调用失败: {e}", exc_info=True)
            return {"error": str(e)}

        content_data = process_llm_json_text(raw_result)
        if not content_data:
            logger.error(f"解析内容JSON失败 for {topic_index}")
            return {"error": "JSONDecodeError", "raw_content": raw_result}

        self.output_manager.save_json(content_data, "article.json", subdir=subdir)
        logger.info(f"成功为选题 {topic_index} 生成并保存内容。")
        return content_data
|
||||||
212
core/algorithms/content_judger.py
Normal file
212
core/algorithms/content_judger.py
Normal file
@ -0,0 +1,212 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
"""
|
||||||
|
内容审核模块
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import json
|
||||||
|
from typing import Dict, Any, Union
|
||||||
|
|
||||||
|
from core.ai import AIAgent
|
||||||
|
from core.config import ConfigManager, GenerateTopicConfig, GenerateContentConfig
|
||||||
|
from utils.prompts import JudgerPromptBuilder
|
||||||
|
from utils.file_io import process_llm_json_text
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class ContentJudger:
    """Review generated content with an AI model and return the revised result."""

    # Sampling options that are forwarded to the AI agent when configured.
    _MODEL_PARAM_KEYS = ('temperature', 'top_p', 'presence_penalty')

    def __init__(self, ai_agent: AIAgent, config_manager: ConfigManager, output_manager=None):
        """Initialise the judger.

        Args:
            ai_agent: Agent whose ``generate_text`` coroutine performs the AI call.
            config_manager: Source of the 'topic_gen' and 'content_gen' configs;
                also handed to the prompt builder.
            output_manager: Optional manager used to save prompts and responses.
                When it is None nothing is written to disk.
        """
        self.ai_agent = ai_agent
        self.config_manager = config_manager
        self.topic_config = config_manager.get_config('topic_gen', GenerateTopicConfig)
        self.content_config = config_manager.get_config('content_gen', GenerateContentConfig)
        self.prompt_builder = JudgerPromptBuilder(config_manager)
        self.output_manager = output_manager

    async def judge_content(self, generated_content: Union[str, Dict[str, Any]], topic: Dict[str, Any]) -> Dict[str, Any]:
        """Build the review prompts and ask the AI to judge the content.

        Args:
            generated_content: The generated content, as a JSON string or a dict.
            topic: The topic the content was generated for; its 'index' entry
                (default 'unknown') names the output sub-directory.

        Returns:
            The judged data with ``judge_success`` set to True, or a dictionary
            with ``judge_success`` False and an 'error' entry on failure.
        """
        logger.info("开始审核生成的内容...")

        topic_dir = f"topic_{topic.get('index', 'unknown')}"
        original_tag = self._extract_original_tag(generated_content)

        # The prompt needs text, so dictionaries are serialised to JSON first.
        if isinstance(generated_content, dict):
            generated_content_str = json.dumps(generated_content, ensure_ascii=False, indent=2)
        else:
            generated_content_str = str(generated_content)

        system_prompt = self.prompt_builder.get_system_prompt()
        user_prompt = self.prompt_builder.build_user_prompt(
            generated_content=generated_content_str,
            topic=topic
        )

        return await self._run_review(
            topic_dir, system_prompt, user_prompt, original_tag, drop_analysis=False
        )

    async def judge_content_with_prompt(self, generated_content: Union[str, Dict[str, Any]], topic: Dict[str, Any], system_prompt: str, user_prompt: str) -> Dict[str, Any]:
        """Ask the AI to judge the content using prompts built by the caller.

        Unlike :meth:`judge_content`, the 'analysis' entry of the AI reply is
        removed from the returned data.

        Args:
            generated_content: The generated content, as a JSON string or a dict.
            topic: The topic the content was generated for; its 'index' entry
                (default 'unknown') names the output sub-directory.
            system_prompt: Ready-to-use system prompt.
            user_prompt: Ready-to-use user prompt.

        Returns:
            The judged data with ``judge_success`` set to True, or a dictionary
            with ``judge_success`` False and an 'error' entry on failure.
        """
        logger.info("开始使用预构建提示词审核生成的内容...")

        topic_dir = f"topic_{topic.get('index', 'unknown')}"
        original_tag = self._extract_original_tag(generated_content)

        return await self._run_review(
            topic_dir, system_prompt, user_prompt, original_tag, drop_analysis=True
        )

    def _extract_original_tag(self, generated_content: Union[str, Dict[str, Any]]) -> Any:
        """Return the 'tag' value of the original content, or [] when unavailable."""
        original_content = process_llm_json_text(generated_content)
        if original_content and isinstance(original_content, dict) and "tag" in original_content:
            original_tag = original_content.get("tag", [])
            logger.info(f"从原始内容中提取到标签: {original_tag}")
            return original_tag

        logger.warning("从原始内容提取标签失败")
        return []

    def _build_model_params(self) -> Dict[str, Any]:
        """Return the configured judger sampling options, omitting unset values."""
        model_cfg = getattr(self.content_config, 'judger_model', None)
        if not isinstance(model_cfg, dict):
            return {}
        configured = ((key, model_cfg.get(key)) for key in self._MODEL_PARAM_KEYS)
        return {key: value for key, value in configured if value is not None}

    async def _run_review(self, topic_dir: str, system_prompt: str, user_prompt: str, original_tag: Any, *, drop_analysis: bool) -> Dict[str, Any]:
        """Save the prompts, call the AI, validate the reply and persist it."""
        if self.output_manager:
            self.output_manager.save_text(system_prompt, f"{topic_dir}/judger_system_prompt.txt")
            self.output_manager.save_text(user_prompt, f"{topic_dir}/judger_user_prompt.txt")

        model_params = self._build_model_params()

        try:
            # Only the first element of the returned 4-tuple (the raw text) is used.
            raw_result, _, _, _ = await self.ai_agent.generate_text(
                system_prompt=system_prompt,
                user_prompt=user_prompt,
                use_stream=True,
                stage="内容审核",
                **model_params
            )

            if self.output_manager:
                self.output_manager.save_text(raw_result, f"{topic_dir}/judger_raw_response.txt")

        except Exception as e:
            # Deliberate best-effort: the failure is reported as data so the
            # caller can fall back to the unreviewed content.
            logger.critical(f"内容审核时AI调用失败: {e}", exc_info=True)
            return {"judge_success": False, "error": str(e)}

        judged_data = process_llm_json_text(raw_result)
        is_valid = (
            judged_data
            and isinstance(judged_data, dict)
            and "title" in judged_data
            and "content" in judged_data
        )
        if not is_valid:
            logger.warning("审核响应JSON格式不正确或缺少键")
            return {"judge_success": False, "error": "Invalid JSON response", "raw_response": raw_result}

        judged_data["judge_success"] = True

        if drop_analysis:
            # A default is required here: a reply that has 'title' and 'content'
            # but no 'analysis' is still valid and must not raise KeyError.
            judged_data.pop("analysis", None)

        # Tags from the original content take precedence over the reviewer's.
        # When the original had none, whatever 'tag' the reply carries is kept.
        if original_tag:
            judged_data["tag"] = original_tag
        logger.info(f"内容审核成功完成,使用标签: {judged_data.get('tag', [])}")

        if self.output_manager:
            self.output_manager.save_json(judged_data, f"{topic_dir}/article_judged.json")

        return judged_data
|
||||||
149
core/algorithms/topic_generator.py
Normal file
149
core/algorithms/topic_generator.py
Normal file
@ -0,0 +1,149 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
"""
|
||||||
|
选题生成模块
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, List, Optional, Tuple
|
||||||
|
|
||||||
|
from core.ai import AIAgent
|
||||||
|
from core.config import ConfigManager, GenerateTopicConfig
|
||||||
|
from utils.prompts import TopicPromptBuilder
|
||||||
|
from utils.file_io import OutputManager, process_llm_json_text
|
||||||
|
from .topic_parser import TopicParser
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class TopicGenerator:
    """Generate travel-related topics with the AI agent.

    The generator builds (or receives) a system/user prompt pair, sends it to
    the AI agent, parses the reply into a topic list and stores the prompts,
    the raw response and the parsed topics through the output manager.
    """

    # Sampling options that are forwarded to the AI agent when configured.
    _MODEL_PARAM_KEYS = ('temperature', 'top_p', 'presence_penalty')

    def __init__(self, ai_agent: AIAgent, config_manager: ConfigManager, output_manager: OutputManager):
        """Initialise the topic generator.

        Args:
            ai_agent: Agent whose ``generate_text`` coroutine performs the AI call.
            config_manager: Source of the 'topic_gen' config; also handed to
                the prompt builder.
            output_manager: Used to persist prompts, raw responses and results.
        """
        self.ai_agent = ai_agent
        self.config_manager = config_manager
        self.config = config_manager.get_config('topic_gen', GenerateTopicConfig)
        self.output_manager = output_manager
        self.prompt_builder = TopicPromptBuilder(config_manager)
        self.parser = TopicParser()

    async def generate_topics(self) -> Optional[List[Dict[str, Any]]]:
        """Run the full flow: build prompts, call the AI, parse and save.

        The user prompt is built from ``config.topic.num`` and
        ``config.topic.date``.

        Returns:
            The list of parsed topics, or None when the AI call failed or no
            valid topic could be parsed from the reply.
        """
        logger.info("开始执行选题生成流程...")

        system_prompt = self.prompt_builder.get_system_prompt()
        user_prompt = self.prompt_builder.build_user_prompt(
            numTopics=self.config.topic.num,
            month=self.config.topic.date
        )

        return await self._generate_from_prompts(system_prompt, user_prompt)

    async def generate_topics_with_prompt(self, system_prompt: str, user_prompt: str) -> Optional[List[Dict[str, Any]]]:
        """Generate topics from prompts built by the caller.

        Args:
            system_prompt: Ready-to-use system prompt.
            user_prompt: Ready-to-use user prompt.

        Returns:
            The list of parsed topics, or None on failure.
        """
        logger.info("使用预构建提示词开始执行选题生成流程...")

        return await self._generate_from_prompts(system_prompt, user_prompt)

    def _build_model_params(self) -> Dict[str, Any]:
        """Return the configured sampling options, omitting unset (None) values."""
        model_cfg = getattr(self.config, 'model', None)
        if not isinstance(model_cfg, dict):
            return {}
        configured = ((key, model_cfg.get(key)) for key in self._MODEL_PARAM_KEYS)
        return {key: value for key, value in configured if value is not None}

    async def _generate_from_prompts(self, system_prompt: str, user_prompt: str) -> Optional[List[Dict[str, Any]]]:
        """Save the prompts, call the AI, then parse and persist the topics."""
        # Keep the prompts on disk so a run can be debugged afterwards.
        self.output_manager.save_text(system_prompt, "topic_system_prompt.txt")
        self.output_manager.save_text(user_prompt, "topic_user_prompt.txt")

        model_params = self._build_model_params()

        try:
            # NOTE(review): an earlier comment here said streaming is usually
            # unnecessary for topic generation, yet True has always been
            # passed. The value is kept to preserve behaviour - confirm intent.
            raw_result, _, _, _ = await self.ai_agent.generate_text(
                system_prompt=system_prompt,
                user_prompt=user_prompt,
                use_stream=True,
                stage="选题生成",
                **model_params
            )
            self.output_manager.save_text(raw_result, "topics_raw_response.txt")
        except Exception as e:
            # Deliberate best-effort: failure is signalled with None, not raised.
            logger.critical(f"AI调用失败,无法生成选题: {e}", exc_info=True)
            return None

        topics = self.parser.parse(raw_result)
        if not topics:
            logger.error("未能从AI响应中解析出任何有效选题")
            return None

        self.output_manager.save_json(topics, "topics.json")
        logger.info(f"成功生成并保存 {len(topics)} 个选题")

        return topics
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
59
core/algorithms/topic_parser.py
Normal file
59
core/algorithms/topic_parser.py
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
"""
|
||||||
|
AI响应解析器模块
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import json
|
||||||
|
from typing import List, Dict, Any
|
||||||
|
from utils.file_io import process_llm_json_text
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class TopicParser:
    """Parse and validate the topic list produced by the AI model."""

    @staticmethod
    def parse(raw_text: str) -> List[Dict[str, Any]]:
        """Parse the raw AI reply and keep only well-formed topics.

        A topic is kept when it is a dictionary containing every required key:
        index, date, logic, object, product, style and targetAudience. The keys
        productLogic, styleLogic and targetAudienceLogic may also be present
        but are not checked.

        Args:
            raw_text: The raw string returned by the AI model.

        Returns:
            A list of topic dictionaries. The list is empty when the reply
            could not be parsed, was empty, or was not a JSON list.
        """
        logger.info("开始解析AI生成的选题...")

        # The shared helper handles extraction/repair of the JSON payload.
        parsed_json = process_llm_json_text(raw_text)

        if not parsed_json:
            logger.error("解析AI响应失败,无法获取JSON数据")
            return []

        if not isinstance(parsed_json, list):
            logger.error(f"解析结果不是列表,而是 {type(parsed_json)}")
            return []

        logger.info(f"成功解析 {len(parsed_json)} 个选题对象。开始验证...")

        required_keys = {"index", "date", "logic", "object", "product", "style", "targetAudience"}

        valid_topics = []
        for position, item in enumerate(parsed_json, start=1):
            if not (isinstance(item, dict) and required_keys.issubset(item.keys())):
                # Invalid entries are reported and dropped, not treated as fatal.
                logger.warning(f"第 {position} 个选题缺少必需键或格式不正确: {item}")
                continue
            valid_topics.append(item)

        logger.info(f"验证完成,获得 {len(valid_topics)} 个有效选题。")
        return valid_topics
|
||||||
Binary file not shown.
148
docs/ID映射机制优化说明.md
Normal file
148
docs/ID映射机制优化说明.md
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
# ID映射机制优化说明
|
||||||
|
|
||||||
|
## 🎯 解决的问题
|
||||||
|
|
||||||
|
### 原有问题
|
||||||
|
1. **数据库查询失败**:ID查询时经常找不到对应数据
|
||||||
|
2. **名称匹配不准确**:AI生成的选题名称与数据库中的名称不完全匹配
|
||||||
|
3. **缺少兜底机制**:查询失败时没有备用方案
|
||||||
|
4. **ID追踪缺失**:选题生成后无法保持ID的连续性
|
||||||
|
|
||||||
|
## 🔧 优化方案
|
||||||
|
|
||||||
|
### 1. 增强ID映射逻辑 (`api/routers/tweet.py`)
|
||||||
|
|
||||||
|
#### 模糊匹配机制
|
||||||
|
```python
|
||||||
|
def find_best_match(target_name: str, mapping: Dict[str, int]) -> Optional[int]:
    """Return the ID whose name best matches ``target_name``, or None.

    Matching is attempted in order of decreasing strictness: exact key,
    equality after removing spaces, then substring containment in either
    direction. The first hit in mapping order wins.
    """
    # 1. Exact match.
    if target_name in mapping:
        return mapping[target_name]

    # 2. Fuzzy match - compare with spaces removed.
    target_clean = target_name.replace(" ", "").strip()
    for name, id_val in mapping.items():
        if name.replace(" ", "").strip() == target_clean:
            return id_val

    # 3. Containment match - one name contains the other.
    #    Empty strings are skipped on both sides: "" is a substring of every
    #    string and would otherwise match the first entry unconditionally.
    if target_clean:
        for name, id_val in mapping.items():
            name_clean = name.replace(" ", "")
            if name_clean and (target_clean in name_clean or name_clean in target_clean):
                return id_val

    # 4. Nothing matched.
    logger.warning(f"未找到匹配的ID: '{target_name}'")
    return None
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 匹配率监控
|
||||||
|
- 记录每个选题的ID匹配情况
|
||||||
|
- 计算匹配率并在匹配率低于50%时发出警告
|
||||||
|
- 添加匹配元数据用于调试
|
||||||
|
|
||||||
|
### 2. 数据库服务兜底机制 (`api/services/database_service.py`)
|
||||||
|
|
||||||
|
#### 批量查询增强
|
||||||
|
```python
|
||||||
|
def get_styles_by_ids(self, styleIds: List[int]) -> List[Dict[str, Any]]:
|
||||||
|
# 检查哪些ID没有找到对应记录
|
||||||
|
found_ids = {result['id'] for result in results}
|
||||||
|
missing_ids = set(styleIds) - found_ids
|
||||||
|
|
||||||
|
if missing_ids:
|
||||||
|
# 添加兜底数据
|
||||||
|
fallback_styles = self._get_fallback_styles(list(missing_ids))
|
||||||
|
results.extend(fallback_styles)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 兜底数据提供
|
||||||
|
- 当数据库查询失败时,提供默认的结构化数据
|
||||||
|
- 标记兜底数据 (`_is_fallback: True`)
|
||||||
|
- 确保系统可以继续运行
|
||||||
|
|
||||||
|
### 3. 内容生成阶段优化 (`api/services/tweet.py`)
|
||||||
|
|
||||||
|
#### 智能数据增强
|
||||||
|
```python
|
||||||
|
async def _enhance_topic_with_database_data(self, topic: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
# 优先使用ID从数据库获取最新数据
|
||||||
|
if 'styleIds' in topic and topic['styleIds']:
|
||||||
|
style_data = db_service.get_style_by_id(style_id)
|
||||||
|
if style_data:
|
||||||
|
enhanced_topic['style_object'] = style_data
|
||||||
|
enhanced_topic['style'] = style_data.get('styleName')
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 多级兜底策略
|
||||||
|
1. **第一级**:通过ID从数据库获取最新数据
|
||||||
|
2. **第二级**:使用传入的对象参数作为兜底
|
||||||
|
3. **第三级**:使用数据库服务的兜底数据
|
||||||
|
|
||||||
|
## 🔄 完整流程
|
||||||
|
|
||||||
|
### 选题生成阶段
|
||||||
|
1. 接收ID列表 → 查询数据库获取完整对象
|
||||||
|
2. 构建ID到名称的映射关系
|
||||||
|
3. AI生成选题(包含名称)
|
||||||
|
4. 将生成的选题名称映射回ID
|
||||||
|
5. 返回包含ID的选题数据
|
||||||
|
|
||||||
|
### 内容生成阶段
|
||||||
|
1. 接收带ID的选题数据
|
||||||
|
2. 通过ID从数据库获取最新的详细信息
|
||||||
|
3. 增强选题数据
|
||||||
|
4. 生成内容
|
||||||
|
|
||||||
|
## 🎉 预期效果
|
||||||
|
|
||||||
|
### 1. 数据一致性
|
||||||
|
- 确保整个流程中ID的连续性
|
||||||
|
- 避免名称不匹配导致的数据丢失
|
||||||
|
|
||||||
|
### 2. 系统稳定性
|
||||||
|
- 多级兜底机制确保系统不会因为数据库问题而崩溃
|
||||||
|
- 详细的日志记录便于问题排查
|
||||||
|
|
||||||
|
### 3. 数据准确性
|
||||||
|
- 内容生成时使用最新的数据库数据
|
||||||
|
- 避免使用过期或不准确的缓存数据
|
||||||
|
|
||||||
|
### 4. 可观测性
|
||||||
|
- 匹配率监控
|
||||||
|
- 详细的日志记录
|
||||||
|
- 兜底数据标记
|
||||||
|
|
||||||
|
## 🚀 使用建议
|
||||||
|
|
||||||
|
### 1. 监控日志
|
||||||
|
关注以下日志信息:
|
||||||
|
- ID匹配率低于50%的警告
|
||||||
|
- 兜底数据使用情况
|
||||||
|
- 数据库查询失败的频率
|
||||||
|
|
||||||
|
### 2. 数据维护
|
||||||
|
- 定期清理重复数据
|
||||||
|
- 更新"请修改产品名字"等占位数据
|
||||||
|
- 确保软删除字段的正确使用
|
||||||
|
|
||||||
|
### 3. 性能优化
|
||||||
|
- 考虑为常用数据添加缓存
|
||||||
|
- 优化数据库查询性能
|
||||||
|
- 定期清理无效数据
|
||||||
|
|
||||||
|
## 📋 测试验证
|
||||||
|
|
||||||
|
### 1. 功能测试
|
||||||
|
- 测试各种ID组合的选题生成
|
||||||
|
- 验证名称匹配的准确性
|
||||||
|
- 确认兜底机制的有效性
|
||||||
|
|
||||||
|
### 2. 性能测试
|
||||||
|
- 大批量选题生成的性能
|
||||||
|
- 数据库查询的响应时间
|
||||||
|
- 内存使用情况
|
||||||
|
|
||||||
|
### 3. 错误处理测试
|
||||||
|
- 数据库连接失败时的行为
|
||||||
|
- 无效ID的处理
|
||||||
|
- 数据缺失时的兜底效果
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user