bangbang-aigc-server/api/routers/content_integration.py

205 lines
6.6 KiB
Python
Raw Permalink Normal View History

2025-07-31 15:35:23 +08:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Content integration API routes.

Defines a FastAPI ``APIRouter`` mounted under ``/content-integration``
whose handlers delegate to ``ContentIntegrationService``.
"""
import logging
import tempfile
import os
import base64
from fastapi import APIRouter, HTTPException
from typing import Dict, Any
from api.models.content_integration import (
ContentIntegrationRequest,
ContentIntegrationResponse
)
from api.services.content_integration_service import ContentIntegrationService
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router for this module; every route is registered under /content-integration.
router = APIRouter(prefix="/content-integration", tags=["content-integration"])
# Global service instance, created once when the module is imported.
integration_service = ContentIntegrationService()
@router.post("/integrate", response_model=ContentIntegrationResponse)
async def integrate_content(request: ContentIntegrationRequest) -> ContentIntegrationResponse:
    """
    Integrate uploaded documents with Xiaohongshu note content.

    The handler:
    1. Decodes every base64-encoded document in ``request.documents`` into a
       temporary file whose suffix is taken from the document's filename.
    2. Hands the temp-file paths, the keywords and the search options to
       ``integration_service.integrate_content`` (the keyword search and the
       LLM-based merge happen in the service layer, not here).
    3. Maps the returned dict onto a ``ContentIntegrationResponse``.

    All temporary files are removed before the handler returns, whether it
    succeeds or fails.

    Args:
        request: Integration request carrying the documents, keywords,
            cookies and search options.

    Returns:
        The integration result. ``success`` is False and ``error_message``
        is populated when the service reports a failure.

    Raises:
        HTTPException: 400 when a document cannot be decoded or written to
            a temporary file; 500 for any other unexpected failure.
    """
    temp_files = []
    try:
        # Materialize each base64 document as a temp file for the service layer.
        for doc in request.documents or []:
            try:
                # Decode before creating the file so an invalid payload never
                # leaves an orphaned temp file behind.
                content = base64.b64decode(doc.content)
                suffix = os.path.splitext(doc.filename)[1]
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
                    # Record the path first: if the write fails, the
                    # ``finally`` clause below still removes the file.
                    temp_files.append(temp_file.name)
                    temp_file.write(content)
            except Exception as e:
                logger.error(f"处理文档 {doc.filename} 失败: {e}")
                raise HTTPException(
                    status_code=400,
                    detail=f"文档 {doc.filename} 处理失败: {str(e)}"
                ) from e

        logger.info(f"收到内容整合请求:文档 {len(temp_files)} 个,关键词 {len(request.keywords) if request.keywords else 0}")

        # Delegate the actual work to the service layer.
        result = await integration_service.integrate_content(
            document_paths=temp_files,
            keywords=request.keywords,
            cookies=request.cookies,
            sort_type=request.sort_type,
            note_type=request.note_type,
            note_time=request.note_time,
            note_range=request.note_range,
            pos_distance=request.pos_distance,
            query_num=request.query_num
        )

        # Convert the service result into the response model.
        if result["success"]:
            response = ContentIntegrationResponse(
                success=True,
                timestamp=result["timestamp"],
                processing_time=result["processing_time"],
                input_summary=result["input_summary"],
                document_info=result["document_info"],
                xhs_info=result["xhs_info"],
                integrated_content=result["integrated_content"],
                search_config=result["search_config"],
                error_message=None  # no error message on success
            )
            logger.info(f"内容整合成功,处理时间:{result['processing_time']}")
            return response

        # Failure path: every field is optional, so read defensively.
        from datetime import datetime

        error_message = result.get("error_message")
        response = ContentIntegrationResponse(
            success=False,
            timestamp=result.get("timestamp", datetime.now().strftime("%Y%m%d_%H%M%S")),
            processing_time=result.get("processing_time", "0秒"),
            input_summary=result.get("input_summary"),
            document_info=result.get("document_info"),
            xhs_info=result.get("xhs_info"),
            integrated_content=result.get("integrated_content"),
            search_config=result.get("search_config"),
            error_message=error_message
        )
        # Use the value fetched with .get(): a missing key must not turn a
        # handled failure into a KeyError (and therefore a 500).
        logger.error(f"内容整合失败:{error_message}")
        return response
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) must reach the client
        # unchanged instead of being rewrapped as a 500 below.
        raise
    except Exception as e:
        logger.error(f"内容整合接口异常:{e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"内容整合处理失败:{str(e)}"
        ) from e
    finally:
        # Best-effort cleanup of temporary files; never mask the real outcome.
        for temp_path in temp_files:
            try:
                os.unlink(temp_path)
            except FileNotFoundError:
                # Already gone; nothing to clean up.
                pass
            except Exception as e:
                logger.error(f"清理临时文件 {temp_path} 失败: {e}")
@router.get("/health")
async def health_check() -> Dict[str, str]:
    """
    Health-check endpoint.

    Returns:
        A status payload when the module-level service instance is truthy.

    Raises:
        HTTPException: 503 when the service instance is falsy or the check
            itself raises.
    """
    try:
        # Verify that the service instance was initialized.
        service_ready = bool(integration_service)
        if not service_ready:
            raise Exception("服务未正确初始化")
    except Exception as exc:
        logger.error(f"健康检查失败:{exc}")
        raise HTTPException(
            status_code=503,
            detail=f"服务不可用:{str(exc)}"
        )

    status_payload = dict(
        status="healthy",
        service="content-integration",
        message="内容整合服务运行正常",
    )
    return status_payload
@router.get("/config/options")
async def get_config_options() -> Dict[str, Any]:
    """
    Describe the available configuration options.

    Returns:
        A mapping from each option name to its allowed values and labels,
        plus a description of ``query_num`` and the list of supported
        document formats.
    """

    def numbered(*labels: str) -> Dict[str, str]:
        # Key each label by its position as a string: "0", "1", "2", ...
        return {str(index): label for index, label in enumerate(labels)}

    options: Dict[str, Any] = {}
    options["sort_type"] = numbered("综合排序", "最新", "最多点赞", "最多评论", "最多收藏")
    options["note_type"] = numbered("不限", "视频笔记", "普通笔记")
    options["note_time"] = numbered("不限", "一天内", "一周内", "半年内")
    options["note_range"] = numbered("不限", "已看过", "未看过", "已关注")
    options["pos_distance"] = numbered("不限", "同城", "附近")
    options["query_num"] = "每个关键词搜索的笔记数量1-50"
    options["supported_document_formats"] = [
        "PDF (.pdf)",
        "Word (.docx, .doc)",
        "PowerPoint (.pptx, .ppt)",
        "Excel (.xlsx, .xls)",
        "Text (.txt)",
        "Markdown (.md)",
        "Images (.jpg, .jpeg, .png, .gif, .bmp, .tiff)",
        "CSV (.csv)",
    ]
    return options