# bangbang-aigc-server/api/routers/content_integration.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
内容整合API路由
"""

import logging
import tempfile
import os
import base64
from fastapi import APIRouter, HTTPException
from typing import Dict, Any

from api.models.content_integration import (
    ContentIntegrationRequest, 
    ContentIntegrationResponse
)
from api.services.content_integration_service import ContentIntegrationService

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Router for every endpoint in this file; all paths are mounted under
# the "/content-integration" prefix and share the same tag.
router = APIRouter(prefix="/content-integration", tags=["content-integration"])

# Global service instance, created once at import time and shared by the
# request handlers below.
integration_service = ContentIntegrationService()


@router.post("/integrate", response_model=ContentIntegrationResponse)
async def integrate_content(request: ContentIntegrationRequest) -> ContentIntegrationResponse:
    """
    Integrate uploaded documents with Xiaohongshu note content.

    The handler:
    1. Decodes every base64-encoded document in the request into a temporary
       file whose suffix is taken from the document's filename.
    2. Passes the temporary file paths, the keywords and the search options
       to ``integration_service.integrate_content``.
    3. Converts the dict returned by the service into a
       ``ContentIntegrationResponse``.
    4. Removes the temporary files, whether or not processing succeeded.

    Args:
        request: Integration request parameters.

    Returns:
        The integration result. When the service reports a failure the
        response has ``success=False`` and carries the service's
        ``error_message``.

    Raises:
        HTTPException: 400 when a document cannot be decoded or written to
            disk; 500 for any other unexpected failure.
    """
    temp_files = []
    try:
        # Materialise the base64 documents as temporary files.
        for doc in request.documents or []:
            try:
                # Decode before creating the file so that invalid base64
                # leaves nothing behind on disk.
                content = base64.b64decode(doc.content)
                suffix = os.path.splitext(doc.filename)[1]
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
                    # Register the path before writing so the ``finally``
                    # clause removes the file even if the write fails.
                    temp_files.append(temp_file.name)
                    temp_file.write(content)
            except Exception as e:
                logger.error(f"处理文档 {doc.filename} 失败: {e}")
                raise HTTPException(
                    status_code=400,
                    detail=f"文档 {doc.filename} 处理失败: {str(e)}"
                ) from e

        logger.info(f"收到内容整合请求：文档 {len(temp_files)} 个，关键词 {len(request.keywords) if request.keywords else 0} 个")

        # Delegate the actual work to the service layer.
        result = await integration_service.integrate_content(
            document_paths=temp_files,
            keywords=request.keywords,
            cookies=request.cookies,
            sort_type=request.sort_type,
            note_type=request.note_type,
            note_time=request.note_time,
            note_range=request.note_range,
            pos_distance=request.pos_distance,
            query_num=request.query_num
        )

        # Convert the service result into the response model.
        if result["success"]:
            response = ContentIntegrationResponse(
                success=True,
                timestamp=result["timestamp"],
                processing_time=result["processing_time"],
                input_summary=result["input_summary"],
                document_info=result["document_info"],
                xhs_info=result["xhs_info"],
                integrated_content=result["integrated_content"],
                search_config=result["search_config"],
                error_message=None  # no error message on success
            )
            logger.info(f"内容整合成功，处理时间：{result['processing_time']}")
        else:
            from datetime import datetime
            response = ContentIntegrationResponse(
                success=False,
                timestamp=result.get("timestamp", datetime.now().strftime("%Y%m%d_%H%M%S")),
                processing_time=result.get("processing_time", "0秒"),
                input_summary=result.get("input_summary"),
                document_info=result.get("document_info"),
                xhs_info=result.get("xhs_info"),
                integrated_content=result.get("integrated_content"),
                search_config=result.get("search_config"),
                error_message=result.get("error_message")
            )
            # ``.get`` keeps a missing key from turning a structured failure
            # response into a KeyError (and therefore a 500).
            logger.error(f"内容整合失败：{result.get('error_message')}")

        return response

    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) must reach the client
        # unchanged instead of being rewrapped as a 500 below.
        raise
    except Exception as e:
        logger.error(f"内容整合接口异常：{e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"内容整合处理失败：{str(e)}"
        )
    finally:
        # Best-effort cleanup of the temporary files.
        for temp_file in temp_files:
            try:
                if os.path.exists(temp_file):
                    os.unlink(temp_file)
            except Exception as e:
                logger.error(f"清理临时文件 {temp_file} 失败: {e}")

@router.get("/health")
async def health_check() -> Dict[str, str]:
    """
    Report whether the content-integration service is available.

    Returns:
        A dict with ``status``, ``service`` and ``message`` entries.

    Raises:
        HTTPException: 503 when the module-level service instance is falsy
            or the check itself raises.
    """
    try:
        # A truthy service instance is treated as "initialised".
        service_ready = bool(integration_service)
        if service_ready:
            healthy_payload = {
                "status": "healthy",
                "service": "content-integration",
                "message": "内容整合服务运行正常"
            }
            return healthy_payload

        raise Exception("服务未正确初始化")
    except Exception as exc:
        logger.error(f"健康检查失败：{exc}")
        raise HTTPException(
            status_code=503,
            detail=f"服务不可用：{str(exc)}"
        )


@router.get("/config/options")
async def get_config_options() -> Dict[str, Any]:
    """
    Describe the configuration options accepted by the integration endpoint.

    Returns:
        A dict mapping each option name to its allowed values and their
        descriptions, plus the list of supported document formats.
    """
    # Each table maps an option value (as a string) to its description.
    sort_choices = {
        "0": "综合排序",
        "1": "最新",
        "2": "最多点赞",
        "3": "最多评论",
        "4": "最多收藏",
    }
    type_choices = {
        "0": "不限",
        "1": "视频笔记",
        "2": "普通笔记",
    }
    time_choices = {
        "0": "不限",
        "1": "一天内",
        "2": "一周内",
        "3": "半年内",
    }
    range_choices = {
        "0": "不限",
        "1": "已看过",
        "2": "未看过",
        "3": "已关注",
    }
    distance_choices = {
        "0": "不限",
        "1": "同城",
        "2": "附近",
    }
    document_formats = [
        "PDF (.pdf)",
        "Word (.docx, .doc)",
        "PowerPoint (.pptx, .ppt)",
        "Excel (.xlsx, .xls)",
        "Text (.txt)",
        "Markdown (.md)",
        "Images (.jpg, .jpeg, .png, .gif, .bmp, .tiff)",
        "CSV (.csv)",
    ]

    # Key order is kept as-is so the serialised payload is unchanged.
    options: Dict[str, Any] = {}
    options["sort_type"] = sort_choices
    options["note_type"] = type_choices
    options["note_time"] = time_choices
    options["note_range"] = range_choices
    options["pos_distance"] = distance_choices
    options["query_num"] = "每个关键词搜索的笔记数量（1-50）"
    options["supported_document_formats"] = document_formats
    return options