#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Content-integration API routes."""

import logging
import tempfile
import os
import base64
from fastapi import APIRouter, HTTPException
from typing import Dict, Any, List

from api.models.content_integration import (
    ContentIntegrationRequest,
    ContentIntegrationResponse
)
from api.services.content_integration_service import ContentIntegrationService

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/content-integration", tags=["content-integration"])

# Module-level service instance shared by every request handler below.
integration_service = ContentIntegrationService()

# Fallback file suffixes, used only when the uploaded filename has no extension.
_MIME_TO_SUFFIX = {
    'text/plain': '.txt',
    'application/pdf': '.pdf',
    'application/msword': '.doc',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
    'image/jpeg': '.jpg',
    'image/png': '.png',
}


def _strip_data_uri_prefix(content: str) -> str:
    """Return *content* without a leading ``data:<mime>;base64,`` style header.

    Everything up to and including the first comma is dropped; content without
    a comma is returned unchanged.
    """
    _, separator, payload = content.partition(',')
    return payload if separator else content


def _choose_suffix(doc) -> str:
    """Pick the temp-file suffix from the filename, else from the MIME type."""
    suffix = os.path.splitext(doc.filename)[1]
    if suffix:
        return suffix
    return _MIME_TO_SUFFIX.get(doc.mime_type, '.bin')


def _save_document(doc, temp_files: List[str]) -> None:
    """Decode one base64 document into a temp file and record its path.

    Args:
        doc: Request document exposing ``content``, ``filename`` and
            ``mime_type`` attributes.
        temp_files: List that receives the path of the created file so the
            caller can delete it later.

    Raises:
        HTTPException: 400 when the base64 payload cannot be decoded.
    """
    payload = _strip_data_uri_prefix(doc.content)
    suffix = _choose_suffix(doc)

    # Decode before touching the filesystem so an invalid payload never
    # leaves an orphaned temp file behind.
    try:
        decoded_content = base64.b64decode(payload)
    except (ValueError, TypeError) as e:  # binascii.Error subclasses ValueError
        logger.error(f"Base64解码失败: {e}")
        raise HTTPException(
            status_code=400,
            detail=f"文档 {doc.filename} 的Base64内容无效: {str(e)}"
        ) from e

    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        # Register the path first: if the write fails, the caller's cleanup
        # still removes the file (delete=False means nobody else will).
        temp_files.append(temp_file.name)
        temp_file.write(decoded_content)
    logger.info(f"成功保存临时文件: {temp_file.name}")


@router.post("/integrate", response_model=ContentIntegrationResponse)
async def integrate_content(request: ContentIntegrationRequest) -> ContentIntegrationResponse:
    """Integrate uploaded documents with searched note content.

    The handler:
      1. Decodes each base64-encoded document in ``request.documents`` into a
         temporary file.
      2. Passes the temp-file paths, keywords, cookies and search options to
         ``integration_service.integrate_content``. Per the original endpoint
         documentation the service searches Xiaohongshu notes by keyword
         (optional) and merges everything with an LLM into a travel guide;
         that logic is not visible in this module.
      3. Deletes every temporary file, whether or not the request succeeded.

    Args:
        request: Integration request parameters.

    Returns:
        The result produced by the integration service.

    Raises:
        HTTPException: 400 when a document cannot be decoded or processed,
            500 for any other failure during integration.
    """
    temp_files: List[str] = []

    try:
        for doc in request.documents or []:
            try:
                _save_document(doc, temp_files)
            except HTTPException:
                # Already carries the right status code and message.
                raise
            except Exception as e:
                logger.error(f"处理文档 {doc.filename} 失败: {e}")
                raise HTTPException(
                    status_code=400,
                    detail=f"文档 {doc.filename} 处理失败: {str(e)}"
                ) from e

        logger.info(
            f"收到内容整合请求:文档 {len(temp_files)} 个,"
            f"关键词 {len(request.keywords) if request.keywords else 0} 个"
        )

        # Delegate the actual work to the service layer.
        result = await integration_service.integrate_content(
            document_paths=temp_files,
            keywords=request.keywords or [],
            cookies=request.cookies or "",
            sort_type=request.sort_type,
            note_type=request.note_type,
            note_time=request.note_time,
            note_range=request.note_range,
            pos_distance=request.pos_distance,
            query_num=request.query_num
        )
        return result

    except HTTPException:
        # Do not downgrade deliberate client errors (e.g. 400) to a 500.
        raise
    except Exception as e:
        logger.error(f"内容整合接口异常:{e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"内容整合处理失败:{str(e)}"
        ) from e
    finally:
        # Best-effort cleanup: a failure to delete one file must not mask the
        # response or prevent the remaining files from being removed.
        for temp_file in temp_files:
            try:
                os.unlink(temp_file)
            except FileNotFoundError:
                pass
            except OSError as e:
                logger.error(f"清理临时文件 {temp_file} 失败: {e}")


@router.get("/health")
async def health_check() -> Dict[str, str]:
    """Report whether the content-integration service is available.

    Returns:
        A status payload when the module-level service instance is truthy.

    Raises:
        HTTPException: 503 when the service instance is falsy.
    """
    if not integration_service:
        reason = "服务未正确初始化"
        logger.error(f"健康检查失败:{reason}")
        raise HTTPException(
            status_code=503,
            detail=f"服务不可用:{reason}"
        )

    return {
        "status": "healthy",
        "service": "content-integration",
        "message": "内容整合服务运行正常"
    }


@router.get("/config/options")
async def get_config_options() -> Dict[str, Any]:
    """Describe the accepted values of each search/configuration option.

    Returns:
        A mapping from option name to its allowed values and their
        descriptions, plus the list of supported document formats.
    """
    return {
        "sort_type": {
            "0": "综合排序",
            "1": "最新",
            "2": "最多点赞",
            "3": "最多评论",
            "4": "最多收藏"
        },
        "note_type": {
            "0": "不限",
            "1": "视频笔记",
            "2": "普通笔记"
        },
        "note_time": {
            "0": "不限",
            "1": "一天内",
            "2": "一周内",
            "3": "半年内"
        },
        "note_range": {
            "0": "不限",
            "1": "已看过",
            "2": "未看过",
            "3": "已关注"
        },
        "pos_distance": {
            "0": "不限",
            "1": "同城",
            "2": "附近"
        },
        "query_num": "每个关键词搜索的笔记数量(1-50)",
        "supported_document_formats": [
            "PDF (.pdf)",
            "Word (.docx, .doc)",
            "PowerPoint (.pptx, .ppt)",
            "Excel (.xlsx, .xls)",
            "Text (.txt)",
            "Markdown (.md)",
            "Images (.jpg, .jpeg, .png, .gif, .bmp, .tiff)",
            "CSV (.csv)"
        ]
    }