#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""API routes for content integration."""

import base64
import logging
import os
import tempfile
from datetime import datetime
from typing import Any, Dict, List

from fastapi import APIRouter, HTTPException

from api.models.content_integration import (
    ContentIntegrationRequest,
    ContentIntegrationResponse,
)
from api.services.content_integration_service import ContentIntegrationService

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/content-integration", tags=["content-integration"])

# Module-level service instance shared by all requests.
integration_service = ContentIntegrationService()


def _remove_temp_files(paths: List[str]) -> None:
    """Best-effort removal of the temporary files listed in *paths*.

    A file that is already gone is ignored; any other failure is logged and
    does not interrupt the removal of the remaining files.
    """
    for path in paths:
        try:
            os.unlink(path)
        except FileNotFoundError:
            # Nothing to clean up for this path.
            pass
        except Exception as e:
            logger.error(f"清理临时文件 {path} 失败: {e}")


def _build_response(result: Dict[str, Any]) -> ContentIntegrationResponse:
    """Convert the service-layer result dict into the response model.

    On success every field is read with ``result[...]`` so a malformed result
    raises ``KeyError``. On failure fields are read with ``.get`` so partial
    results still produce a response.
    """
    if result["success"]:
        response = ContentIntegrationResponse(
            success=True,
            timestamp=result["timestamp"],
            processing_time=result["processing_time"],
            input_summary=result["input_summary"],
            document_info=result["document_info"],
            xhs_info=result["xhs_info"],
            integrated_content=result["integrated_content"],
            search_config=result["search_config"],
            error_message=None,  # no error message on success
        )
        logger.info(f"内容整合成功,处理时间:{result['processing_time']}")
        return response

    response = ContentIntegrationResponse(
        success=False,
        timestamp=result.get("timestamp", datetime.now().strftime("%Y%m%d_%H%M%S")),
        processing_time=result.get("processing_time", "0秒"),
        input_summary=result.get("input_summary"),
        document_info=result.get("document_info"),
        xhs_info=result.get("xhs_info"),
        integrated_content=result.get("integrated_content"),
        search_config=result.get("search_config"),
        error_message=result.get("error_message"),
    )
    # Use .get here as well: a failure result without "error_message" must
    # still be returned to the caller rather than raising KeyError.
    logger.error(f"内容整合失败:{result.get('error_message')}")
    return response


@router.post("/integrate", response_model=ContentIntegrationResponse)
async def integrate_content(request: ContentIntegrationRequest) -> ContentIntegrationResponse:
    """
    Integrate uploaded documents with Xiaohongshu note content.

    The handler:
    1. Decodes each base64-encoded document in the request and writes it to a
       temporary file whose suffix matches the document's filename extension.
    2. Passes the temporary file paths, keywords, cookies and search options
       to ``ContentIntegrationService.integrate_content``. Per this
       endpoint's original description, the service searches Xiaohongshu
       notes by keyword (optional) and uses an LLM to merge both sources into
       travel material; that logic lives outside this module.
    3. Converts the service result into ``ContentIntegrationResponse``.

    Temporary files are always removed before the handler returns.

    Args:
        request: Integration request parameters.

    Returns:
        The integration result (``success`` is False when the service reports
        a failure).

    Raises:
        HTTPException: 400 when a document cannot be decoded or written,
            500 for any other unexpected failure.
    """
    temp_files: List[str] = []

    try:
        # Materialize the base64 documents as temporary files.
        for doc in request.documents or []:
            try:
                # Decode first so an invalid payload never creates a file.
                content = base64.b64decode(doc.content)
                suffix = os.path.splitext(doc.filename)[1]
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
                    # Register the path before writing so the `finally`
                    # clause removes the file even if the write fails.
                    temp_files.append(temp_file.name)
                    temp_file.write(content)
            except Exception as e:
                logger.error(f"处理文档 {doc.filename} 失败: {e}")
                raise HTTPException(
                    status_code=400,
                    detail=f"文档 {doc.filename} 处理失败: {e}",
                ) from e

        keyword_count = len(request.keywords) if request.keywords else 0
        logger.info(f"收到内容整合请求:文档 {len(temp_files)} 个,关键词 {keyword_count} 个")

        # Delegate the actual work to the service layer.
        result = await integration_service.integrate_content(
            document_paths=temp_files,
            keywords=request.keywords,
            cookies=request.cookies,
            sort_type=request.sort_type,
            note_type=request.note_type,
            note_time=request.note_time,
            note_range=request.note_range,
            pos_distance=request.pos_distance,
            query_num=request.query_num,
        )

        return _build_response(result)

    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) must reach the client
        # unchanged instead of being re-wrapped as a 500 below.
        raise
    except Exception as e:
        logger.error(f"内容整合接口异常:{e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"内容整合处理失败:{e}",
        ) from e
    finally:
        # Clean up temporary files regardless of the outcome.
        _remove_temp_files(temp_files)


@router.get("/health")
async def health_check() -> Dict[str, str]:
    """
    Health-check endpoint.

    Returns:
        A dict with the service status, name and a status message.

    Raises:
        HTTPException: 503 when the module-level service instance is falsy
            or the check itself fails.
    """
    try:
        # Verify the service instance was initialized.
        if not integration_service:
            raise Exception("服务未正确初始化")

        return {
            "status": "healthy",
            "service": "content-integration",
            "message": "内容整合服务运行正常",
        }
    except Exception as e:
        logger.error(f"健康检查失败:{e}")
        raise HTTPException(
            status_code=503,
            detail=f"服务不可用:{e}",
        ) from e


@router.get("/config/options")
async def get_config_options() -> Dict[str, Any]:
    """
    Describe the available configuration options.

    Returns:
        A mapping from each option name to its allowed values and their
        descriptions, plus the list of supported document formats.
    """
    return {
        "sort_type": {
            "0": "综合排序",
            "1": "最新",
            "2": "最多点赞",
            "3": "最多评论",
            "4": "最多收藏",
        },
        "note_type": {
            "0": "不限",
            "1": "视频笔记",
            "2": "普通笔记",
        },
        "note_time": {
            "0": "不限",
            "1": "一天内",
            "2": "一周内",
            "3": "半年内",
        },
        "note_range": {
            "0": "不限",
            "1": "已看过",
            "2": "未看过",
            "3": "已关注",
        },
        "pos_distance": {
            "0": "不限",
            "1": "同城",
            "2": "附近",
        },
        "query_num": "每个关键词搜索的笔记数量(1-50)",
        "supported_document_formats": [
            "PDF (.pdf)",
            "Word (.docx, .doc)",
            "PowerPoint (.pptx, .ppt)",
            "Excel (.xlsx, .xls)",
            "Text (.txt)",
            "Markdown (.md)",
            "Images (.jpg, .jpeg, .png, .gif, .bmp, .tiff)",
            "CSV (.csv)",
        ],
    }