#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Content integration API routes."""

import logging
from datetime import datetime
from typing import Any, Dict

from fastapi import APIRouter, BackgroundTasks, HTTPException

from api.models.content_integration import (
    ContentIntegrationRequest,
    ContentIntegrationResponse,
)
from api.services.content_integration_service import ContentIntegrationService

logger = logging.getLogger(__name__)
router = APIRouter(prefix="/content-integration", tags=["content-integration"])

# Module-level service instance shared by all handlers in this router.
integration_service = ContentIntegrationService()


@router.post("/integrate", response_model=ContentIntegrationResponse)
async def integrate_content(request: ContentIntegrationRequest) -> ContentIntegrationResponse:
    """
    Integrate uploaded documents with Xiaohongshu note content.

    As described by the endpoint's original documentation, the service:
    1. Reads the user's uploaded document files (PDF, Word, images, ...)
    2. Searches Xiaohongshu for notes matching the keywords
    3. Uses an LLM to merge both into one comprehensive travel document

    Args:
        request: Integration request parameters. A ``document_paths`` of
            ``None`` is treated as an empty list.

    Returns:
        The integration result. When the service reports failure the
        response has ``success=False`` and carries whatever fields the
        service supplied, with ``error_message`` describing the problem.

    Raises:
        HTTPException: Status 500 when anything inside the handler raises.
    """
    try:
        # Normalise into a local instead of assigning back onto the request
        # model, so the incoming request object is left untouched.
        document_paths = [] if request.document_paths is None else request.document_paths

        logger.info(f"收到内容整合请求:文档 {len(document_paths)} 个,关键词 {len(request.keywords)} 个")

        # Delegate the actual work to the service layer.
        result = await integration_service.integrate_content(
            document_paths=document_paths,
            keywords=request.keywords,
            cookies=request.cookies,
            output_path=request.output_path,
            sort_type=request.sort_type,
            note_type=request.note_type,
            note_time=request.note_time,
            note_range=request.note_range,
            pos_distance=request.pos_distance,
            query_num=request.query_num,
        )

        # Convert the service's dict into the response model.
        if result["success"]:
            # A successful result is indexed strictly: a missing key here
            # raises and is reported as a 500 by the handler below.
            response = ContentIntegrationResponse(
                success=True,
                timestamp=result["timestamp"],
                processing_time=result["processing_time"],
                input_summary=result["input_summary"],
                document_info=result["document_info"],
                xhs_info=result["xhs_info"],
                integrated_content=result["integrated_content"],
                search_config=result["search_config"],
                error_message=None,  # no error message on success
            )
            logger.info(f"内容整合成功,处理时间:{result['processing_time']}")
        else:
            # A failed result may be sparse, so every field is read with
            # .get() and the timestamp / processing time get fallbacks.
            response = ContentIntegrationResponse(
                success=False,
                timestamp=result.get("timestamp", datetime.now().strftime("%Y%m%d_%H%M%S")),
                processing_time=result.get("processing_time", "0秒"),
                input_summary=result.get("input_summary"),
                document_info=result.get("document_info"),
                xhs_info=result.get("xhs_info"),
                integrated_content=result.get("integrated_content"),
                search_config=result.get("search_config"),
                error_message=result.get("error_message"),
            )
            # Use .get() here too: indexing would raise KeyError on a result
            # without "error_message" and turn this failure response into a 500.
            logger.error(f"内容整合失败:{result.get('error_message')}")

        return response

    except Exception as e:
        logger.error(f"内容整合接口异常:{e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"内容整合处理失败:{str(e)}",
        ) from e


@router.get("/health")
async def health_check() -> Dict[str, str]:
    """
    Health check endpoint.

    Returns:
        A dict with the service status, service name and a status message.

    Raises:
        HTTPException: Status 503 when the module-level service instance
            is falsy or the check itself raises.
    """
    try:
        # Verify the service instance was initialised.
        if not integration_service:
            raise Exception("服务未正确初始化")

        return {
            "status": "healthy",
            "service": "content-integration",
            "message": "内容整合服务运行正常",
        }

    except Exception as e:
        logger.error(f"健康检查失败:{e}")
        raise HTTPException(
            status_code=503,
            detail=f"服务不可用:{str(e)}",
        ) from e


@router.get("/config/options")
async def get_config_options() -> Dict[str, Any]:
    """
    Describe the available configuration options.

    Returns:
        A static dict mapping each configuration item to its allowed
        values and their labels, plus the list of supported document
        formats.
    """
    return {
        "sort_type": {
            "0": "综合排序",
            "1": "最新",
            "2": "最多点赞",
            "3": "最多评论",
            "4": "最多收藏",
        },
        "note_type": {
            "0": "不限",
            "1": "视频笔记",
            "2": "普通笔记",
        },
        "note_time": {
            "0": "不限",
            "1": "一天内",
            "2": "一周内",
            "3": "半年内",
        },
        "note_range": {
            "0": "不限",
            "1": "已看过",
            "2": "未看过",
            "3": "已关注",
        },
        "pos_distance": {
            "0": "不限",
            "1": "同城",
            "2": "附近",
        },
        "query_num": "每个关键词搜索的笔记数量(1-50)",
        "supported_document_formats": [
            "PDF (.pdf)",
            "Word (.docx, .doc)",
            "PowerPoint (.pptx, .ppt)",
            "Excel (.xlsx, .xls)",
            "Text (.txt)",
            "Markdown (.md)",
            "Images (.jpg, .jpeg, .png, .gif, .bmp, .tiff)",
            "CSV (.csv)",
        ],
    }