TravelContentCreator/api/routers/content_integration.py

178 lines
5.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
内容整合API路由
"""
import logging
from fastapi import APIRouter, HTTPException, BackgroundTasks
from typing import Dict, Any
from api.models.content_integration import (
ContentIntegrationRequest,
ContentIntegrationResponse
)
from api.services.content_integration_service import ContentIntegrationService
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# Router for the endpoints registered on it; routes share the
# "/content-integration" URL prefix and the "content-integration" tag.
router = APIRouter(prefix="/content-integration", tags=["content-integration"])
# Global service instance: constructed once when this module is imported and
# used by the route handlers defined in this module.
integration_service = ContentIntegrationService()
@router.post("/integrate", response_model=ContentIntegrationResponse)
async def integrate_content(request: ContentIntegrationRequest) -> ContentIntegrationResponse:
    """
    Integrate uploaded documents with Xiaohongshu note content.

    The request is forwarded to the service layer, which is expected to:
    1. Read the user-supplied document files (PDF, Word, images, ...).
    2. Search Xiaohongshu for notes matching the keywords.
    3. Use an LLM to merge both sources into one travel reference document.

    Args:
        request: Integration request parameters. A ``document_paths`` of
            ``None`` is treated as an empty list.

    Returns:
        A ``ContentIntegrationResponse``. When the service reports failure
        (``result["success"]`` is falsy) the response has ``success=False``
        and carries whatever fields the service provided.

    Raises:
        HTTPException: With status 500 when an unexpected exception occurs
            while processing. An ``HTTPException`` raised further down the
            call chain is propagated unchanged.
    """
    from datetime import datetime

    try:
        # Default a missing document list locally instead of mutating the
        # incoming request model.
        document_paths = request.document_paths
        if document_paths is None:
            document_paths = []

        logger.info(f"收到内容整合请求:文档 {len(document_paths)} 个,关键词 {len(request.keywords)}")

        # Delegate the actual work to the service layer.
        result = await integration_service.integrate_content(
            document_paths=document_paths,
            keywords=request.keywords,
            cookies=request.cookies,
            output_path=request.output_path,
            sort_type=request.sort_type,
            note_type=request.note_type,
            note_time=request.note_time,
            note_range=request.note_range,
            pos_distance=request.pos_distance,
            query_num=request.query_num,
        )

        # Convert the service result dict into the response model.
        if result["success"]:
            response = ContentIntegrationResponse(
                success=True,
                timestamp=result["timestamp"],
                processing_time=result["processing_time"],
                input_summary=result["input_summary"],
                document_info=result["document_info"],
                xhs_info=result["xhs_info"],
                integrated_content=result["integrated_content"],
                search_config=result["search_config"],
                error_message=None,  # no error message on success
            )
            logger.info(f"内容整合成功,处理时间:{result['processing_time']}")
        else:
            # A failure result may be sparse, so every field is read
            # defensively with .get().
            error_message = result.get("error_message")
            response = ContentIntegrationResponse(
                success=False,
                timestamp=result.get("timestamp", datetime.now().strftime("%Y%m%d_%H%M%S")),
                processing_time=result.get("processing_time", "0秒"),
                input_summary=result.get("input_summary"),
                document_info=result.get("document_info"),
                xhs_info=result.get("xhs_info"),
                integrated_content=result.get("integrated_content"),
                search_config=result.get("search_config"),
                error_message=error_message,
            )
            # Log via the defensively-read value: indexing the key directly
            # would raise KeyError on a sparse failure result and turn the
            # structured failure response into an HTTP 500.
            logger.error(f"内容整合失败:{error_message}")
        return response
    except HTTPException:
        # Keep the status code and detail chosen by whoever raised it.
        raise
    except Exception as e:
        logger.error(f"内容整合接口异常:{e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"内容整合处理失败:{str(e)}"
        ) from e
@router.get("/health")
async def health_check() -> Dict[str, str]:
    """
    Health-check endpoint.

    Returns:
        A status payload describing the service as healthy.

    Raises:
        HTTPException: With status 503 when the module-level service
            instance is falsy or the check itself fails.
    """
    try:
        # A truthy service instance counts as correctly initialized.
        if integration_service:
            payload = {
                "status": "healthy",
                "service": "content-integration",
                "message": "内容整合服务运行正常"
            }
            return payload
        raise Exception("服务未正确初始化")
    except Exception as exc:
        logger.error(f"健康检查失败:{exc}")
        unavailable = HTTPException(
            status_code=503,
            detail=f"服务不可用:{str(exc)}"
        )
        raise unavailable
@router.get("/config/options")
async def get_config_options() -> Dict[str, Any]:
    """
    Describe the available configuration options.

    Returns:
        A mapping from each option name to its allowed values. Enumerated
        options map a string code ("0", "1", ...) to its label; ``query_num``
        maps to a descriptive string and ``supported_document_formats`` to a
        list of format descriptions.
    """

    def _indexed(labels):
        # Key each label by its position, rendered as a string code.
        return {str(code): label for code, label in enumerate(labels)}

    # Labels are listed in code order: position 0 becomes key "0", and so on.
    sort_labels = ["综合排序", "最新", "最多点赞", "最多评论", "最多收藏"]
    type_labels = ["不限", "视频笔记", "普通笔记"]
    time_labels = ["不限", "一天内", "一周内", "半年内"]
    range_labels = ["不限", "已看过", "未看过", "已关注"]
    distance_labels = ["不限", "同城", "附近"]

    document_formats = [
        "PDF (.pdf)",
        "Word (.docx, .doc)",
        "PowerPoint (.pptx, .ppt)",
        "Excel (.xlsx, .xls)",
        "Text (.txt)",
        "Markdown (.md)",
        "Images (.jpg, .jpeg, .png, .gif, .bmp, .tiff)",
        "CSV (.csv)",
    ]

    options: Dict[str, Any] = {}
    options["sort_type"] = _indexed(sort_labels)
    options["note_type"] = _indexed(type_labels)
    options["note_time"] = _indexed(time_labels)
    options["note_range"] = _indexed(range_labels)
    options["pos_distance"] = _indexed(distance_labels)
    options["query_num"] = "每个关键词搜索的笔记数量1-50"
    options["supported_document_formats"] = document_formats
    return options