TravelContentCreator/api/routers/content_integration.py

201 lines
6.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Content integration API routes.
"""
import logging
import tempfile
import os
import base64
from fastapi import APIRouter, HTTPException
from typing import Dict, Any
from api.models.content_integration import (
ContentIntegrationRequest,
ContentIntegrationResponse
)
from api.services.content_integration_service import ContentIntegrationService
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router for this module; every route below is registered under the
# "/content-integration" prefix.
router = APIRouter(prefix="/content-integration", tags=["content-integration"])
# Global service instance, created once at import time and used by the
# route handlers below.
integration_service = ContentIntegrationService()
# Fallback temp-file suffixes, used only when the uploaded filename has no
# extension of its own.
_MIME_TO_EXT = {
    'text/plain': '.txt',
    'application/pdf': '.pdf',
    'application/msword': '.doc',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
    'image/jpeg': '.jpg',
    'image/png': '.png',
}


def _write_document_to_temp_file(doc, temp_files: list) -> None:
    """Decode one base64 document and write it to a new temporary file.

    The new file's path is appended to ``temp_files`` *before* any bytes are
    written, so the caller's cleanup removes the file even if the write fails.

    Args:
        doc: Uploaded document; its ``content``, ``filename`` and (when the
            filename has no extension) ``mime_type`` attributes are read.
        temp_files: List that collects the paths of created temporary files.

    Raises:
        HTTPException: 400 when the base64 payload cannot be decoded.
    """
    # Drop a data-URI prefix such as "data:image/jpeg;base64," if present.
    content = doc.content
    if ',' in content:
        content = content.split(',', 1)[1]

    # Decode before creating the file so a bad payload leaves nothing on disk.
    try:
        decoded_content = base64.b64decode(content)
    except (ValueError, TypeError) as e:  # binascii.Error is a ValueError
        logger.error(f"Base64解码失败: {e}")
        raise HTTPException(
            status_code=400,
            detail=f"文档 {doc.filename} 的Base64内容无效: {str(e)}"
        ) from e

    # Prefer the extension of the uploaded filename; otherwise infer one from
    # the MIME type, defaulting to ".bin".
    suffix = os.path.splitext(doc.filename)[1]
    if not suffix:
        suffix = _MIME_TO_EXT.get(doc.mime_type, '.bin')

    # delete=False: the file must outlive this "with" so the service layer can
    # read it by path; the caller removes it afterwards.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        temp_files.append(temp_file.name)
        temp_file.write(decoded_content)
    logger.info(f"成功保存临时文件: {temp_file.name}")


@router.post("/integrate", response_model=ContentIntegrationResponse)
async def integrate_content(request: ContentIntegrationRequest) -> ContentIntegrationResponse:
    """Integrate uploaded documents with Xiaohongshu note content.

    This endpoint:
    1. Processes user-uploaded base64-encoded documents (PDF, Word, images
       and similar formats) by writing them to temporary files.
    2. Searches Xiaohongshu for notes matching the keywords (optional).
    3. Uses an LLM to merge both into comprehensive travel material.

    Steps 2 and 3 are carried out by the service layer.

    Args:
        request: Integration request parameters.

    Returns:
        The integration result produced by the service layer.

    Raises:
        HTTPException: 400 when a document is invalid or cannot be processed,
            500 when the integration itself fails.
    """
    temp_files = []
    try:
        # Materialize every base64 document as a temporary file.
        for doc in request.documents or []:
            try:
                _write_document_to_temp_file(doc, temp_files)
            except HTTPException:
                # Already a well-formed client error; do not re-wrap it.
                raise
            except Exception as e:
                logger.error(f"处理文档 {doc.filename} 失败: {e}")
                raise HTTPException(
                    status_code=400,
                    detail=f"文档 {doc.filename} 处理失败: {str(e)}"
                ) from e

        logger.info(f"收到内容整合请求:文档 {len(temp_files)} 个,关键词 {len(request.keywords) if request.keywords else 0}")

        # Delegate the actual work to the service layer.
        return await integration_service.integrate_content(
            document_paths=temp_files,
            keywords=request.keywords or [],
            cookies=request.cookies or "",
            sort_type=request.sort_type,
            note_type=request.note_type,
            note_time=request.note_time,
            note_range=request.note_range,
            pos_distance=request.pos_distance,
            query_num=request.query_num
        )
    except HTTPException:
        # HTTPException subclasses Exception; without this clause the 400s
        # raised above would be turned into 500s by the handler below.
        raise
    except Exception as e:
        logger.error(f"内容整合接口异常:{e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"内容整合处理失败:{str(e)}"
        ) from e
    finally:
        # Best-effort cleanup: a failure to delete one file is logged and
        # must not mask the response or stop the remaining deletions.
        for temp_file in temp_files:
            try:
                if os.path.exists(temp_file):
                    os.unlink(temp_file)
            except Exception as e:
                logger.error(f"清理临时文件 {temp_file} 失败: {e}")
@router.get("/health")
async def health_check() -> Dict[str, str]:
    """Health-check endpoint.

    Returns:
        Service status information when the integration service is available.

    Raises:
        HTTPException: 503 when the module-level service instance is falsy or
            the check itself fails.
    """
    healthy_payload = {
        "status": "healthy",
        "service": "content-integration",
        "message": "内容整合服务运行正常"
    }
    try:
        # Verify that the service instance was initialised properly.
        service_ready = bool(integration_service)
        if not service_ready:
            raise Exception("服务未正确初始化")
    except Exception as exc:
        logger.error(f"健康检查失败:{exc}")
        raise HTTPException(
            status_code=503,
            detail=f"服务不可用:{str(exc)}"
        )
    return healthy_payload
@router.get("/config/options")
async def get_config_options() -> Dict[str, Any]:
    """Describe the configuration options accepted by the integration API.

    Returns:
        A mapping from option name to its allowed values (each keyed by the
        stringified numeric code), plus a hint for ``query_num`` and the list
        of supported document formats.
    """

    def numbered(*labels: str) -> Dict[str, str]:
        # Key every label by its position: "0", "1", "2", ...
        return {str(code): label for code, label in enumerate(labels)}

    options: Dict[str, Any] = {}
    options["sort_type"] = numbered("综合排序", "最新", "最多点赞", "最多评论", "最多收藏")
    options["note_type"] = numbered("不限", "视频笔记", "普通笔记")
    options["note_time"] = numbered("不限", "一天内", "一周内", "半年内")
    options["note_range"] = numbered("不限", "已看过", "未看过", "已关注")
    options["pos_distance"] = numbered("不限", "同城", "附近")
    options["query_num"] = "每个关键词搜索的笔记数量1-50"
    options["supported_document_formats"] = [
        "PDF (.pdf)",
        "Word (.docx, .doc)",
        "PowerPoint (.pptx, .ppt)",
        "Excel (.xlsx, .xls)",
        "Text (.txt)",
        "Markdown (.md)",
        "Images (.jpg, .jpeg, .png, .gif, .bmp, .tiff)",
        "CSV (.csv)",
    ]
    return options