#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Content integration API models.

Pydantic models describing the content-integration request payload, its
validation rules, and the response payload.
"""

from typing import Any, Dict, List, Optional, Union

from pydantic import BaseModel, Field, field_validator, model_validator


# NOTE: class docstrings and Field descriptions in this module are kept
# verbatim: pydantic publishes them in the generated JSON schema, so they are
# runtime text rather than plain comments.
class Base64Document(BaseModel):
    """Base64编码的文档模型"""

    # File name of the document.
    filename: str = Field(..., description="文件名")
    # Base64-encoded file content.
    content: str = Field(..., description="Base64编码的文件内容")
    # MIME type of the file.
    mime_type: str = Field(..., description="文件MIME类型")


# Request model: accepts Base64 documents and/or a keyword search
# (keywords + cookie string), plus optional search tuning parameters.
class ContentIntegrationRequest(BaseModel):
    """内容整合请求模型"""

    # Optional list of Base64-encoded documents.
    documents: Optional[List[Base64Document]] = Field(default=None, description="Base64编码的文档列表")
    # Optional list of search keywords.
    keywords: Optional[List[str]] = Field(default=None, description="搜索关键词列表")
    # Optional Xiaohongshu cookie string.
    cookies: Optional[str] = Field(default=None, description="小红书Cookie字符串")

    # Xiaohongshu search configuration.
    # Sort order: 0 general, 1 newest, 2 most liked, 3 most commented,
    # 4 most collected.
    sort_type: Optional[int] = Field(default=2, ge=0, le=4, description="排序方式: 0综合排序, 1最新, 2最多点赞, 3最多评论, 4最多收藏")
    # Note type: 0 any, 1 video notes, 2 normal notes.
    note_type: Optional[int] = Field(default=2, ge=0, le=2, description="笔记类型: 0不限, 1视频笔记, 2普通笔记")
    # Note age: 0 any, 1 within a day, 2 within a week, 3 within half a year.
    note_time: Optional[int] = Field(default=0, ge=0, le=3, description="笔记时间: 0不限, 1一天内, 2一周内, 3半年内")
    # Note range: 0 any, 1 already viewed, 2 not viewed, 3 followed.
    note_range: Optional[int] = Field(default=0, ge=0, le=3, description="笔记范围: 0不限, 1已看过, 2未看过, 3已关注")
    # Location distance: 0 any, 1 same city, 2 nearby.
    pos_distance: Optional[int] = Field(default=0, ge=0, le=2, description="位置距离: 0不限, 1同城, 2附近")
    # Number of notes to search per keyword (1-50).
    query_num: Optional[int] = Field(default=10, ge=1, le=50, description="每个关键词搜索的笔记数量")

    # Document validation.
    @field_validator('documents')
    @classmethod
    def validate_documents(cls, v):
        """Reject a document list that was supplied but is empty.

        Returns the value unchanged otherwise (including ``None``).

        Raises:
            ValueError: if ``v`` is an empty list.
        """
        if v is not None and not v:
            raise ValueError("如果提供文档,列表不能为空")
        return v

    # Keyword validation.
    @field_validator('keywords')
    @classmethod
    def validate_keywords(cls, v):
        """Strip, drop blank, and de-duplicate the supplied keywords.

        Returns ``None`` when no keywords were supplied; otherwise the cleaned
        list with the first occurrence of each keyword kept in input order.

        Raises:
            ValueError: if the list is empty, or contains only blank strings.
        """
        if v is None:
            return v
        if not v:
            raise ValueError("如果提供关键词,列表不能为空")
        stripped = (k.strip() for k in v)
        # dict.fromkeys removes duplicates while preserving first-seen order;
        # a set would return the keywords in arbitrary order.
        cleaned = list(dict.fromkeys(k for k in stripped if k))
        if not cleaned:
            raise ValueError("关键词列表不能全为空")
        return cleaned

    # Cookie validation.
    @field_validator('cookies')
    @classmethod
    def validate_cookies(cls, v):
        """Strip surrounding whitespace from the cookie string.

        Returns ``None`` when no cookie was supplied.

        Raises:
            ValueError: if the cookie is empty or whitespace-only.
        """
        if v is None:
            return v
        stripped = v.strip()
        if not stripped:
            raise ValueError("如果提供Cookie,不能为空")
        return stripped

    # Cross-field validation.
    @model_validator(mode='after')
    def validate_request(self) -> "ContentIntegrationRequest":
        """Check that the request carries a usable combination of inputs.

        The request must contain documents, or both keywords and cookies;
        keywords and cookies must always be supplied together.

        After-mode model validators run on the validated model instance, so
        this is an instance method that returns ``self``.

        Raises:
            ValueError: if none of the accepted input combinations is present,
                or if only one of keywords/cookies is supplied.
        """
        has_documents = self.documents is not None
        has_keywords = self.keywords is not None
        has_cookies = self.cookies is not None

        # The order of these checks determines which message the caller sees.
        if not has_documents and not (has_keywords and has_cookies):
            raise ValueError("必须提供文档或(关键词和Cookie)中的至少一项")
        if has_keywords and not has_cookies:
            raise ValueError("提供关键词时必须提供Cookie")
        if has_cookies and not has_keywords:
            raise ValueError("提供Cookie时必须提供关键词")
        return self

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "documents": [
                    {
                        "filename": "travel_guide.pdf",
                        "content": "base64_encoded_content_here",
                        "mime_type": "application/pdf"
                    }
                ],
                "keywords": ["北京旅游", "故宫攻略", "长城一日游"],
                "cookies": "a1=your_cookie_value; web_session=your_session_value",
                "sort_type": 2,
                "note_type": 2,
                "note_time": 0,
                "note_range": 0,
                "pos_distance": 0,
                "query_num": 10
            }
        }


# Per-document metadata reported in the response.
class DocumentInfo(BaseModel):
    """文档信息模型"""

    # File path.
    file_path: str = Field(..., description="文件路径")
    # File type.
    file_type: str = Field(..., description="文件类型")
    # Content length.
    content_length: int = Field(..., description="内容长度")


# Summary of the Xiaohongshu notes reported in the response.
class XHSInfo(BaseModel):
    """小红书信息模型"""

    # Total number of notes.
    total_notes: int = Field(..., description="笔记总数")
    # List of authors.
    authors: List[str] = Field(..., description="作者列表")
    # Total number of interactions.
    total_interactions: int = Field(..., description="总互动数")


# Search configuration echoed in the response; fields mirror the request's
# search settings but are all required here.
class SearchConfig(BaseModel):
    """搜索配置模型"""

    # Sort order.
    sort_type: int = Field(..., description="排序方式")
    # Note type.
    note_type: int = Field(..., description="笔记类型")
    # Note age filter.
    note_time: int = Field(..., description="笔记时间")
    # Note range filter.
    note_range: int = Field(..., description="笔记范围")
    # Location distance filter.
    pos_distance: int = Field(..., description="位置距离")
    # Number of notes searched per keyword.
    query_num: int = Field(..., description="每个关键词搜索的笔记数量")


# Summary of the inputs that were processed.
class InputSummary(BaseModel):
    """输入摘要模型"""

    # Number of documents.
    document_count: int = Field(..., description="文档数量")
    # Number of Xiaohongshu notes.
    xhs_notes_count: int = Field(..., description="小红书笔记数量")
    # Keyword list.
    keywords: List[str] = Field(..., description="关键词列表")


# Detailed document section of the response.
class DocumentInfoDetail(BaseModel):
    """详细文档信息模型"""

    # Per-document metadata.
    documents: List[DocumentInfo] = Field(..., description="文档列表")
    # Length of the integrated text.
    integrated_text_length: int = Field(..., description="整合文本长度")


# Response model: three always-present fields plus optional success-only and
# failure-only fields.
class ContentIntegrationResponse(BaseModel):
    """内容整合响应模型"""

    # Whether the operation succeeded.
    success: bool = Field(..., description="是否成功")
    # Timestamp string.
    timestamp: str = Field(..., description="时间戳")
    # Processing time string.
    processing_time: str = Field(..., description="处理时间")

    # Fields populated on success.
    input_summary: Optional[InputSummary] = Field(None, description="输入摘要")
    document_info: Optional[DocumentInfoDetail] = Field(None, description="文档信息")
    xhs_info: Optional[XHSInfo] = Field(None, description="小红书信息")
    integrated_content: Optional[str] = Field(None, description="整合后的内容")
    search_config: Optional[SearchConfig] = Field(None, description="搜索配置")

    # Field populated on failure.
    error_message: Optional[str] = Field(None, description="错误信息")

    class Config:
        # Pydantic v2 reads `json_schema_extra`; the v1 key `schema_extra` is
        # not applied to the generated schema.
        json_schema_extra = {
            "example": {
                "success": True,
                "timestamp": "20250715_143022",
                "processing_time": "45.32秒",
                "input_summary": {
                    "document_count": 2,
                    "xhs_notes_count": 25,
                    "keywords": ["北京旅游", "故宫攻略"]
                },
                "document_info": {
                    "documents": [
                        {
                            "file_path": "uploads/travel_guide.pdf",
                            "file_type": "pdf",
                            "content_length": 5420
                        }
                    ],
                    "integrated_text_length": 8650
                },
                "xhs_info": {
                    "total_notes": 25,
                    "authors": ["旅游达人小王", "北京导游张三"],
                    "total_interactions": 15420
                },
                "integrated_content": "# 北京旅游全攻略...",
                "search_config": {
                    "sort_type": 2,
                    "note_type": 2,
                    "note_time": 0,
                    "note_range": 0,
                    "pos_distance": 0,
                    "query_num": 10
                }
            }
        }