#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Model definitions for the content-integration API."""

from typing import List, Dict, Any, Optional, Union
from datetime import datetime

from pydantic import BaseModel, Field


def _datetime_to_iso(value: datetime) -> str:
    """Serialize a ``datetime`` as an ISO-8601 string for JSON output."""
    return value.isoformat()


# ---------------------------------------------------------------------------
# Request models
# ---------------------------------------------------------------------------


class XHSSearchRequest(BaseModel):
    """Xiaohongshu search request model."""

    keyword: str = Field(
        ..., min_length=1, max_length=100, description="搜索关键词"
    )
    max_notes: int = Field(
        default=20, ge=1, le=100, description="最大笔记数量"
    )
    # Sort order: 0 = general, 1 = newest, 2 = most liked,
    # 3 = most commented, 4 = most collected.
    sort_type: int = Field(
        default=0,
        ge=0,
        le=4,
        description="排序类型 (0:综合排序, 1:最新, 2:最多点赞, 3:最多评论, 4:最多收藏)",
    )
    # Note kind: 0 = any, 1 = video notes, 2 = regular notes.
    note_type: int = Field(
        default=0,
        ge=0,
        le=2,
        description="笔记类型 (0:不限, 1:视频笔记, 2:普通笔记)",
    )
    download_media: bool = Field(
        default=True, description="是否下载媒体文件"
    )

    class Config:
        schema_extra = {
            "example": {
                "keyword": "上海迪士尼攻略",
                "max_notes": 20,
                "sort_type": 0,
                "note_type": 0,
                "download_media": True,
            }
        }


class IntegrationRequest(BaseModel):
    """Content integration request model."""

    keyword: str = Field(
        ..., min_length=1, max_length=100, description="搜索关键词"
    )
    document_paths: List[str] = Field(..., description="文档路径列表")
    max_notes: int = Field(
        default=20, ge=1, le=100, description="最大笔记数量"
    )
    # Same numeric ranges as XHSSearchRequest.sort_type / note_type.
    sort_type: int = Field(default=0, ge=0, le=4, description="排序类型")
    note_type: int = Field(default=0, ge=0, le=2, description="笔记类型")
    download_media: bool = Field(
        default=True, description="是否下载媒体文件"
    )
    output_format: str = Field(default="summary", description="输出格式")
    include_llm_processing: bool = Field(
        default=True, description="是否包含LLM处理"
    )

    class Config:
        schema_extra = {
            "example": {
                "keyword": "上海迪士尼攻略",
                "document_paths": [
                    "/path/to/document1.pdf",
                    "/path/to/document2.docx",
                ],
                "max_notes": 20,
                "sort_type": 0,
                "note_type": 0,
                "download_media": True,
                "output_format": "summary",
                "include_llm_processing": True,
            }
        }


class BatchIntegrationRequest(BaseModel):
    """Batch integration request model."""

    tasks: List[IntegrationRequest] = Field(
        ..., description="整合任务列表"
    )

    class Config:
        schema_extra = {
            "example": {
                "tasks": [
                    {
                        "keyword": "上海迪士尼攻略",
                        "document_paths": ["/path/to/document1.pdf"],
                        "max_notes": 20,
                    },
                    {
                        "keyword": "北京环球影城攻略",
                        "document_paths": ["/path/to/document2.pdf"],
                        "max_notes": 15,
                    },
                ]
            }
        }


class CookieManagementRequest(BaseModel):
    """Cookie management request model."""

    name: str = Field(
        ..., min_length=1, max_length=50, description="Cookie名称"
    )
    cookie_string: str = Field(
        ..., min_length=10, description="Cookie字符串"
    )
    user_info: Optional[Dict[str, Any]] = Field(
        default=None, description="用户信息"
    )

    class Config:
        schema_extra = {
            "example": {
                "name": "user1",
                "cookie_string": "your_cookie_string_here",
                "user_info": {
                    "username": "用户1",
                    "description": "测试用户",
                },
            }
        }


class ExportRequest(BaseModel):
    """Export request model."""

    task_id: str = Field(..., description="任务ID")
    output_format: str = Field(default="summary", description="输出格式")
    filename: Optional[str] = Field(default=None, description="文件名")

    class Config:
        schema_extra = {
            "example": {
                "task_id": "task_123456",
                "output_format": "summary",
                "filename": "result.txt",
            }
        }


# ---------------------------------------------------------------------------
# Response models
# ---------------------------------------------------------------------------


class XHSNoteResponse(BaseModel):
    """Xiaohongshu note response model."""

    note_id: str = Field(..., description="笔记ID")
    title: str = Field(..., description="笔记标题")
    content: str = Field(..., description="笔记内容")
    author: str = Field(..., description="作者昵称")
    author_id: str = Field(..., description="作者ID")
    tags: List[str] = Field(default_factory=list, description="标签列表")
    images: List[str] = Field(default_factory=list, description="图片列表")
    videos: List[str] = Field(default_factory=list, description="视频列表")
    likes: int = Field(default=0, description="点赞数")
    comments: int = Field(default=0, description="评论数")
    shares: int = Field(default=0, description="分享数")
    created_time: str = Field(..., description="创建时间")
    note_url: str = Field(..., description="笔记URL")


class XHSSearchResponse(BaseModel):
    """Xiaohongshu search response model."""

    keyword: str = Field(..., description="搜索关键词")
    notes: List[XHSNoteResponse] = Field(
        default_factory=list, description="笔记列表"
    )
    total_count: int = Field(..., description="总数量")
    success: bool = Field(..., description="是否成功")
    error_message: str = Field(default="", description="错误信息")
    processing_time: float = Field(default=0.0, description="处理时间")


class DocumentContentResponse(BaseModel):
    """Document content response model."""

    file_path: str = Field(..., description="文件路径")
    content: str = Field(..., description="文档内容")
    file_type: str = Field(..., description="文件类型")
    metadata: Dict[str, Any] = Field(
        default_factory=dict, description="元数据"
    )


class IntegratedContentResponse(BaseModel):
    """Integrated content response model."""

    documents: List[DocumentContentResponse] = Field(
        default_factory=list, description="文档列表"
    )
    integrated_text: str = Field(..., description="整合后的文本")
    summary: str = Field(..., description="摘要")
    key_topics: List[str] = Field(
        default_factory=list, description="关键主题"
    )
    total_length: int = Field(..., description="总长度")


class IntegrationResponse(BaseModel):
    """Integration response model."""

    task_id: str = Field(..., description="任务ID")
    keyword: str = Field(..., description="关键词")
    success: bool = Field(..., description="是否成功")
    xhs_content: Optional[XHSSearchResponse] = Field(
        default=None, description="小红书内容"
    )
    document_content: Optional[IntegratedContentResponse] = Field(
        default=None, description="文档内容"
    )
    integrated_summary: str = Field(default="", description="整合摘要")
    processing_time: float = Field(default=0.0, description="处理时间")
    # Defaults to the (naive, local) time at which the instance is created.
    created_time: datetime = Field(
        default_factory=datetime.now, description="创建时间"
    )
    error_message: str = Field(default="", description="错误信息")

    class Config:
        json_encoders = {datetime: _datetime_to_iso}


class CookieStatsResponse(BaseModel):
    """Cookie statistics response model."""

    total_cookies: int = Field(..., description="总Cookie数量")
    valid_cookies: int = Field(..., description="有效Cookie数量")
    invalid_cookies: int = Field(..., description="无效Cookie数量")
    current_cookie: Optional[str] = Field(
        default=None, description="当前使用的Cookie"
    )
    cookie_details: List[Dict[str, Any]] = Field(
        default_factory=list, description="Cookie详情"
    )


class ServiceStatusResponse(BaseModel):
    """Service status response model."""

    status: str = Field(..., description="服务状态")
    startup_time: datetime = Field(..., description="启动时间")
    output_path: str = Field(..., description="输出路径")
    xhs_service_available: bool = Field(
        ..., description="小红书服务是否可用"
    )
    document_service_available: bool = Field(
        ..., description="文档服务是否可用"
    )
    integration_stats: Dict[str, Any] = Field(
        default_factory=dict, description="整合统计"
    )

    class Config:
        json_encoders = {datetime: _datetime_to_iso}


class HealthCheckResponse(BaseModel):
    """Health check response model."""

    status: str = Field(..., description="健康状态")
    # Defaults to the (naive, local) time at which the instance is created.
    timestamp: datetime = Field(
        default_factory=datetime.now, description="检查时间"
    )
    services: Dict[str, bool] = Field(
        default_factory=dict, description="服务状态"
    )

    class Config:
        json_encoders = {datetime: _datetime_to_iso}


class TaskSummaryResponse(BaseModel):
    """Task summary response model."""

    task_id: str = Field(..., description="任务ID")
    keyword: str = Field(..., description="关键词")
    success: bool = Field(..., description="是否成功")
    processing_time: float = Field(..., description="处理时间")
    created_time: datetime = Field(..., description="创建时间")
    notes_count: int = Field(default=0, description="笔记数量")
    documents_count: int = Field(default=0, description="文档数量")
    content_preview: str = Field(default="", description="内容预览")

    class Config:
        json_encoders = {datetime: _datetime_to_iso}


class ValidationResponse(BaseModel):
    """Validation response model."""

    validation_results: Dict[str, bool] = Field(
        ..., description="验证结果"
    )
    valid_count: int = Field(..., description="有效文档数量")
    invalid_count: int = Field(..., description="无效文档数量")
    invalid_documents: List[str] = Field(
        default_factory=list, description="无效文档列表"
    )


class ApiResponse(BaseModel):
    """Generic API response model."""

    success: bool = Field(..., description="是否成功")
    message: str = Field(default="", description="响应消息")
    data: Optional[Any] = Field(default=None, description="响应数据")
    # Defaults to the (naive, local) time at which the instance is created.
    timestamp: datetime = Field(
        default_factory=datetime.now, description="时间戳"
    )

    class Config:
        json_encoders = {datetime: _datetime_to_iso}


class ErrorResponse(BaseModel):
    """Error response model."""

    # Unlike ApiResponse.success, this one defaults to False.
    success: bool = Field(default=False, description="是否成功")
    error_code: str = Field(..., description="错误码")
    error_message: str = Field(..., description="错误信息")
    # Defaults to the (naive, local) time at which the instance is created.
    timestamp: datetime = Field(
        default_factory=datetime.now, description="时间戳"
    )

    class Config:
        json_encoders = {datetime: _datetime_to_iso}


# ---------------------------------------------------------------------------
# Batch search models
# ---------------------------------------------------------------------------


class CookieConfig(BaseModel):
    """Cookie configuration model."""

    cookie_string: str = Field(..., description="完整的Cookie字符串")
    name: Optional[str] = Field(default=None, description="Cookie名称/标识")
    user_info: Optional[Dict[str, Any]] = Field(
        default=None, description="用户信息"
    )
    # Priority in the range 1-10; a larger number means higher priority.
    priority: int = Field(
        default=1,
        ge=1,
        le=10,
        description="优先级 (1-10, 数字越大优先级越高)",
    )

    class Config:
        schema_extra = {
            "example": {
                "cookie_string": "a1=your_a1_value; web_session=your_session_value; webId=your_webid",
                "name": "user1",
                "user_info": {"username": "用户1", "description": "主要账号"},
                "priority": 5,
            }
        }


class BatchSearchRequest(BaseModel):
    """Batch search request model."""

    # Cookie configuration - required
    cookies: Union[
        List[CookieConfig],  # Option 1: list of detailed cookie configurations
        Dict[str, str],      # Option 2: simple key/value pairs
        str                  # Option 3: a single cookie string
    ] = Field(..., description="Cookie配置信息,支持多种格式")

    keywords: List[str] = Field(..., description="批量搜索关键词列表")
    max_notes_per_keyword: int = Field(
        default=20, ge=1, le=100, description="每个关键词的最大笔记数量"
    )
    # Sort order: 0 = general, 1 = newest, 2 = most liked,
    # 3 = most commented, 4 = most collected.
    sort_type: int = Field(
        default=0,
        ge=0,
        le=4,
        description="排序类型 (0:综合排序, 1:最新, 2:最多点赞, 3:最多评论, 4:最多收藏)",
    )
    # Note kind: 0 = any, 1 = video notes, 2 = regular notes.
    note_type: int = Field(
        default=0,
        ge=0,
        le=2,
        description="笔记类型 (0:不限, 1:视频笔记, 2:普通笔记)",
    )
    save_images: bool = Field(default=True, description="是否保存图片")
    image_storage_path: str = Field(
        default="data/images", description="图像存储路径"
    )
    get_detailed_content: bool = Field(
        default=True, description="是否获取详细内容"
    )
    output_path: str = Field(default="data/output", description="输出路径")
    cookie_rotation: bool = Field(
        default=True, description="是否启用Cookie轮换"
    )
    # Maximum retry count per keyword (1-10).
    max_retries: int = Field(
        default=3, ge=1, le=10, description="每个关键词的最大重试次数"
    )

    class Config:
        schema_extra = {
            "example": {
                "cookies": [
                    {
                        "cookie_string": "a1=main_user_a1; web_session=main_session; webId=main_webid",
                        "name": "main_user",
                        "priority": 5,
                        "user_info": {"username": "主要用户"},
                    },
                    {
                        "cookie_string": "a1=backup_user_a1; web_session=backup_session; webId=backup_webid",
                        "name": "backup_user",
                        "priority": 3,
                    },
                ],
                "keywords": ["北京旅游", "上海美食", "广州攻略"],
                "max_notes_per_keyword": 20,
                "cookie_rotation": True,
                "max_retries": 3,
                "save_images": True,
                "get_detailed_content": True,
            }
        }


class NoteInfo(BaseModel):
    """Note information model."""

    note_id: str = Field(..., description="笔记ID")
    title: str = Field(..., description="笔记标题")
    author: str = Field(..., description="作者昵称")
    content: Optional[str] = Field(default=None, description="笔记内容")
    like_count: int = Field(default=0, description="点赞数")
    comment_count: int = Field(default=0, description="评论数")
    share_count: int = Field(default=0, description="分享数")
    collect_count: int = Field(default=0, description="收藏数")
    note_url: str = Field(..., description="笔记链接")
    # Remote image URLs, as opposed to the local paths in saved_image_paths.
    images: List[str] = Field(
        default_factory=list, description="图片URL列表"
    )
    saved_image_paths: List[str] = Field(
        default_factory=list, description="保存的图片本地路径"
    )
    publish_time: Optional[str] = Field(default=None, description="发布时间")
    location: Optional[str] = Field(default=None, description="位置信息")
    tags: List[str] = Field(default_factory=list, description="标签列表")


class KeywordSearchResult(BaseModel):
    """Search result for a single keyword."""

    keyword: str = Field(..., description="搜索关键词")
    total_notes: int = Field(..., description="搜索到的笔记总数")
    notes: List[NoteInfo] = Field(..., description="笔记列表")
    search_time: str = Field(..., description="搜索时间")
    success: bool = Field(..., description="搜索是否成功")
    error_message: Optional[str] = Field(
        default=None, description="错误信息"
    )


class BatchSearchResponse(BaseModel):
    """Batch search response model."""

    total_keywords: int = Field(..., description="总关键词数量")
    successful_searches: int = Field(..., description="成功搜索数量")
    failed_searches: int = Field(..., description="失败搜索数量")
    total_notes: int = Field(..., description="总笔记数量")
    results: List[KeywordSearchResult] = Field(
        ..., description="搜索结果列表"
    )
    image_storage_path: str = Field(..., description="图像存储路径")
    output_path: str = Field(..., description="输出路径")
    # Processing time, in seconds.
    processing_time: float = Field(..., description="处理时间(秒)")
    summary: Dict[str, Any] = Field(..., description="结果摘要")

    class Config:
        schema_extra = {
            "example": {
                "total_keywords": 3,
                "successful_searches": 2,
                "failed_searches": 1,
                "total_notes": 35,
                "results": [
                    {
                        "keyword": "北京旅游",
                        "total_notes": 20,
                        "notes": [
                            {
                                "note_id": "64a1b2c3d4e5f6g7",
                                "title": "北京三日游攻略",
                                "author": "旅游达人",
                                "content": "详细的北京旅游攻略...",
                                "like_count": 150,
                                "comment_count": 30,
                                "note_url": "https://www.xiaohongshu.com/discovery/item/64a1b2c3d4e5f6g7",
                                "images": ["https://sns-img-hw.xhscdn.com/example1.jpg"],
                                "saved_image_paths": ["data/images/beijing_travel_1.jpg"],
                            }
                        ],
                        "search_time": "2024-01-01 12:00:00",
                        "success": True,
                    }
                ],
                "image_storage_path": "data/images",
                "output_path": "data/output",
                "processing_time": 45.5,
                "summary": {
                    "top_authors": ["旅游达人", "美食博主"],
                    "total_interactions": 5000,
                    "saved_images": 25,
                },
            }
        }