#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Core Models

Unified Pydantic data model definitions.
"""
import logging
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Any, Union

from pydantic import BaseModel, Field, validator, root_validator

logger = logging.getLogger(__name__)

# Shared JSON encoder mapping: every model that carries datetime fields
# serializes them as ISO-8601 strings.
_DATETIME_JSON_ENCODERS = {
    datetime: lambda v: v.isoformat()
}

# Output formats accepted by the integration models. Kept as a list so the
# validation error message renders exactly as it always has.
_VALID_OUTPUT_FORMATS = [
    'summary',
    'blog_post',
    'travel_guide',
    'product_sales',
    'attraction_standard',
]


def _check_output_format(v):
    """Return ``v`` unchanged if it is a known output format.

    Raises:
        ValueError: if ``v`` is not listed in ``_VALID_OUTPUT_FORMATS``.
    """
    valid_formats = _VALID_OUTPUT_FORMATS
    if v not in valid_formats:
        raise ValueError(f'output_format must be one of {valid_formats}')
    return v


# ============================================================================
# Cookie management models
# ============================================================================

class CookieInfo(BaseModel):
    """A stored cookie together with its usage bookkeeping."""
    name: str = Field(..., description="Cookie名称")
    cookie_string: str = Field(..., description="Cookie字符串")
    last_used: datetime = Field(default_factory=datetime.now, description="最后使用时间")
    use_count: int = Field(default=0, description="使用次数")
    is_valid: bool = Field(default=True, description="是否有效")
    failure_count: int = Field(default=0, description="失败次数")
    user_info: Optional[Dict[str, Any]] = Field(default=None, description="用户信息")

    @validator('last_used', pre=True)
    def parse_last_used(cls, v):
        """Parse ISO-format strings into ``datetime``; pass other values through."""
        if isinstance(v, str):
            return datetime.fromisoformat(v)
        return v

    class Config:
        json_encoders = _DATETIME_JSON_ENCODERS


class CookieStats(BaseModel):
    """Aggregate cookie statistics."""
    total_cookies: int = Field(..., description="总Cookie数量")
    valid_cookies: int = Field(..., description="有效Cookie数量")
    invalid_cookies: int = Field(..., description="无效Cookie数量")
    current_cookie: Optional[str] = Field(default=None, description="当前使用的Cookie")
    cookie_details: List[Dict[str, Any]] = Field(default_factory=list, description="Cookie详情")


# ============================================================================
# Xiaohongshu (XHS) models
# ============================================================================

class XHSNote(BaseModel):
    """A single Xiaohongshu note."""
    note_id: str = Field(..., description="笔记ID")
    title: str = Field(..., description="笔记标题")
    content: str = Field(..., description="笔记内容")
    author: str = Field(..., description="作者昵称")
    author_id: str = Field(..., description="作者ID")
    tags: List[str] = Field(default_factory=list, description="标签列表")
    images: List[str] = Field(default_factory=list, description="图片列表")
    videos: List[str] = Field(default_factory=list, description="视频列表")
    likes: int = Field(default=0, description="点赞数")
    comments: int = Field(default=0, description="评论数")
    shares: int = Field(default=0, description="分享数")
    created_time: str = Field(..., description="创建时间")
    note_url: str = Field(..., description="笔记URL")

    @validator('likes', 'comments', 'shares', pre=True)
    def validate_counts(cls, v):
        """Coerce a counter to a non-negative int; ``None`` becomes 0.

        Values that ``int()`` cannot convert raise ``ValueError``, which
        pydantic reports as a validation error.
        """
        return max(0, int(v)) if v is not None else 0


class XHSSearchResult(BaseModel):
    """Result of a Xiaohongshu search."""
    keyword: str = Field(..., description="搜索关键词")
    notes: List[XHSNote] = Field(default_factory=list, description="笔记列表")
    total_count: int = Field(..., description="总数量")
    success: bool = Field(..., description="是否成功")
    error_message: str = Field(default="", description="错误信息")

    @validator('total_count')
    def validate_total_count(cls, v, values):
        """Raise ``total_count`` to at least the number of notes present.

        ``notes`` is declared before ``total_count``, so it is available in
        ``values`` here whenever it passed its own validation.
        """
        if 'notes' in values:
            return max(v, len(values['notes']))
        return v


class SearchConfig(BaseModel):
    """Search configuration."""
    keyword: str = Field(..., description="搜索关键词")
    max_notes: int = Field(default=20, ge=1, le=100, description="最大笔记数量")
    sort_type: int = Field(default=0, ge=0, le=4, description="排序类型")
    note_type: int = Field(default=0, ge=0, le=2, description="笔记类型")
    download_images: bool = Field(default=True, description="是否下载图片")
    download_videos: bool = Field(default=True, description="是否下载视频")

    @validator('sort_type')
    def validate_sort_type(cls, v):
        """Reject sort types outside 0-4."""
        # 0: general, 1: newest, 2: most liked, 3: most commented,
        # 4: most collected
        if v not in [0, 1, 2, 3, 4]:
            raise ValueError('sort_type must be 0-4')
        return v

    @validator('note_type')
    def validate_note_type(cls, v):
        """Reject note types outside 0-2."""
        # 0: any, 1: video note, 2: regular note
        if v not in [0, 1, 2]:
            raise ValueError('note_type must be 0-2')
        return v


# ============================================================================
# Document processing models
# ============================================================================

class DocumentContent(BaseModel):
    """Content extracted from a single document."""
    file_path: str = Field(..., description="文件路径")
    content: str = Field(..., description="文档内容")
    file_type: str = Field(..., description="文件类型")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="元数据")

    @validator('file_type', pre=True)
    def normalize_file_type(cls, v):
        """Lower-case the file type; falsy input becomes an empty string.

        Values without a ``lower`` method (e.g. an int) are returned
        untouched so pydantic's own ``str`` validation handles them,
        instead of an ``AttributeError`` escaping from this validator.
        """
        if not v:
            return ""
        lower = getattr(v, "lower", None)
        return lower() if callable(lower) else v

    @validator('metadata', pre=True)
    def ensure_metadata(cls, v):
        """Replace an explicit ``None`` with an empty dict."""
        if v is None:
            return {}
        return v


class IntegratedContent(BaseModel):
    """Content merged from several documents."""
    documents: List[DocumentContent] = Field(default_factory=list, description="文档列表")
    integrated_text: str = Field(..., description="整合后的文本")
    summary: str = Field(..., description="摘要")
    key_topics: List[str] = Field(default_factory=list, description="关键主题")
    total_length: int = Field(..., description="总长度")

    @validator('total_length')
    def validate_total_length(cls, v, values):
        """Raise ``total_length`` to at least the summed document content length."""
        if 'documents' in values:
            calculated_length = sum(len(doc.content) for doc in values['documents'])
            return max(v, calculated_length)
        return v


class ProcessingTask(BaseModel):
    """A processing task and its lifecycle timestamps."""
    task_id: str = Field(..., description="任务ID")
    keyword: str = Field(..., description="关键词")
    document_paths: List[str] = Field(default_factory=list, description="文档路径列表")
    search_config: SearchConfig = Field(..., description="搜索配置")
    status: str = Field(default="pending", description="任务状态")
    created_time: datetime = Field(default_factory=datetime.now, description="创建时间")
    started_time: Optional[datetime] = Field(default=None, description="开始时间")
    completed_time: Optional[datetime] = Field(default=None, description="完成时间")
    error_message: str = Field(default="", description="错误信息")

    @validator('status')
    def validate_status(cls, v):
        """Reject statuses other than pending/processing/completed/failed."""
        valid_statuses = ['pending', 'processing', 'completed', 'failed']
        if v not in valid_statuses:
            raise ValueError(f'status must be one of {valid_statuses}')
        return v

    class Config:
        json_encoders = _DATETIME_JSON_ENCODERS


class ProcessedContent(BaseModel):
    """Output of a processing task: XHS results plus integrated documents."""
    task_id: str = Field(..., description="任务ID")
    xhs_content: XHSSearchResult = Field(..., description="小红书内容")
    document_content: IntegratedContent = Field(..., description="文档内容")
    integrated_summary: str = Field(..., description="整合摘要")
    statistics: Dict[str, Any] = Field(default_factory=dict, description="统计信息")
    processing_time: float = Field(default=0.0, description="处理时间")

    def get_summary(self) -> str:
        """Return a one-line summary with the note and document counts."""
        return f"处理了 {len(self.xhs_content.notes)} 条笔记和 {len(self.document_content.documents)} 个文档"


# ============================================================================
# Integration service models
# ============================================================================

class IntegrationConfig(BaseModel):
    """Integration configuration."""
    keyword: str = Field(..., description="关键词")
    document_paths: List[str] = Field(default_factory=list, description="文档路径列表")
    max_notes: int = Field(default=20, ge=1, le=100, description="最大笔记数量")
    sort_type: int = Field(default=0, ge=0, le=4, description="排序类型")
    note_type: int = Field(default=0, ge=0, le=2, description="笔记类型")
    download_media: bool = Field(default=True, description="是否下载媒体")
    output_format: str = Field(default='summary', description="输出格式")
    include_llm_processing: bool = Field(default=True, description="是否包含LLM处理")

    @validator('output_format')
    def validate_output_format(cls, v):
        """Reject output formats that are not in the supported list."""
        return _check_output_format(v)


class IntegrationResult(BaseModel):
    """Result of an integration run."""
    task_id: str = Field(..., description="任务ID")
    config: IntegrationConfig = Field(..., description="整合配置")
    success: bool = Field(..., description="是否成功")
    processed_content: Optional[ProcessedContent] = Field(default=None, description="处理后的内容")
    error_message: str = Field(default="", description="错误信息")
    processing_time: float = Field(default=0.0, description="处理时间")
    created_time: datetime = Field(default_factory=datetime.now, description="创建时间")

    class Config:
        json_encoders = _DATETIME_JSON_ENCODERS


class ProcessingStats(BaseModel):
    """Processing statistics counters."""
    total_tasks: int = Field(default=0, description="总任务数")
    successful_tasks: int = Field(default=0, description="成功任务数")
    failed_tasks: int = Field(default=0, description="失败任务数")
    total_processing_time: float = Field(default=0.0, description="总处理时间")
    average_processing_time: float = Field(default=0.0, description="平均处理时间")
    total_notes_processed: int = Field(default=0, description="总处理笔记数")
    total_documents_processed: int = Field(default=0, description="总处理文档数")
    total_media_downloaded: int = Field(default=0, description="总下载媒体数")
    start_time: datetime = Field(default_factory=datetime.now, description="开始时间")
    last_updated: datetime = Field(default_factory=datetime.now, description="最后更新时间")

    class Config:
        json_encoders = _DATETIME_JSON_ENCODERS


# ============================================================================
# API request/response models
# ============================================================================

class SearchRequest(BaseModel):
    """Search request."""
    keyword: str = Field(..., min_length=1, max_length=100, description="搜索关键词")
    max_notes: int = Field(default=20, ge=1, le=100, description="最大笔记数量")
    sort_type: int = Field(default=0, ge=0, le=4, description="排序类型")
    note_type: int = Field(default=0, ge=0, le=2, description="笔记类型")
    download_media: bool = Field(default=True, description="是否下载媒体")


class IntegrationRequest(BaseModel):
    """Integration request."""
    keyword: str = Field(..., description="关键词", min_length=1, max_length=100)
    document_paths: List[str] = Field(default_factory=list, description="文档路径列表")
    max_notes: int = Field(default=20, ge=1, le=100, description="最大笔记数量")
    sort_type: int = Field(default=0, ge=0, le=4, description="排序类型")
    note_type: int = Field(default=0, ge=0, le=2, description="笔记类型")
    download_media: bool = Field(default=True, description="是否下载媒体")
    output_format: str = Field(default='summary', description="输出格式")
    include_llm_processing: bool = Field(default=True, description="是否包含LLM处理")

    @validator('output_format')
    def validate_output_format(cls, v):
        """Reject output formats that are not in the supported list."""
        return _check_output_format(v)


class BatchIntegrationRequest(BaseModel):
    """Batch integration request holding 1 to 10 tasks."""
    tasks: List[IntegrationRequest] = Field(default_factory=list, description="任务列表")

    # always=True: pydantic skips validators for fields that fall back to
    # their default, so without it ``BatchIntegrationRequest()`` would bypass
    # the emptiness check and silently produce an empty batch.
    @validator('tasks', always=True)
    def validate_tasks(cls, v):
        """Require between 1 and 10 tasks.

        Raises:
            ValueError: if the list is empty or has more than 10 entries.
        """
        if not v:
            raise ValueError('tasks cannot be empty')
        if len(v) > 10:
            raise ValueError('maximum 10 tasks allowed')
        return v


class CookieRequest(BaseModel):
    """Cookie request."""
    name: str = Field(..., min_length=1, max_length=50, description="Cookie名称")
    cookie_string: str = Field(..., min_length=10, description="Cookie字符串")
    user_info: Optional[Dict[str, Any]] = Field(default=None, description="用户信息")


class ExportRequest(BaseModel):
    """Export request."""
    task_id: str = Field(..., description="任务ID")
    # NOTE(review): unlike the integration models, output_format is not
    # validated here -- confirm whether the same whitelist should apply.
    output_format: str = Field(default='summary', description="输出格式")
    filename: Optional[str] = Field(default=None, description="文件名")


class ApiResponse(BaseModel):
    """Base API response."""
    success: bool = Field(..., description="是否成功")
    message: str = Field(default="", description="响应消息")
    data: Optional[Any] = Field(default=None, description="响应数据")
    timestamp: datetime = Field(default_factory=datetime.now, description="时间戳")

    class Config:
        json_encoders = _DATETIME_JSON_ENCODERS


class ErrorResponse(BaseModel):
    """Error response; ``success`` defaults to False."""
    success: bool = Field(default=False, description="是否成功")
    error_code: str = Field(..., description="错误码")
    error_message: str = Field(..., description="错误信息")
    timestamp: datetime = Field(default_factory=datetime.now, description="时间戳")

    class Config:
        json_encoders = _DATETIME_JSON_ENCODERS


# ============================================================================
# Service status models
# ============================================================================

class ServiceStatus(BaseModel):
    """Service status."""
    status: str = Field(..., description="服务状态")
    startup_time: datetime = Field(..., description="启动时间")
    output_path: str = Field(..., description="输出路径")
    xhs_service: Dict[str, Any] = Field(default_factory=dict, description="小红书服务状态")
    integration_stats: Dict[str, Any] = Field(default_factory=dict, description="整合统计")

    class Config:
        json_encoders = _DATETIME_JSON_ENCODERS


class HealthCheck(BaseModel):
    """Health check result."""
    status: str = Field(..., description="健康状态")
    timestamp: datetime = Field(default_factory=datetime.now, description="检查时间")
    services: Dict[str, bool] = Field(default_factory=dict, description="服务状态")

    class Config:
        json_encoders = _DATETIME_JSON_ENCODERS


# ============================================================================
# Scenic spot and product models
# ============================================================================

class ScenicSpot(BaseModel):
    """Scenic spot record."""
    id: Optional[int] = Field(default=None, description="主键id")
    userId: int = Field(..., description="用户ID")
    name: str = Field(..., description="景区名称")
    address: Optional[str] = Field(default=None, description="地址")
    trafficInfo: Optional[str] = Field(default=None, description="交通指南")
    description: Optional[str] = Field(default=None, description="描述")
    advantage: Optional[str] = Field(default=None, description="景区优势")
    highlight: Optional[str] = Field(default=None, description="景区亮点")
    isPublic: int = Field(default=0, description="是否公开(0私有1公开)")
    isDelete: int = Field(default=0, description="是否删除")
    createTime: datetime = Field(default_factory=datetime.now, description="创建时间")
    updateTime: datetime = Field(default_factory=datetime.now, description="更新时间")

    class Config:
        json_encoders = _DATETIME_JSON_ENCODERS


class Product(BaseModel):
    """Product record."""
    id: Optional[int] = Field(default=None, description="主键id")
    userId: int = Field(..., description="用户ID")
    productName: str = Field(..., description="产品名称")
    originPrice: Optional[float] = Field(default=None, description="原价")
    realPrice: Optional[float] = Field(default=None, description="实际价格")
    packageInfo: Optional[str] = Field(default=None, description="套票详情")
    salesPeriod: Optional[str] = Field(default=None, description="售卖期")
    stock: Optional[int] = Field(default=None, description="库存")
    keyAdvantages: Optional[str] = Field(default=None, description="产品优势")
    highlights: Optional[str] = Field(default=None, description="产品亮点")
    detailedDescription: Optional[str] = Field(default=None, description="产品描述")
    usageRules: Optional[str] = Field(default=None, description="使用规则详细说明")
    surcharge: Optional[str] = Field(default=None, description="加价说明")
    reservation: Optional[str] = Field(default=None, description="预约规则")
    refund: Optional[str] = Field(default=None, description="退改政策")
    discounts: Optional[str] = Field(default=None, description="优惠内容")
    isPublic: int = Field(default=0, description="是否公开(0私有1公开)")
    isDelete: int = Field(default=0, description="是否删除")
    createTime: datetime = Field(default_factory=datetime.now, description="创建时间")
    updateTime: datetime = Field(default_factory=datetime.now, description="更新时间")

    class Config:
        json_encoders = _DATETIME_JSON_ENCODERS