395 lines
18 KiB
Python
Raw Normal View History

2025-07-31 15:35:23 +08:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Core Models
统一的 Pydantic 数据模型定义
"""
from typing import Dict, List, Optional, Any, Union
from datetime import datetime
from pathlib import Path
from pydantic import BaseModel, Field, validator, root_validator
import logging
logger = logging.getLogger(__name__)
# ============================================================================
# Cookie 管理相关模型
# ============================================================================
class CookieInfo(BaseModel):
    """A stored cookie together with its usage bookkeeping.

    ``last_used`` may be supplied as a ``datetime`` or as an ISO-format
    string; strings are parsed with ``datetime.fromisoformat`` before the
    regular field validation runs.
    """

    name: str = Field(..., description="Cookie名称")
    cookie_string: str = Field(..., description="Cookie字符串")
    last_used: datetime = Field(default_factory=datetime.now, description="最后使用时间")
    use_count: int = Field(0, description="使用次数")
    is_valid: bool = Field(True, description="是否有效")
    failure_count: int = Field(0, description="失败次数")
    user_info: Optional[Dict[str, Any]] = Field(None, description="用户信息")

    @validator('last_used', pre=True)
    def parse_last_used(cls, raw):
        """Convert an ISO-format string into a ``datetime``; pass anything else through."""
        if not isinstance(raw, str):
            return raw
        return datetime.fromisoformat(raw)

    class Config:
        # Encode datetime values through isoformat() when serializing to JSON.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
class CookieStats(BaseModel):
    """Aggregate counters describing the cookie pool.

    The three counters are required; ``current_cookie`` defaults to ``None``
    and ``cookie_details`` to a fresh empty list.
    """

    total_cookies: int = Field(..., description="总Cookie数量")
    valid_cookies: int = Field(..., description="有效Cookie数量")
    invalid_cookies: int = Field(..., description="无效Cookie数量")
    current_cookie: Optional[str] = Field(None, description="当前使用的Cookie")
    cookie_details: List[Dict[str, Any]] = Field(
        default_factory=list,
        description="Cookie详情",
    )
# ============================================================================
# 小红书相关模型
# ============================================================================
class XHSNote(BaseModel):
    """Data model for a single Xiaohongshu note.

    The ``likes``, ``comments`` and ``shares`` counters are normalized before
    validation: ``None`` becomes 0 and negative numbers are clamped to 0.
    """

    note_id: str = Field(..., description="笔记ID")
    title: str = Field(..., description="笔记标题")
    content: str = Field(..., description="笔记内容")
    author: str = Field(..., description="作者昵称")
    author_id: str = Field(..., description="作者ID")
    tags: List[str] = Field(default_factory=list, description="标签列表")
    images: List[str] = Field(default_factory=list, description="图片列表")
    videos: List[str] = Field(default_factory=list, description="视频列表")
    likes: int = Field(0, description="点赞数")
    comments: int = Field(0, description="评论数")
    shares: int = Field(0, description="分享数")
    created_time: str = Field(..., description="创建时间")
    note_url: str = Field(..., description="笔记URL")

    @validator('likes', 'comments', 'shares', pre=True)
    def validate_counts(cls, raw):
        """Coerce a counter to a non-negative ``int``; ``None`` maps to 0."""
        if raw is None:
            return 0
        # int() may raise ValueError/TypeError for values it cannot convert.
        return max(0, int(raw))
class XHSSearchResult(BaseModel):
    """Result of a Xiaohongshu keyword search.

    ``total_count`` is raised to at least ``len(notes)`` whenever ``notes``
    is available to the validator.
    """

    keyword: str = Field(..., description="搜索关键词")
    notes: List[XHSNote] = Field(default_factory=list, description="笔记列表")
    total_count: int = Field(..., description="总数量")
    success: bool = Field(..., description="是否成功")
    error_message: str = Field("", description="错误信息")

    @validator('total_count')
    def validate_total_count(cls, reported, values):
        """Return the larger of the reported count and the number of notes."""
        if 'notes' not in values:
            # Nothing to compare against; keep the reported value.
            return reported
        return max(reported, len(values['notes']))
class SearchConfig(BaseModel):
    """Search configuration.

    ``sort_type`` values: 0 = general, 1 = newest, 2 = most liked,
    3 = most commented, 4 = most collected.
    ``note_type`` values: 0 = any, 1 = video notes, 2 = regular notes.
    """

    keyword: str = Field(..., description="搜索关键词")
    max_notes: int = Field(20, ge=1, le=100, description="最大笔记数量")
    sort_type: int = Field(0, ge=0, le=4, description="排序类型")
    note_type: int = Field(0, ge=0, le=2, description="笔记类型")
    download_images: bool = Field(True, description="是否下载图片")
    download_videos: bool = Field(True, description="是否下载视频")

    @validator('sort_type')
    def validate_sort_type(cls, choice):
        """Accept only the sort types 0 through 4."""
        if 0 <= choice <= 4:
            return choice
        raise ValueError('sort_type must be 0-4')

    @validator('note_type')
    def validate_note_type(cls, choice):
        """Accept only the note types 0 through 2."""
        if 0 <= choice <= 2:
            return choice
        raise ValueError('note_type must be 0-2')
# ============================================================================
# 文档处理相关模型
# ============================================================================
class DocumentContent(BaseModel):
    """Content of a single document plus its file type and metadata.

    ``file_type`` is lower-cased before validation (falsy input becomes an
    empty string), and a ``None`` ``metadata`` is replaced by an empty dict.
    """

    file_path: str = Field(..., description="文件路径")
    content: str = Field(..., description="文档内容")
    file_type: str = Field(..., description="文件类型")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="元数据")

    @validator('file_type', pre=True)
    def normalize_file_type(cls, v):
        """Lower-case string input; map falsy input to ``""``.

        Non-string truthy values are returned untouched so the field's own
        ``str`` validation can coerce or reject them. Calling ``.lower()`` on
        them would raise ``AttributeError``, which is not reported as a
        validation error.
        """
        if not v:
            return ""
        if isinstance(v, str):
            return v.lower()
        return v

    @validator('metadata', pre=True)
    def ensure_metadata(cls, v):
        """Replace an explicit ``None`` with an empty dict."""
        if v is None:
            return {}
        return v
class IntegratedContent(BaseModel):
    """Several documents merged into one text with a summary and key topics.

    ``total_length`` is raised to at least the summed length of every
    document's ``content`` whenever ``documents`` is available to the
    validator.
    """

    documents: List[DocumentContent] = Field(default_factory=list, description="文档列表")
    integrated_text: str = Field(..., description="整合后的文本")
    summary: str = Field(..., description="摘要")
    key_topics: List[str] = Field(default_factory=list, description="关键主题")
    total_length: int = Field(..., description="总长度")

    @validator('total_length')
    def validate_total_length(cls, declared, values):
        """Return the larger of the declared length and the computed one."""
        if 'documents' not in values:
            # Nothing to measure; keep the declared value.
            return declared
        measured = sum(len(doc.content) for doc in values['documents'])
        return max(declared, measured)
class ProcessingTask(BaseModel):
    """A processing task with its search configuration and lifecycle timestamps.

    ``status`` must be one of ``pending``, ``processing``, ``completed`` or
    ``failed``.
    """

    task_id: str = Field(..., description="任务ID")
    keyword: str = Field(..., description="关键词")
    document_paths: List[str] = Field(default_factory=list, description="文档路径列表")
    search_config: SearchConfig = Field(..., description="搜索配置")
    status: str = Field("pending", description="任务状态")
    created_time: datetime = Field(default_factory=datetime.now, description="创建时间")
    started_time: Optional[datetime] = Field(None, description="开始时间")
    completed_time: Optional[datetime] = Field(None, description="完成时间")
    error_message: str = Field("", description="错误信息")

    @validator('status')
    def validate_status(cls, state):
        """Reject any status outside the known lifecycle states."""
        valid_statuses = ['pending', 'processing', 'completed', 'failed']
        if state in valid_statuses:
            return state
        raise ValueError(f'status must be one of {valid_statuses}')

    class Config:
        # Encode datetime values through isoformat() when serializing to JSON.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
class ProcessedContent(BaseModel):
    """Combined output of a task: search results, document content and summary."""

    task_id: str = Field(..., description="任务ID")
    xhs_content: XHSSearchResult = Field(..., description="小红书内容")
    document_content: IntegratedContent = Field(..., description="文档内容")
    integrated_summary: str = Field(..., description="整合摘要")
    statistics: Dict[str, Any] = Field(default_factory=dict, description="统计信息")
    processing_time: float = Field(0.0, description="处理时间")

    def get_summary(self) -> str:
        """Return a one-line message with the number of notes and documents."""
        note_total = len(self.xhs_content.notes)
        document_total = len(self.document_content.documents)
        return f"处理了 {note_total} 条笔记和 {document_total} 个文档"
# ============================================================================
# 整合服务相关模型
# ============================================================================
class IntegrationConfig(BaseModel):
    """Configuration for an integration run.

    ``output_format`` must be one of ``summary``, ``blog_post``,
    ``travel_guide``, ``product_sales`` or ``attraction_standard``.
    """

    keyword: str = Field(..., description="关键词")
    document_paths: List[str] = Field(default_factory=list, description="文档路径列表")
    max_notes: int = Field(20, ge=1, le=100, description="最大笔记数量")
    sort_type: int = Field(0, ge=0, le=4, description="排序类型")
    note_type: int = Field(0, ge=0, le=2, description="笔记类型")
    download_media: bool = Field(True, description="是否下载媒体")
    output_format: str = Field('summary', description="输出格式")
    include_llm_processing: bool = Field(True, description="是否包含LLM处理")

    @validator('output_format')
    def validate_output_format(cls, fmt):
        """Reject any output format that is not in the supported list."""
        valid_formats = ['summary', 'blog_post', 'travel_guide', 'product_sales', 'attraction_standard']
        if fmt in valid_formats:
            return fmt
        raise ValueError(f'output_format must be one of {valid_formats}')
class IntegrationResult(BaseModel):
    """Outcome of an integration task.

    Carries the configuration used, a success flag, the optional processed
    content, an error message and timing information.
    """

    task_id: str = Field(..., description="任务ID")
    config: IntegrationConfig = Field(..., description="整合配置")
    success: bool = Field(..., description="是否成功")
    processed_content: Optional[ProcessedContent] = Field(None, description="处理后的内容")
    error_message: str = Field("", description="错误信息")
    processing_time: float = Field(0.0, description="处理时间")
    created_time: datetime = Field(default_factory=datetime.now, description="创建时间")

    class Config:
        # Encode datetime values through isoformat() when serializing to JSON.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
class ProcessingStats(BaseModel):
    """Running statistics about processed tasks.

    All counters default to zero; both timestamps default to the time the
    instance is created.
    """

    total_tasks: int = Field(0, description="总任务数")
    successful_tasks: int = Field(0, description="成功任务数")
    failed_tasks: int = Field(0, description="失败任务数")
    total_processing_time: float = Field(0.0, description="总处理时间")
    average_processing_time: float = Field(0.0, description="平均处理时间")
    total_notes_processed: int = Field(0, description="总处理笔记数")
    total_documents_processed: int = Field(0, description="总处理文档数")
    total_media_downloaded: int = Field(0, description="总下载媒体数")
    start_time: datetime = Field(default_factory=datetime.now, description="开始时间")
    last_updated: datetime = Field(default_factory=datetime.now, description="最后更新时间")

    class Config:
        # Encode datetime values through isoformat() when serializing to JSON.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
# ============================================================================
# API 请求/响应模型
# ============================================================================
class SearchRequest(BaseModel):
    """Request payload for a search.

    ``keyword`` must be 1-100 characters long; the numeric options are
    range-constrained through their ``Field`` bounds.
    """

    keyword: str = Field(
        ...,
        min_length=1,
        max_length=100,
        description="搜索关键词",
    )
    max_notes: int = Field(20, ge=1, le=100, description="最大笔记数量")
    sort_type: int = Field(0, ge=0, le=4, description="排序类型")
    note_type: int = Field(0, ge=0, le=2, description="笔记类型")
    download_media: bool = Field(True, description="是否下载媒体")
class IntegrationRequest(BaseModel):
    """Request payload for an integration run.

    ``keyword`` must be 1-100 characters long and ``output_format`` must be
    one of ``summary``, ``blog_post``, ``travel_guide``, ``product_sales`` or
    ``attraction_standard``.
    """

    keyword: str = Field(..., description="关键词", min_length=1, max_length=100)
    document_paths: List[str] = Field(default_factory=list, description="文档路径列表")
    max_notes: int = Field(20, ge=1, le=100, description="最大笔记数量")
    sort_type: int = Field(0, ge=0, le=4, description="排序类型")
    note_type: int = Field(0, ge=0, le=2, description="笔记类型")
    download_media: bool = Field(True, description="是否下载媒体")
    output_format: str = Field('summary', description="输出格式")
    include_llm_processing: bool = Field(True, description="是否包含LLM处理")

    @validator('output_format')
    def validate_output_format(cls, fmt):
        """Reject any output format that is not in the supported list."""
        valid_formats = ['summary', 'blog_post', 'travel_guide', 'product_sales', 'attraction_standard']
        if fmt in valid_formats:
            return fmt
        raise ValueError(f'output_format must be one of {valid_formats}')
class BatchIntegrationRequest(BaseModel):
    """Request payload carrying a batch of integration tasks.

    A batch must contain between 1 and 10 tasks.
    """

    tasks: List[IntegrationRequest] = Field(default_factory=list, description="任务列表")

    # always=True makes the check run on the default value too; without it,
    # omitting ``tasks`` skipped validation and yielded an empty batch.
    @validator('tasks', always=True)
    def validate_tasks(cls, v):
        """Ensure the batch is non-empty and holds at most 10 tasks.

        Raises:
            ValueError: if ``tasks`` is empty or has more than 10 entries.
        """
        if not v:
            raise ValueError('tasks cannot be empty')
        if len(v) > 10:
            raise ValueError('maximum 10 tasks allowed')
        return v
class CookieRequest(BaseModel):
    """Request payload for submitting a cookie.

    ``name`` must be 1-50 characters long and ``cookie_string`` at least 10
    characters long.
    """

    name: str = Field(
        ...,
        min_length=1,
        max_length=50,
        description="Cookie名称",
    )
    cookie_string: str = Field(..., min_length=10, description="Cookie字符串")
    user_info: Optional[Dict[str, Any]] = Field(None, description="用户信息")
class ExportRequest(BaseModel):
    """Request payload for exporting a task's result.

    ``output_format`` defaults to ``'summary'`` and is not validated against
    a list of formats here; ``filename`` is optional.
    """

    task_id: str = Field(..., description="任务ID")
    output_format: str = Field('summary', description="输出格式")
    filename: Optional[str] = Field(None, description="文件名")
class ApiResponse(BaseModel):
    """Base shape of an API response.

    Holds a success flag, a message, an optional payload of any type and a
    timestamp that defaults to the creation time.
    """

    success: bool = Field(..., description="是否成功")
    message: str = Field("", description="响应消息")
    data: Optional[Any] = Field(None, description="响应数据")
    timestamp: datetime = Field(default_factory=datetime.now, description="时间戳")

    class Config:
        # Encode datetime values through isoformat() when serializing to JSON.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
class ErrorResponse(BaseModel):
    """Error response carrying an error code and message.

    ``success`` defaults to ``False``; ``timestamp`` defaults to the creation
    time.
    """

    success: bool = Field(False, description="是否成功")
    error_code: str = Field(..., description="错误码")
    error_message: str = Field(..., description="错误信息")
    timestamp: datetime = Field(default_factory=datetime.now, description="时间戳")

    class Config:
        # Encode datetime values through isoformat() when serializing to JSON.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
# ============================================================================
# 服务状态模型
# ============================================================================
class ServiceStatus(BaseModel):
    """Snapshot of the service state.

    ``status``, ``startup_time`` and ``output_path`` are required; the two
    dictionaries default to empty.
    """

    status: str = Field(..., description="服务状态")
    startup_time: datetime = Field(..., description="启动时间")
    output_path: str = Field(..., description="输出路径")
    xhs_service: Dict[str, Any] = Field(
        default_factory=dict,
        description="小红书服务状态",
    )
    integration_stats: Dict[str, Any] = Field(
        default_factory=dict,
        description="整合统计",
    )

    class Config:
        # Encode datetime values through isoformat() when serializing to JSON.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
class HealthCheck(BaseModel):
    """Health-check result.

    Holds an overall status string, the check time (defaults to the creation
    time) and a mapping from service name to a boolean flag.
    """

    status: str = Field(..., description="健康状态")
    timestamp: datetime = Field(default_factory=datetime.now, description="检查时间")
    services: Dict[str, bool] = Field(
        default_factory=dict,
        description="服务状态",
    )

    class Config:
        # Encode datetime values through isoformat() when serializing to JSON.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
# ============================================================================
# 景区和产品相关模型
# ============================================================================
class ScenicSpot(BaseModel):
    """Data model for a scenic spot.

    Only ``userId`` and ``name`` are required. ``isPublic`` and ``isDelete``
    are integer flags defaulting to 0; both timestamps default to the
    creation time.
    """

    id: Optional[int] = Field(None, description="主键id")
    userId: int = Field(..., description="用户ID")
    name: str = Field(..., description="景区名称")
    address: Optional[str] = Field(None, description="地址")
    trafficInfo: Optional[str] = Field(None, description="交通指南")
    description: Optional[str] = Field(None, description="描述")
    advantage: Optional[str] = Field(None, description="景区优势")
    highlight: Optional[str] = Field(None, description="景区亮点")
    isPublic: int = Field(0, description="是否公开0私有1公开")
    isDelete: int = Field(0, description="是否删除")
    createTime: datetime = Field(default_factory=datetime.now, description="创建时间")
    updateTime: datetime = Field(default_factory=datetime.now, description="更新时间")

    class Config:
        # Encode datetime values through isoformat() when serializing to JSON.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
class Product(BaseModel):
    """Data model for a product.

    Only ``userId`` and ``productName`` are required; every descriptive field
    is optional and defaults to ``None``. ``isPublic`` and ``isDelete`` are
    integer flags defaulting to 0; both timestamps default to the creation
    time.
    """

    id: Optional[int] = Field(None, description="主键id")
    userId: int = Field(..., description="用户ID")
    productName: str = Field(..., description="产品名称")
    originPrice: Optional[float] = Field(None, description="原价")
    realPrice: Optional[float] = Field(None, description="实际价格")
    packageInfo: Optional[str] = Field(None, description="套票详情")
    salesPeriod: Optional[str] = Field(None, description="售卖期")
    stock: Optional[int] = Field(None, description="库存")
    keyAdvantages: Optional[str] = Field(None, description="产品优势")
    highlights: Optional[str] = Field(None, description="产品亮点")
    detailedDescription: Optional[str] = Field(None, description="产品描述")
    usageRules: Optional[str] = Field(None, description="使用规则详细说明")
    surcharge: Optional[str] = Field(None, description="加价说明")
    reservation: Optional[str] = Field(None, description="预约规则")
    refund: Optional[str] = Field(None, description="退改政策")
    discounts: Optional[str] = Field(None, description="优惠内容")
    isPublic: int = Field(0, description="是否公开0私有1公开")
    isDelete: int = Field(0, description="是否删除")
    createTime: datetime = Field(default_factory=datetime.now, description="创建时间")
    updateTime: datetime = Field(default_factory=datetime.now, description="更新时间")

    class Config:
        # Encode datetime values through isoformat() when serializing to JSON.
        json_encoders = {datetime: lambda moment: moment.isoformat()}