395 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Core Models
统一的 Pydantic 数据模型定义
"""
from typing import Dict, List, Optional, Any, Union
from datetime import datetime
from pathlib import Path
from pydantic import BaseModel, Field, validator, root_validator
import logging
logger = logging.getLogger(__name__)
# ============================================================================
# Cookie 管理相关模型
# ============================================================================
class CookieInfo(BaseModel):
    """A single stored cookie together with its usage bookkeeping.

    ``last_used`` may be given as a string, in which case it is parsed with
    ``datetime.fromisoformat`` before field validation. Datetime values are
    JSON-encoded via ``isoformat()``.
    """

    # Cookie name and raw cookie string (both required).
    name: str = Field(..., description="Cookie名称")
    cookie_string: str = Field(..., description="Cookie字符串")
    # Last-used time; defaults to the moment the model is created.
    last_used: datetime = Field(default_factory=datetime.now, description="最后使用时间")
    # Usage counter, validity flag and failure counter.
    use_count: int = Field(default=0, description="使用次数")
    is_valid: bool = Field(default=True, description="是否有效")
    failure_count: int = Field(default=0, description="失败次数")
    # Optional free-form user information.
    user_info: Optional[Dict[str, Any]] = Field(default=None, description="用户信息")

    @validator('last_used', pre=True)
    def parse_last_used(cls, v):
        """Turn a string into a ``datetime``; return any other value unchanged."""
        return datetime.fromisoformat(v) if isinstance(v, str) else v

    class Config:
        # Emit datetimes as ISO-8601 strings when serialising to JSON.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
class CookieStats(BaseModel):
    """Aggregate statistics about the managed cookies."""

    # Total / valid / invalid cookie counts (all required).
    total_cookies: int = Field(..., description="总Cookie数量")
    valid_cookies: int = Field(..., description="有效Cookie数量")
    invalid_cookies: int = Field(..., description="无效Cookie数量")
    # Cookie currently in use, if any.
    current_cookie: Optional[str] = Field(default=None, description="当前使用的Cookie")
    # Per-cookie detail dictionaries; defaults to an empty list.
    cookie_details: List[Dict[str, Any]] = Field(default_factory=list, description="Cookie详情")
# ============================================================================
# 小红书相关模型
# ============================================================================
class XHSNote(BaseModel):
    """Data model for a Xiaohongshu note.

    The ``likes``, ``comments`` and ``shares`` counters are normalised before
    validation: ``None`` becomes 0 and negative values are clamped to 0.
    """

    # Note identity and text.
    note_id: str = Field(..., description="笔记ID")
    title: str = Field(..., description="笔记标题")
    content: str = Field(..., description="笔记内容")
    # Author nickname and author ID.
    author: str = Field(..., description="作者昵称")
    author_id: str = Field(..., description="作者ID")
    # Tag, image and video lists; each defaults to an empty list.
    tags: List[str] = Field(default_factory=list, description="标签列表")
    images: List[str] = Field(default_factory=list, description="图片列表")
    videos: List[str] = Field(default_factory=list, description="视频列表")
    # Engagement counters.
    likes: int = Field(default=0, description="点赞数")
    comments: int = Field(default=0, description="评论数")
    shares: int = Field(default=0, description="分享数")
    # Creation time (kept as a string) and the note URL.
    created_time: str = Field(..., description="创建时间")
    note_url: str = Field(..., description="笔记URL")

    @validator('likes', 'comments', 'shares', pre=True)
    def validate_counts(cls, v):
        """Convert a raw counter with ``int()`` and clamp it to be non-negative."""
        if v is None:
            return 0
        count = int(v)
        return count if count > 0 else 0
class XHSSearchResult(BaseModel):
    """Result of a Xiaohongshu search.

    ``total_count`` is raised to at least ``len(notes)`` during validation.
    """

    # Search keyword (required).
    keyword: str = Field(..., description="搜索关键词")
    # Notes returned by the search; defaults to an empty list.
    notes: List[XHSNote] = Field(default_factory=list, description="笔记列表")
    # Reported total number of results (required).
    total_count: int = Field(..., description="总数量")
    # Success flag and error message (empty by default).
    success: bool = Field(..., description="是否成功")
    error_message: str = Field(default="", description="错误信息")

    @validator('total_count')
    def validate_total_count(cls, v, values):
        """Return the larger of the supplied total and the number of notes.

        When ``notes`` is not present in ``values`` the supplied total is
        returned unchanged.
        """
        if 'notes' not in values:
            return v
        note_count = len(values['notes'])
        return max(v, note_count)
class SearchConfig(BaseModel):
    """Search configuration."""

    # Search keyword (required).
    keyword: str = Field(..., description="搜索关键词")
    # Maximum number of notes, constrained to 1..100.
    max_notes: int = Field(default=20, ge=1, le=100, description="最大笔记数量")
    # Sort type (0..4) and note type (0..2); meanings are listed on the validators.
    sort_type: int = Field(default=0, ge=0, le=4, description="排序类型")
    note_type: int = Field(default=0, ge=0, le=2, description="笔记类型")
    # Whether to download images / videos.
    download_images: bool = Field(default=True, description="是否下载图片")
    download_videos: bool = Field(default=True, description="是否下载视频")

    @validator('sort_type')
    def validate_sort_type(cls, v):
        """Accept only the known sort types.

        Raises:
            ValueError: if ``v`` is not one of 0-4.
        """
        # 0: general, 1: newest, 2: most liked, 3: most commented, 4: most collected
        if v in (0, 1, 2, 3, 4):
            return v
        raise ValueError('sort_type must be 0-4')

    @validator('note_type')
    def validate_note_type(cls, v):
        """Accept only the known note types.

        Raises:
            ValueError: if ``v`` is not one of 0-2.
        """
        # 0: any, 1: video notes, 2: normal notes
        if v in (0, 1, 2):
            return v
        raise ValueError('note_type must be 0-2')
# ============================================================================
# 文档处理相关模型
# ============================================================================
class DocumentContent(BaseModel):
    """Data model for the content of one document.

    ``file_type`` is lower-cased before validation (falsy input becomes an
    empty string) and a ``None`` ``metadata`` is replaced by an empty dict.
    """

    # Path of the source file (required).
    file_path: str = Field(..., description="文件路径")
    # Document text (required).
    content: str = Field(..., description="文档内容")
    # File type, normalised to lower case by the validator below.
    file_type: str = Field(..., description="文件类型")
    # Free-form metadata; defaults to an empty dict.
    metadata: Dict[str, Any] = Field(default_factory=dict, description="元数据")

    @validator('file_type', pre=True)
    def normalize_file_type(cls, v):
        """Lower-case the raw file type; map falsy input to ``""``.

        This runs before type validation, so ``v`` is not guaranteed to be a
        string. A value without a ``lower()`` method is returned unchanged so
        the field's own ``str`` validation handles it, instead of an
        ``AttributeError`` escaping from the validator.
        """
        if not v:
            return ""
        try:
            return v.lower()
        except AttributeError:
            return v

    @validator('metadata', pre=True)
    def ensure_metadata(cls, v):
        """Replace an explicit ``None`` with an empty dict."""
        return {} if v is None else v
class IntegratedContent(BaseModel):
    """Content produced by integrating several documents.

    ``total_length`` is raised to at least the summed length of every
    document's ``content`` during validation.
    """

    # Source documents; defaults to an empty list.
    documents: List[DocumentContent] = Field(default_factory=list, description="文档列表")
    # Integrated text and its summary (both required).
    integrated_text: str = Field(..., description="整合后的文本")
    summary: str = Field(..., description="摘要")
    # Key topics; defaults to an empty list.
    key_topics: List[str] = Field(default_factory=list, description="关键主题")
    # Total length (required).
    total_length: int = Field(..., description="总长度")

    @validator('total_length')
    def validate_total_length(cls, v, values):
        """Return the larger of the supplied length and the documents' combined length.

        When ``documents`` is not present in ``values`` the supplied length is
        returned unchanged.
        """
        if 'documents' not in values:
            return v
        combined = 0
        for document in values['documents']:
            combined += len(document.content)
        return max(v, combined)
class ProcessingTask(BaseModel):
    """A processing task and its lifecycle timestamps."""

    # Task ID and keyword (both required).
    task_id: str = Field(..., description="任务ID")
    keyword: str = Field(..., description="关键词")
    # Paths of documents to process; defaults to an empty list.
    document_paths: List[str] = Field(default_factory=list, description="文档路径列表")
    # Search configuration (required).
    search_config: SearchConfig = Field(..., description="搜索配置")
    # Task status; restricted by validate_status below.
    status: str = Field(default="pending", description="任务状态")
    # Creation time defaults to now; start / completion times are optional.
    created_time: datetime = Field(default_factory=datetime.now, description="创建时间")
    started_time: Optional[datetime] = Field(default=None, description="开始时间")
    completed_time: Optional[datetime] = Field(default=None, description="完成时间")
    # Error message; empty by default.
    error_message: str = Field(default="", description="错误信息")

    @validator('status')
    def validate_status(cls, v):
        """Accept only the known task statuses.

        Raises:
            ValueError: if ``v`` is not pending / processing / completed / failed.
        """
        allowed = ['pending', 'processing', 'completed', 'failed']
        if v in allowed:
            return v
        raise ValueError(f'status must be one of {allowed}')

    class Config:
        # Emit datetimes as ISO-8601 strings when serialising to JSON.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
class ProcessedContent(BaseModel):
    """Content produced by processing a task."""

    # ID of the task (required).
    task_id: str = Field(..., description="任务ID")
    # Xiaohongshu search result and integrated document content (both required).
    xhs_content: XHSSearchResult = Field(..., description="小红书内容")
    document_content: IntegratedContent = Field(..., description="文档内容")
    # Combined summary (required).
    integrated_summary: str = Field(..., description="整合摘要")
    # Free-form statistics; defaults to an empty dict.
    statistics: Dict[str, Any] = Field(default_factory=dict, description="统计信息")
    # Processing time; defaults to 0.0.
    processing_time: float = Field(default=0.0, description="处理时间")

    def get_summary(self) -> str:
        """Return a one-line summary with the number of notes and documents."""
        note_total = len(self.xhs_content.notes)
        document_total = len(self.document_content.documents)
        return f"处理了 {note_total} 条笔记和 {document_total} 个文档"
# ============================================================================
# 整合服务相关模型
# ============================================================================
class IntegrationConfig(BaseModel):
    """Integration configuration."""

    # Keyword (required).
    keyword: str = Field(..., description="关键词")
    # Paths of documents to integrate; defaults to an empty list.
    document_paths: List[str] = Field(default_factory=list, description="文档路径列表")
    # Maximum number of notes (1..100), sort type (0..4) and note type (0..2).
    max_notes: int = Field(default=20, ge=1, le=100, description="最大笔记数量")
    sort_type: int = Field(default=0, ge=0, le=4, description="排序类型")
    note_type: int = Field(default=0, ge=0, le=2, description="笔记类型")
    # Whether to download media.
    download_media: bool = Field(default=True, description="是否下载媒体")
    # Output format; restricted by validate_output_format below.
    output_format: str = Field(default='summary', description="输出格式")
    # Whether LLM processing is included.
    include_llm_processing: bool = Field(default=True, description="是否包含LLM处理")

    @validator('output_format')
    def validate_output_format(cls, v):
        """Accept only the supported output formats.

        Raises:
            ValueError: if ``v`` is not one of the listed formats.
        """
        supported = ['summary', 'blog_post', 'travel_guide', 'product_sales', 'attraction_standard']
        if v in supported:
            return v
        raise ValueError(f'output_format must be one of {supported}')
class IntegrationResult(BaseModel):
    """Outcome of an integration run."""

    # Task ID and the configuration the task ran with (both required).
    task_id: str = Field(..., description="任务ID")
    config: IntegrationConfig = Field(..., description="整合配置")
    # Success flag (required).
    success: bool = Field(..., description="是否成功")
    # Processed content; None by default.
    processed_content: Optional[ProcessedContent] = Field(default=None, description="处理后的内容")
    # Error message (empty by default) and processing time (0.0 by default).
    error_message: str = Field(default="", description="错误信息")
    processing_time: float = Field(default=0.0, description="处理时间")
    # Creation time; defaults to now.
    created_time: datetime = Field(default_factory=datetime.now, description="创建时间")

    class Config:
        # Emit datetimes as ISO-8601 strings when serialising to JSON.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
class ProcessingStats(BaseModel):
    """Processing statistics; every counter defaults to zero."""

    # Task counters: total / successful / failed.
    total_tasks: int = Field(default=0, description="总任务数")
    successful_tasks: int = Field(default=0, description="成功任务数")
    failed_tasks: int = Field(default=0, description="失败任务数")
    # Total and average processing time.
    total_processing_time: float = Field(default=0.0, description="总处理时间")
    average_processing_time: float = Field(default=0.0, description="平均处理时间")
    # Totals of processed notes, processed documents and downloaded media.
    total_notes_processed: int = Field(default=0, description="总处理笔记数")
    total_documents_processed: int = Field(default=0, description="总处理文档数")
    total_media_downloaded: int = Field(default=0, description="总下载媒体数")
    # Start time and last-updated time; both default to now.
    start_time: datetime = Field(default_factory=datetime.now, description="开始时间")
    last_updated: datetime = Field(default_factory=datetime.now, description="最后更新时间")

    class Config:
        # Emit datetimes as ISO-8601 strings when serialising to JSON.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
# ============================================================================
# API 请求/响应模型
# ============================================================================
class SearchRequest(BaseModel):
    """Search request."""

    # Search keyword, 1..100 characters (required).
    keyword: str = Field(..., min_length=1, max_length=100, description="搜索关键词")
    # Maximum number of notes (1..100), sort type (0..4) and note type (0..2).
    max_notes: int = Field(default=20, ge=1, le=100, description="最大笔记数量")
    sort_type: int = Field(default=0, ge=0, le=4, description="排序类型")
    note_type: int = Field(default=0, ge=0, le=2, description="笔记类型")
    # Whether to download media.
    download_media: bool = Field(default=True, description="是否下载媒体")
class IntegrationRequest(BaseModel):
    """Integration request."""

    # Keyword, 1..100 characters (required).
    keyword: str = Field(..., description="关键词", min_length=1, max_length=100)
    # Paths of documents to integrate; defaults to an empty list.
    document_paths: List[str] = Field(default_factory=list, description="文档路径列表")
    # Maximum number of notes (1..100), sort type (0..4) and note type (0..2).
    max_notes: int = Field(default=20, ge=1, le=100, description="最大笔记数量")
    sort_type: int = Field(default=0, ge=0, le=4, description="排序类型")
    note_type: int = Field(default=0, ge=0, le=2, description="笔记类型")
    # Whether to download media.
    download_media: bool = Field(default=True, description="是否下载媒体")
    # Output format; restricted by validate_output_format below.
    output_format: str = Field(default='summary', description="输出格式")
    # Whether LLM processing is included.
    include_llm_processing: bool = Field(default=True, description="是否包含LLM处理")

    @validator('output_format')
    def validate_output_format(cls, v):
        """Accept only the supported output formats.

        Raises:
            ValueError: if ``v`` is not one of the listed formats.
        """
        supported = ['summary', 'blog_post', 'travel_guide', 'product_sales', 'attraction_standard']
        if v in supported:
            return v
        raise ValueError(f'output_format must be one of {supported}')
class BatchIntegrationRequest(BaseModel):
    """Batch integration request carrying between 1 and 10 tasks."""

    # Task list. The default is an empty list, which the validator rejects,
    # so callers must supply at least one task.
    tasks: List[IntegrationRequest] = Field(default_factory=list, description="任务列表")

    # always=True makes the check run when ``tasks`` is omitted as well;
    # without it the empty default would skip the "cannot be empty" rule.
    @validator('tasks', always=True)
    def validate_tasks(cls, v):
        """Require a non-empty task list of at most 10 entries.

        Raises:
            ValueError: if the list is empty or holds more than 10 tasks.
        """
        if not v:
            raise ValueError('tasks cannot be empty')
        if len(v) > 10:
            raise ValueError('maximum 10 tasks allowed')
        return v
class CookieRequest(BaseModel):
    """Cookie request."""

    # Cookie name, 1..50 characters (required).
    name: str = Field(..., min_length=1, max_length=50, description="Cookie名称")
    # Raw cookie string, at least 10 characters (required).
    cookie_string: str = Field(..., min_length=10, description="Cookie字符串")
    # Optional free-form user information.
    user_info: Optional[Dict[str, Any]] = Field(default=None, description="用户信息")
class ExportRequest(BaseModel):
    """Export request."""

    # ID of the task to export (required).
    task_id: str = Field(..., description="任务ID")
    # Output format.
    # NOTE(review): unlike IntegrationRequest.output_format, this value is not
    # checked against a list of supported formats - confirm that is intended.
    output_format: str = Field(default='summary', description="输出格式")
    # Optional file name.
    filename: Optional[str] = Field(default=None, description="文件名")
class ApiResponse(BaseModel):
    """Base class for API responses."""

    # Success flag (required).
    success: bool = Field(..., description="是否成功")
    # Response message; empty by default.
    message: str = Field(default="", description="响应消息")
    # Response payload of any type; None by default.
    data: Optional[Any] = Field(default=None, description="响应数据")
    # Timestamp; defaults to now.
    timestamp: datetime = Field(default_factory=datetime.now, description="时间戳")

    class Config:
        # Emit datetimes as ISO-8601 strings when serialising to JSON.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
class ErrorResponse(BaseModel):
    """Error response."""

    # Success flag; defaults to False.
    success: bool = Field(default=False, description="是否成功")
    # Error code and error message (both required).
    error_code: str = Field(..., description="错误码")
    error_message: str = Field(..., description="错误信息")
    # Timestamp; defaults to now.
    timestamp: datetime = Field(default_factory=datetime.now, description="时间戳")

    class Config:
        # Emit datetimes as ISO-8601 strings when serialising to JSON.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
# ============================================================================
# 服务状态模型
# ============================================================================
class ServiceStatus(BaseModel):
    """Service status."""

    # Service status string and startup time (both required).
    status: str = Field(..., description="服务状态")
    startup_time: datetime = Field(..., description="启动时间")
    # Output path (required).
    output_path: str = Field(..., description="输出路径")
    # Xiaohongshu service status and integration statistics; both default to empty dicts.
    xhs_service: Dict[str, Any] = Field(default_factory=dict, description="小红书服务状态")
    integration_stats: Dict[str, Any] = Field(default_factory=dict, description="整合统计")

    class Config:
        # Emit datetimes as ISO-8601 strings when serialising to JSON.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
class HealthCheck(BaseModel):
    """Health check result."""

    # Health status string (required).
    status: str = Field(..., description="健康状态")
    # Time of the check; defaults to now.
    timestamp: datetime = Field(default_factory=datetime.now, description="检查时间")
    # Mapping of service name to a boolean status; defaults to an empty dict.
    services: Dict[str, bool] = Field(default_factory=dict, description="服务状态")

    class Config:
        # Emit datetimes as ISO-8601 strings when serialising to JSON.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
# ============================================================================
# 景区和产品相关模型
# ============================================================================
class ScenicSpot(BaseModel):
    """Data model for a scenic spot."""

    # Primary key (optional) and owning user ID (required).
    id: Optional[int] = Field(default=None, description="主键id")
    userId: int = Field(..., description="用户ID")
    # Scenic spot name (required).
    name: str = Field(..., description="景区名称")
    # Optional descriptive text: address, transport guide, description,
    # advantages and highlights.
    address: Optional[str] = Field(default=None, description="地址")
    trafficInfo: Optional[str] = Field(default=None, description="交通指南")
    description: Optional[str] = Field(default=None, description="描述")
    advantage: Optional[str] = Field(default=None, description="景区优势")
    highlight: Optional[str] = Field(default=None, description="景区亮点")
    # Visibility flag (0 = private, 1 = public) and deletion flag.
    isPublic: int = Field(default=0, description="是否公开0私有1公开")
    isDelete: int = Field(default=0, description="是否删除")
    # Creation and update times; both default to now.
    createTime: datetime = Field(default_factory=datetime.now, description="创建时间")
    updateTime: datetime = Field(default_factory=datetime.now, description="更新时间")

    class Config:
        # Emit datetimes as ISO-8601 strings when serialising to JSON.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
class Product(BaseModel):
    """Data model for a product."""

    # Primary key (optional) and owning user ID (required).
    id: Optional[int] = Field(default=None, description="主键id")
    userId: int = Field(..., description="用户ID")
    # Product name (required).
    productName: str = Field(..., description="产品名称")
    # Original price and actual price.
    originPrice: Optional[float] = Field(default=None, description="原价")
    realPrice: Optional[float] = Field(default=None, description="实际价格")
    # Package details, sales period and stock.
    packageInfo: Optional[str] = Field(default=None, description="套票详情")
    salesPeriod: Optional[str] = Field(default=None, description="售卖期")
    stock: Optional[int] = Field(default=None, description="库存")
    # Marketing text: advantages, highlights and description.
    keyAdvantages: Optional[str] = Field(default=None, description="产品优势")
    highlights: Optional[str] = Field(default=None, description="产品亮点")
    detailedDescription: Optional[str] = Field(default=None, description="产品描述")
    # Terms: usage rules, surcharge notes, reservation rules, refund/change
    # policy and discounts.
    usageRules: Optional[str] = Field(default=None, description="使用规则详细说明")
    surcharge: Optional[str] = Field(default=None, description="加价说明")
    reservation: Optional[str] = Field(default=None, description="预约规则")
    refund: Optional[str] = Field(default=None, description="退改政策")
    discounts: Optional[str] = Field(default=None, description="优惠内容")
    # Visibility flag (0 = private, 1 = public) and deletion flag.
    isPublic: int = Field(default=0, description="是否公开0私有1公开")
    isDelete: int = Field(default=0, description="是否删除")
    # Creation and update times; both default to now.
    createTime: datetime = Field(default_factory=datetime.now, description="创建时间")
    updateTime: datetime = Field(default_factory=datetime.now, description="更新时间")

    class Config:
        # Emit datetimes as ISO-8601 strings when serialising to JSON.
        json_encoders = {datetime: lambda moment: moment.isoformat()}