395 lines
18 KiB
Python
395 lines
18 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
|
||
"""
|
||
Core Models
|
||
统一的 Pydantic 数据模型定义
|
||
"""
|
||
|
||
from typing import Dict, List, Optional, Any, Union
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
from pydantic import BaseModel, Field, validator, root_validator
|
||
import logging
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
# ============================================================================
|
||
# Cookie 管理相关模型
|
||
# ============================================================================
|
||
|
||
class CookieInfo(BaseModel):
    """A stored cookie together with its usage bookkeeping.

    ``last_used`` may be supplied as a string; it is parsed with
    ``datetime.fromisoformat`` before regular field validation.
    Datetimes are rendered with ``isoformat()`` by the JSON encoder.
    """

    # Identity and raw cookie payload.
    name: str = Field(..., description="Cookie名称")
    cookie_string: str = Field(..., description="Cookie字符串")

    # Usage bookkeeping.
    last_used: datetime = Field(
        default_factory=datetime.now,
        description="最后使用时间",
    )
    use_count: int = Field(default=0, description="使用次数")
    is_valid: bool = Field(default=True, description="是否有效")
    failure_count: int = Field(default=0, description="失败次数")
    user_info: Optional[Dict[str, Any]] = Field(
        default=None,
        description="用户信息",
    )

    @validator('last_used', pre=True)
    def parse_last_used(cls, v):
        """Convert a string ``last_used`` to ``datetime``; pass other values through."""
        return datetime.fromisoformat(v) if isinstance(v, str) else v

    class Config:
        json_encoders = {datetime: lambda moment: moment.isoformat()}
|
||
|
||
class CookieStats(BaseModel):
    """Aggregate statistics about the managed cookies."""

    # Required counters.
    total_cookies: int = Field(..., description="总Cookie数量")
    valid_cookies: int = Field(..., description="有效Cookie数量")
    invalid_cookies: int = Field(..., description="无效Cookie数量")

    # Optional details.
    current_cookie: Optional[str] = Field(
        default=None,
        description="当前使用的Cookie",
    )
    cookie_details: List[Dict[str, Any]] = Field(
        default_factory=list,
        description="Cookie详情",
    )
|
||
|
||
# ============================================================================
|
||
# 小红书相关模型
|
||
# ============================================================================
|
||
|
||
class XHSNote(BaseModel):
    """Xiaohongshu note data model.

    The ``likes``, ``comments`` and ``shares`` counters are normalised
    before field validation: ``None`` becomes 0 and any other value is
    passed through ``int()`` and clamped to be non-negative.
    """

    # Note identity and text.
    note_id: str = Field(..., description="笔记ID")
    title: str = Field(..., description="笔记标题")
    content: str = Field(..., description="笔记内容")

    # Author.
    author: str = Field(..., description="作者昵称")
    author_id: str = Field(..., description="作者ID")

    # Tags and attached media.
    tags: List[str] = Field(default_factory=list, description="标签列表")
    images: List[str] = Field(default_factory=list, description="图片列表")
    videos: List[str] = Field(default_factory=list, description="视频列表")

    # Engagement counters.
    likes: int = Field(default=0, description="点赞数")
    comments: int = Field(default=0, description="评论数")
    shares: int = Field(default=0, description="分享数")

    created_time: str = Field(..., description="创建时间")
    note_url: str = Field(..., description="笔记URL")

    @validator('likes', 'comments', 'shares', pre=True)
    def validate_counts(cls, v):
        """Coerce a counter to a non-negative int, mapping ``None`` to 0."""
        if v is None:
            return 0
        return max(0, int(v))
|
||
|
||
class XHSSearchResult(BaseModel):
    """Xiaohongshu search result.

    ``total_count`` is raised to at least ``len(notes)`` whenever the
    ``notes`` field validated successfully.
    """

    keyword: str = Field(..., description="搜索关键词")
    notes: List[XHSNote] = Field(default_factory=list, description="笔记列表")
    total_count: int = Field(..., description="总数量")
    success: bool = Field(..., description="是否成功")
    error_message: str = Field(default="", description="错误信息")

    @validator('total_count')
    def validate_total_count(cls, v, values):
        """Return the larger of the supplied count and the number of notes."""
        # 'notes' is absent from `values` when that field failed validation.
        if 'notes' not in values:
            return v
        note_total = len(values['notes'])
        return max(v, note_total)
|
||
|
||
class SearchConfig(BaseModel):
    """Search configuration."""

    keyword: str = Field(..., description="搜索关键词")
    max_notes: int = Field(
        default=20,
        ge=1,
        le=100,
        description="最大笔记数量",
    )
    sort_type: int = Field(default=0, ge=0, le=4, description="排序类型")
    note_type: int = Field(default=0, ge=0, le=2, description="笔记类型")
    download_images: bool = Field(default=True, description="是否下载图片")
    download_videos: bool = Field(default=True, description="是否下载视频")

    @validator('sort_type')
    def validate_sort_type(cls, v):
        """Accept only the known sort codes.

        0: comprehensive, 1: latest, 2: most liked, 3: most commented,
        4: most collected.
        """
        if v in [0, 1, 2, 3, 4]:
            return v
        raise ValueError('sort_type must be 0-4')

    @validator('note_type')
    def validate_note_type(cls, v):
        """Accept only the known note-type codes.

        0: any, 1: video notes, 2: regular notes.
        """
        if v in [0, 1, 2]:
            return v
        raise ValueError('note_type must be 0-2')
|
||
|
||
# ============================================================================
|
||
# 文档处理相关模型
|
||
# ============================================================================
|
||
|
||
class DocumentContent(BaseModel):
    """Document content data model.

    ``file_type`` is lower-cased before validation (falsy input becomes
    an empty string) and a ``None`` ``metadata`` is replaced by an empty
    dict.
    """

    file_path: str = Field(..., description="文件路径")
    content: str = Field(..., description="文档内容")
    file_type: str = Field(..., description="文件类型")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="元数据")

    @validator('file_type', pre=True)
    def normalize_file_type(cls, v):
        """Lower-case ``file_type``; map falsy input to ``""``.

        Raises:
            ValueError: if a truthy non-string value is supplied. The
                original code called ``.lower()`` unconditionally, which
                raised ``AttributeError`` and escaped pydantic's
                validation-error handling.
        """
        if not v:
            return ""
        if not isinstance(v, str):
            raise ValueError('file_type must be a string')
        return v.lower()

    @validator('metadata', pre=True)
    def ensure_metadata(cls, v):
        """Replace an explicit ``None`` with an empty dict."""
        return {} if v is None else v
|
||
|
||
class IntegratedContent(BaseModel):
    """Integrated content built from a list of documents.

    ``total_length`` is raised to at least the summed length of every
    document's ``content`` whenever ``documents`` validated successfully.
    """

    documents: List[DocumentContent] = Field(
        default_factory=list,
        description="文档列表",
    )
    integrated_text: str = Field(..., description="整合后的文本")
    summary: str = Field(..., description="摘要")
    key_topics: List[str] = Field(default_factory=list, description="关键主题")
    total_length: int = Field(..., description="总长度")

    @validator('total_length')
    def validate_total_length(cls, v, values):
        """Return the larger of the supplied length and the documents' combined length."""
        # 'documents' is absent from `values` when that field failed validation.
        if 'documents' not in values:
            return v
        combined = sum(len(item.content) for item in values['documents'])
        return max(v, combined)
|
||
|
||
class ProcessingTask(BaseModel):
    """A processing task and its lifecycle timestamps.

    ``status`` must be one of ``pending``, ``processing``, ``completed``
    or ``failed``.
    """

    task_id: str = Field(..., description="任务ID")
    keyword: str = Field(..., description="关键词")
    document_paths: List[str] = Field(
        default_factory=list,
        description="文档路径列表",
    )
    search_config: SearchConfig = Field(..., description="搜索配置")
    status: str = Field(default="pending", description="任务状态")

    # Lifecycle timestamps.
    created_time: datetime = Field(
        default_factory=datetime.now,
        description="创建时间",
    )
    started_time: Optional[datetime] = Field(default=None, description="开始时间")
    completed_time: Optional[datetime] = Field(default=None, description="完成时间")

    error_message: str = Field(default="", description="错误信息")

    @validator('status')
    def validate_status(cls, v):
        """Reject any status outside the allowed set."""
        valid_statuses = ['pending', 'processing', 'completed', 'failed']
        if v in valid_statuses:
            return v
        raise ValueError(f'status must be one of {valid_statuses}')

    class Config:
        json_encoders = {datetime: lambda moment: moment.isoformat()}
|
||
|
||
class ProcessedContent(BaseModel):
    """Processed content combining search results and document content."""

    task_id: str = Field(..., description="任务ID")
    xhs_content: XHSSearchResult = Field(..., description="小红书内容")
    document_content: IntegratedContent = Field(..., description="文档内容")
    integrated_summary: str = Field(..., description="整合摘要")
    statistics: Dict[str, Any] = Field(
        default_factory=dict,
        description="统计信息",
    )
    processing_time: float = Field(default=0.0, description="处理时间")

    def get_summary(self) -> str:
        """Return a one-line summary with the note count and document count."""
        note_count = len(self.xhs_content.notes)
        document_count = len(self.document_content.documents)
        return f"处理了 {note_count} 条笔记和 {document_count} 个文档"
|
||
|
||
# ============================================================================
|
||
# 整合服务相关模型
|
||
# ============================================================================
|
||
|
||
class IntegrationConfig(BaseModel):
    """Integration configuration.

    ``output_format`` must be one of ``summary``, ``blog_post``,
    ``travel_guide``, ``product_sales`` or ``attraction_standard``.
    """

    keyword: str = Field(..., description="关键词")
    document_paths: List[str] = Field(
        default_factory=list,
        description="文档路径列表",
    )

    # Search options.
    max_notes: int = Field(
        default=20,
        ge=1,
        le=100,
        description="最大笔记数量",
    )
    sort_type: int = Field(default=0, ge=0, le=4, description="排序类型")
    note_type: int = Field(default=0, ge=0, le=2, description="笔记类型")
    download_media: bool = Field(default=True, description="是否下载媒体")

    # Output options.
    output_format: str = Field(default='summary', description="输出格式")
    include_llm_processing: bool = Field(
        default=True,
        description="是否包含LLM处理",
    )

    @validator('output_format')
    def validate_output_format(cls, v):
        """Reject any output format outside the supported list."""
        valid_formats = ['summary', 'blog_post', 'travel_guide', 'product_sales', 'attraction_standard']
        if v in valid_formats:
            return v
        raise ValueError(f'output_format must be one of {valid_formats}')
|
||
|
||
class IntegrationResult(BaseModel):
    """Integration result for a single task."""

    task_id: str = Field(..., description="任务ID")
    config: IntegrationConfig = Field(..., description="整合配置")
    success: bool = Field(..., description="是否成功")
    processed_content: Optional[ProcessedContent] = Field(
        default=None,
        description="处理后的内容",
    )
    error_message: str = Field(default="", description="错误信息")
    processing_time: float = Field(default=0.0, description="处理时间")
    created_time: datetime = Field(
        default_factory=datetime.now,
        description="创建时间",
    )

    class Config:
        json_encoders = {datetime: lambda moment: moment.isoformat()}
|
||
|
||
class ProcessingStats(BaseModel):
    """Processing statistics; every counter defaults to zero."""

    # Task counters.
    total_tasks: int = Field(default=0, description="总任务数")
    successful_tasks: int = Field(default=0, description="成功任务数")
    failed_tasks: int = Field(default=0, description="失败任务数")

    # Timing.
    total_processing_time: float = Field(default=0.0, description="总处理时间")
    average_processing_time: float = Field(default=0.0, description="平均处理时间")

    # Volume counters.
    total_notes_processed: int = Field(default=0, description="总处理笔记数")
    total_documents_processed: int = Field(default=0, description="总处理文档数")
    total_media_downloaded: int = Field(default=0, description="总下载媒体数")

    # Timestamps, both defaulting to the moment of construction.
    start_time: datetime = Field(
        default_factory=datetime.now,
        description="开始时间",
    )
    last_updated: datetime = Field(
        default_factory=datetime.now,
        description="最后更新时间",
    )

    class Config:
        json_encoders = {datetime: lambda moment: moment.isoformat()}
|
||
|
||
# ============================================================================
|
||
# API 请求/响应模型
|
||
# ============================================================================
|
||
|
||
class SearchRequest(BaseModel):
    """Search request."""

    keyword: str = Field(
        ...,
        min_length=1,
        max_length=100,
        description="搜索关键词",
    )
    max_notes: int = Field(
        default=20,
        ge=1,
        le=100,
        description="最大笔记数量",
    )
    sort_type: int = Field(default=0, ge=0, le=4, description="排序类型")
    note_type: int = Field(default=0, ge=0, le=2, description="笔记类型")
    download_media: bool = Field(default=True, description="是否下载媒体")
|
||
|
||
class IntegrationRequest(BaseModel):
    """Integration request.

    ``output_format`` must be one of ``summary``, ``blog_post``,
    ``travel_guide``, ``product_sales`` or ``attraction_standard``.
    """

    keyword: str = Field(
        ...,
        description="关键词",
        min_length=1,
        max_length=100,
    )
    document_paths: List[str] = Field(
        default_factory=list,
        description="文档路径列表",
    )

    # Search options.
    max_notes: int = Field(
        default=20,
        ge=1,
        le=100,
        description="最大笔记数量",
    )
    sort_type: int = Field(default=0, ge=0, le=4, description="排序类型")
    note_type: int = Field(default=0, ge=0, le=2, description="笔记类型")
    download_media: bool = Field(default=True, description="是否下载媒体")

    # Output options.
    output_format: str = Field(default='summary', description="输出格式")
    include_llm_processing: bool = Field(
        default=True,
        description="是否包含LLM处理",
    )

    @validator('output_format')
    def validate_output_format(cls, v):
        """Reject any output format outside the supported list."""
        valid_formats = ['summary', 'blog_post', 'travel_guide', 'product_sales', 'attraction_standard']
        if v in valid_formats:
            return v
        raise ValueError(f'output_format must be one of {valid_formats}')
|
||
|
||
class BatchIntegrationRequest(BaseModel):
    """Batch integration request holding between 1 and 10 tasks."""

    tasks: List[IntegrationRequest] = Field(
        default_factory=list,
        description="任务列表",
    )

    # always=True makes the check run on the default (empty) list too.
    # Without it, omitting `tasks` skipped the validator entirely and an
    # empty batch was accepted despite the "cannot be empty" rule.
    @validator('tasks', always=True)
    def validate_tasks(cls, v):
        """Require a non-empty task list of at most 10 entries.

        Raises:
            ValueError: if the list is empty or holds more than 10 tasks.
        """
        if not v:
            raise ValueError('tasks cannot be empty')
        if len(v) > 10:
            raise ValueError('maximum 10 tasks allowed')
        return v
|
||
|
||
class CookieRequest(BaseModel):
    """Cookie request."""

    name: str = Field(
        ...,
        min_length=1,
        max_length=50,
        description="Cookie名称",
    )
    cookie_string: str = Field(
        ...,
        min_length=10,
        description="Cookie字符串",
    )
    user_info: Optional[Dict[str, Any]] = Field(
        default=None,
        description="用户信息",
    )
|
||
|
||
class ExportRequest(BaseModel):
    """Export request."""

    task_id: str = Field(..., description="任务ID")
    # NOTE(review): unlike IntegrationRequest, output_format is not checked
    # against a list of allowed values here — confirm whether that is intended.
    output_format: str = Field(
        default='summary',
        description="输出格式",
    )
    filename: Optional[str] = Field(
        default=None,
        description="文件名",
    )
|
||
|
||
class ApiResponse(BaseModel):
    """Base class for API responses."""

    success: bool = Field(..., description="是否成功")
    message: str = Field(default="", description="响应消息")
    data: Optional[Any] = Field(default=None, description="响应数据")
    timestamp: datetime = Field(
        default_factory=datetime.now,
        description="时间戳",
    )

    class Config:
        json_encoders = {datetime: lambda moment: moment.isoformat()}
|
||
|
||
class ErrorResponse(BaseModel):
    """Error response; ``success`` defaults to ``False``."""

    success: bool = Field(default=False, description="是否成功")
    error_code: str = Field(..., description="错误码")
    error_message: str = Field(..., description="错误信息")
    timestamp: datetime = Field(
        default_factory=datetime.now,
        description="时间戳",
    )

    class Config:
        json_encoders = {datetime: lambda moment: moment.isoformat()}
|
||
|
||
# ============================================================================
|
||
# 服务状态模型
|
||
# ============================================================================
|
||
|
||
class ServiceStatus(BaseModel):
    """Service status."""

    status: str = Field(..., description="服务状态")
    startup_time: datetime = Field(..., description="启动时间")
    output_path: str = Field(..., description="输出路径")

    # Free-form status payloads; both default to an empty dict.
    xhs_service: Dict[str, Any] = Field(
        default_factory=dict,
        description="小红书服务状态",
    )
    integration_stats: Dict[str, Any] = Field(
        default_factory=dict,
        description="整合统计",
    )

    class Config:
        json_encoders = {datetime: lambda moment: moment.isoformat()}
|
||
|
||
class HealthCheck(BaseModel):
    """Health check result."""

    status: str = Field(..., description="健康状态")
    timestamp: datetime = Field(
        default_factory=datetime.now,
        description="检查时间",
    )
    # Maps a service name to a boolean flag.
    services: Dict[str, bool] = Field(
        default_factory=dict,
        description="服务状态",
    )

    class Config:
        json_encoders = {datetime: lambda moment: moment.isoformat()}
|
||
|
||
# ============================================================================
|
||
# 景区和产品相关模型
|
||
# ============================================================================
|
||
|
||
class ScenicSpot(BaseModel):
    """Scenic spot data model."""

    # Keys.
    id: Optional[int] = Field(default=None, description="主键id")
    userId: int = Field(..., description="用户ID")

    # Descriptive fields; all optional except the name.
    name: str = Field(..., description="景区名称")
    address: Optional[str] = Field(default=None, description="地址")
    trafficInfo: Optional[str] = Field(default=None, description="交通指南")
    description: Optional[str] = Field(default=None, description="描述")
    advantage: Optional[str] = Field(default=None, description="景区优势")
    highlight: Optional[str] = Field(default=None, description="景区亮点")

    # Integer flags (isPublic: 0 private, 1 public).
    isPublic: int = Field(default=0, description="是否公开(0私有1公开)")
    isDelete: int = Field(default=0, description="是否删除")

    # Timestamps, both defaulting to the moment of construction.
    createTime: datetime = Field(
        default_factory=datetime.now,
        description="创建时间",
    )
    updateTime: datetime = Field(
        default_factory=datetime.now,
        description="更新时间",
    )

    class Config:
        json_encoders = {datetime: lambda moment: moment.isoformat()}
|
||
|
||
class Product(BaseModel):
    """Product data model."""

    # Keys.
    id: Optional[int] = Field(default=None, description="主键id")
    userId: int = Field(..., description="用户ID")

    # Core commercial fields.
    productName: str = Field(..., description="产品名称")
    originPrice: Optional[float] = Field(default=None, description="原价")
    realPrice: Optional[float] = Field(default=None, description="实际价格")
    packageInfo: Optional[str] = Field(default=None, description="套票详情")
    salesPeriod: Optional[str] = Field(default=None, description="售卖期")
    stock: Optional[int] = Field(default=None, description="库存")

    # Marketing copy.
    keyAdvantages: Optional[str] = Field(default=None, description="产品优势")
    highlights: Optional[str] = Field(default=None, description="产品亮点")
    detailedDescription: Optional[str] = Field(
        default=None,
        description="产品描述",
    )

    # Rules and policies.
    usageRules: Optional[str] = Field(
        default=None,
        description="使用规则详细说明",
    )
    surcharge: Optional[str] = Field(default=None, description="加价说明")
    reservation: Optional[str] = Field(default=None, description="预约规则")
    refund: Optional[str] = Field(default=None, description="退改政策")
    discounts: Optional[str] = Field(default=None, description="优惠内容")

    # Integer flags (isPublic: 0 private, 1 public).
    isPublic: int = Field(default=0, description="是否公开(0私有1公开)")
    isDelete: int = Field(default=0, description="是否删除")

    # Timestamps, both defaulting to the moment of construction.
    createTime: datetime = Field(
        default_factory=datetime.now,
        description="创建时间",
    )
    updateTime: datetime = Field(
        default_factory=datetime.now,
        description="更新时间",
    )

    class Config:
        json_encoders = {datetime: lambda moment: moment.isoformat()}