410 lines
17 KiB
Python
Raw Normal View History

2025-07-15 15:47:47 +08:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
内容整合API模型定义
"""
from typing import List, Dict, Any, Optional, Union
from datetime import datetime
from pydantic import BaseModel, Field
class XHSSearchRequest(BaseModel):
    """Request model for a Xiaohongshu (XHS) note search."""

    # Search keyword, required, 1-100 characters.
    keyword: str = Field(
        ...,
        min_length=1,
        max_length=100,
        description="搜索关键词",
    )
    # Maximum number of notes, constrained to 1-100.
    max_notes: int = Field(20, ge=1, le=100, description="最大笔记数量")
    # Sort code 0-4 (0 general, 1 newest, 2 most liked, 3 most commented,
    # 4 most collected), as enumerated in the description string.
    sort_type: int = Field(
        0,
        ge=0,
        le=4,
        description="排序类型 (0:综合排序, 1:最新, 2:最多点赞, 3:最多评论, 4:最多收藏)",
    )
    # Note kind code 0-2 (0 any, 1 video notes, 2 regular notes).
    note_type: int = Field(
        0,
        ge=0,
        le=2,
        description="笔记类型 (0:不限, 1:视频笔记, 2:普通笔记)",
    )
    # Whether media files should be downloaded; on by default.
    download_media: bool = Field(True, description="是否下载媒体文件")

    class Config:
        # Example payload registered through `schema_extra`.
        schema_extra = {
            "example": {
                "keyword": "上海迪士尼攻略",
                "max_notes": 20,
                "sort_type": 0,
                "note_type": 0,
                "download_media": True,
            },
        }
class IntegrationRequest(BaseModel):
    """Request model for a content-integration task."""

    # Search keyword, required, 1-100 characters.
    keyword: str = Field(
        ...,
        min_length=1,
        max_length=100,
        description="搜索关键词",
    )
    # Required list of document paths.
    document_paths: List[str] = Field(..., description="文档路径列表")
    # Maximum number of notes, constrained to 1-100.
    max_notes: int = Field(20, ge=1, le=100, description="最大笔记数量")
    # Sort type code, constrained to 0-4.
    sort_type: int = Field(0, ge=0, le=4, description="排序类型")
    # Note type code, constrained to 0-2.
    note_type: int = Field(0, ge=0, le=2, description="笔记类型")
    # Whether media files should be downloaded; on by default.
    download_media: bool = Field(True, description="是否下载媒体文件")
    # Output format name; defaults to 'summary'.
    output_format: str = Field('summary', description="输出格式")
    # Whether LLM processing is included; on by default.
    include_llm_processing: bool = Field(True, description="是否包含LLM处理")

    class Config:
        # Example payload registered through `schema_extra`.
        schema_extra = {
            "example": {
                "keyword": "上海迪士尼攻略",
                "document_paths": [
                    "/path/to/document1.pdf",
                    "/path/to/document2.docx",
                ],
                "max_notes": 20,
                "sort_type": 0,
                "note_type": 0,
                "download_media": True,
                "output_format": "summary",
                "include_llm_processing": True,
            },
        }
class BatchIntegrationRequest(BaseModel):
    """Request model for a batch of integration tasks."""

    # Required list of integration tasks, each an IntegrationRequest.
    tasks: List[IntegrationRequest] = Field(
        ...,
        description="整合任务列表",
    )

    class Config:
        # Example payload registered through `schema_extra`.
        schema_extra = {
            "example": {
                "tasks": [
                    {
                        "keyword": "上海迪士尼攻略",
                        "document_paths": ["/path/to/document1.pdf"],
                        "max_notes": 20,
                    },
                    {
                        "keyword": "北京环球影城攻略",
                        "document_paths": ["/path/to/document2.pdf"],
                        "max_notes": 15,
                    },
                ],
            },
        }
class CookieManagementRequest(BaseModel):
    """Request model for cookie management."""

    # Cookie name, required, 1-50 characters.
    name: str = Field(
        ...,
        min_length=1,
        max_length=50,
        description="Cookie名称",
    )
    # Cookie string, required, at least 10 characters.
    cookie_string: str = Field(
        ...,
        min_length=10,
        description="Cookie字符串",
    )
    # Optional free-form user information.
    user_info: Optional[Dict[str, Any]] = Field(None, description="用户信息")

    class Config:
        # Example payload registered through `schema_extra`.
        schema_extra = {
            "example": {
                "name": "user1",
                "cookie_string": "your_cookie_string_here",
                "user_info": {
                    "username": "用户1",
                    "description": "测试用户",
                },
            },
        }
class ExportRequest(BaseModel):
    """Request model for exporting a task result."""

    # Required task ID.
    task_id: str = Field(..., description="任务ID")
    # Output format name; defaults to 'summary'.
    output_format: str = Field('summary', description="输出格式")
    # Optional file name.
    filename: Optional[str] = Field(None, description="文件名")

    class Config:
        # Example payload registered through `schema_extra`.
        schema_extra = {
            "example": {
                "task_id": "task_123456",
                "output_format": "summary",
                "filename": "result.txt",
            },
        }
class XHSNoteResponse(BaseModel):
    """Response model for a single Xiaohongshu (XHS) note."""

    # --- required identity and text fields ---
    note_id: str = Field(..., description="笔记ID")
    title: str = Field(..., description="笔记标题")
    content: str = Field(..., description="笔记内容")
    author: str = Field(..., description="作者昵称")
    author_id: str = Field(..., description="作者ID")

    # --- collections; each instance gets its own empty list by default ---
    tags: List[str] = Field(description="标签列表", default_factory=list)
    images: List[str] = Field(description="图片列表", default_factory=list)
    videos: List[str] = Field(description="视频列表", default_factory=list)

    # --- counters, all defaulting to zero ---
    likes: int = Field(0, description="点赞数")
    comments: int = Field(0, description="评论数")
    shares: int = Field(0, description="分享数")

    # --- required creation time (kept as a string) and note URL ---
    created_time: str = Field(..., description="创建时间")
    note_url: str = Field(..., description="笔记URL")
class XHSSearchResponse(BaseModel):
    """Response model for a Xiaohongshu (XHS) search."""

    # Required search keyword.
    keyword: str = Field(..., description="搜索关键词")
    # Notes as XHSNoteResponse items; empty list by default.
    notes: List[XHSNoteResponse] = Field(
        description="笔记列表",
        default_factory=list,
    )
    # Required total count.
    total_count: int = Field(..., description="总数量")
    # Required success flag.
    success: bool = Field(..., description="是否成功")
    # Error message; empty string by default.
    error_message: str = Field("", description="错误信息")
    # Processing time; 0.0 by default.
    processing_time: float = Field(0.0, description="处理时间")
class DocumentContentResponse(BaseModel):
    """Response model for the content of one document."""

    # Required file path.
    file_path: str = Field(..., description="文件路径")
    # Required document content.
    content: str = Field(..., description="文档内容")
    # Required file type.
    file_type: str = Field(..., description="文件类型")
    # Free-form metadata; empty dict by default.
    metadata: Dict[str, Any] = Field(
        description="元数据",
        default_factory=dict,
    )
class IntegratedContentResponse(BaseModel):
    """Response model for integrated document content."""

    # Documents as DocumentContentResponse items; empty list by default.
    documents: List[DocumentContentResponse] = Field(
        description="文档列表",
        default_factory=list,
    )
    # Required integrated text.
    integrated_text: str = Field(..., description="整合后的文本")
    # Required summary.
    summary: str = Field(..., description="摘要")
    # Key topics; empty list by default.
    key_topics: List[str] = Field(
        description="关键主题",
        default_factory=list,
    )
    # Required total length.
    total_length: int = Field(..., description="总长度")
class IntegrationResponse(BaseModel):
    """Response model for a content-integration task."""

    # Required task ID, keyword and success flag.
    task_id: str = Field(..., description="任务ID")
    keyword: str = Field(..., description="关键词")
    success: bool = Field(..., description="是否成功")

    # Optional XHS search content and integrated document content.
    xhs_content: Optional[XHSSearchResponse] = Field(
        None,
        description="小红书内容",
    )
    document_content: Optional[IntegratedContentResponse] = Field(
        None,
        description="文档内容",
    )

    # Integrated summary; empty string by default.
    integrated_summary: str = Field("", description="整合摘要")
    # Processing time; 0.0 by default.
    processing_time: float = Field(0.0, description="处理时间")
    # Creation time; datetime.now() is called when no value is supplied.
    created_time: datetime = Field(
        description="创建时间",
        default_factory=datetime.now,
    )
    # Error message; empty string by default.
    error_message: str = Field("", description="错误信息")

    class Config:
        # Encode datetime values with their isoformat() string.
        json_encoders = {
            datetime: lambda moment: moment.isoformat(),
        }
class CookieStatsResponse(BaseModel):
    """Response model for cookie statistics."""

    # Required counts: total, valid and invalid cookies.
    total_cookies: int = Field(..., description="总Cookie数量")
    valid_cookies: int = Field(..., description="有效Cookie数量")
    invalid_cookies: int = Field(..., description="无效Cookie数量")
    # Cookie currently in use, if any.
    current_cookie: Optional[str] = Field(
        None,
        description="当前使用的Cookie",
    )
    # Per-cookie detail records; empty list by default.
    cookie_details: List[Dict[str, Any]] = Field(
        description="Cookie详情",
        default_factory=list,
    )
class ServiceStatusResponse(BaseModel):
    """Response model for the service status."""

    # Required service status.
    status: str = Field(..., description="服务状态")
    # Required startup time.
    startup_time: datetime = Field(..., description="启动时间")
    # Required output path.
    output_path: str = Field(..., description="输出路径")
    # Required availability flags for the XHS and document services.
    xhs_service_available: bool = Field(
        ...,
        description="小红书服务是否可用",
    )
    document_service_available: bool = Field(
        ...,
        description="文档服务是否可用",
    )
    # Integration statistics; empty dict by default.
    integration_stats: Dict[str, Any] = Field(
        description="整合统计",
        default_factory=dict,
    )

    class Config:
        # Encode datetime values with their isoformat() string.
        json_encoders = {
            datetime: lambda moment: moment.isoformat(),
        }
class HealthCheckResponse(BaseModel):
    """Response model for a health check."""

    # Required health status.
    status: str = Field(..., description="健康状态")
    # Check time; datetime.now() is called when no value is supplied.
    timestamp: datetime = Field(
        description="检查时间",
        default_factory=datetime.now,
    )
    # Mapping of string keys to boolean service states; empty by default.
    services: Dict[str, bool] = Field(
        description="服务状态",
        default_factory=dict,
    )

    class Config:
        # Encode datetime values with their isoformat() string.
        json_encoders = {
            datetime: lambda moment: moment.isoformat(),
        }
class TaskSummaryResponse(BaseModel):
    """Response model for a task summary."""

    # Required task ID, keyword, success flag, processing time and
    # creation time.
    task_id: str = Field(..., description="任务ID")
    keyword: str = Field(..., description="关键词")
    success: bool = Field(..., description="是否成功")
    processing_time: float = Field(..., description="处理时间")
    created_time: datetime = Field(..., description="创建时间")

    # Note and document counts; both default to zero.
    notes_count: int = Field(0, description="笔记数量")
    documents_count: int = Field(0, description="文档数量")
    # Content preview; empty string by default.
    content_preview: str = Field("", description="内容预览")

    class Config:
        # Encode datetime values with their isoformat() string.
        json_encoders = {
            datetime: lambda moment: moment.isoformat(),
        }
class ValidationResponse(BaseModel):
    """Response model for validation results."""

    # Required mapping of string keys to boolean validation results.
    validation_results: Dict[str, bool] = Field(
        ...,
        description="验证结果",
    )
    # Required counts of valid and invalid documents.
    valid_count: int = Field(..., description="有效文档数量")
    invalid_count: int = Field(..., description="无效文档数量")
    # Invalid documents; empty list by default.
    invalid_documents: List[str] = Field(
        description="无效文档列表",
        default_factory=list,
    )
class ApiResponse(BaseModel):
    """Generic API response model."""

    # Required success flag.
    success: bool = Field(..., description="是否成功")
    # Response message; empty string by default.
    message: str = Field("", description="响应消息")
    # Response data of any type; None by default.
    data: Optional[Any] = Field(None, description="响应数据")
    # Timestamp; datetime.now() is called when no value is supplied.
    timestamp: datetime = Field(
        description="时间戳",
        default_factory=datetime.now,
    )

    class Config:
        # Encode datetime values with their isoformat() string.
        json_encoders = {
            datetime: lambda moment: moment.isoformat(),
        }
class ErrorResponse(BaseModel):
    """Error response model."""

    # Success flag; False by default.
    success: bool = Field(False, description="是否成功")
    # Required error code.
    error_code: str = Field(..., description="错误码")
    # Required error message.
    error_message: str = Field(..., description="错误信息")
    # Timestamp; datetime.now() is called when no value is supplied.
    timestamp: datetime = Field(
        description="时间戳",
        default_factory=datetime.now,
    )

    class Config:
        # Encode datetime values with their isoformat() string.
        json_encoders = {
            datetime: lambda moment: moment.isoformat(),
        }
class CookieConfig(BaseModel):
    """Configuration model for a single cookie."""

    # Required full cookie string.
    cookie_string: str = Field(..., description="完整的Cookie字符串")
    # Optional cookie name / identifier.
    name: Optional[str] = Field(default=None, description="Cookie名称/标识")
    # Optional free-form user information.
    user_info: Optional[Dict[str, Any]] = Field(
        default=None,
        description="用户信息",
    )
    # Priority 1-10; per the description, a larger number means a
    # higher priority.
    priority: int = Field(
        1,
        ge=1,
        le=10,
        description="优先级 (1-10, 数字越大优先级越高)",
    )

    class Config:
        # Example payload registered through `schema_extra`.
        schema_extra = {
            "example": {
                "cookie_string": "a1=your_a1_value; web_session=your_session_value; webId=your_webid",
                "name": "user1",
                "user_info": {"username": "用户1", "description": "主要账号"},
                "priority": 5,
            },
        }
class BatchSearchRequest(BaseModel):
    """Request model for a batch search over several keywords."""

    # Cookie configuration - required. Three shapes are accepted:
    #   1. a list of detailed CookieConfig entries
    #   2. a simple mapping of string keys to string values
    #   3. a single cookie string
    cookies: Union[List[CookieConfig], Dict[str, str], str] = Field(
        ...,
        description="Cookie配置信息支持多种格式",
    )
    # Required list of keywords to search.
    keywords: List[str] = Field(..., description="批量搜索关键词列表")
    # Maximum number of notes per keyword, constrained to 1-100.
    max_notes_per_keyword: int = Field(
        20,
        ge=1,
        le=100,
        description="每个关键词的最大笔记数量",
    )
    # Sort code 0-4 (0 general, 1 newest, 2 most liked, 3 most commented,
    # 4 most collected), as enumerated in the description string.
    sort_type: int = Field(
        0,
        ge=0,
        le=4,
        description="排序类型 (0:综合排序, 1:最新, 2:最多点赞, 3:最多评论, 4:最多收藏)",
    )
    # Note kind code 0-2 (0 any, 1 video notes, 2 regular notes).
    note_type: int = Field(
        0,
        ge=0,
        le=2,
        description="笔记类型 (0:不限, 1:视频笔记, 2:普通笔记)",
    )
    # Whether images are saved; on by default.
    save_images: bool = Field(True, description="是否保存图片")
    # Image storage path; defaults to "data/images".
    image_storage_path: str = Field("data/images", description="图像存储路径")
    # Whether detailed content is fetched; on by default.
    get_detailed_content: bool = Field(True, description="是否获取详细内容")
    # Output path; defaults to "data/output".
    output_path: str = Field("data/output", description="输出路径")
    # Whether cookie rotation is enabled; on by default.
    cookie_rotation: bool = Field(True, description="是否启用Cookie轮换")
    # Maximum retries per keyword, constrained to 1-10.
    max_retries: int = Field(
        3,
        ge=1,
        le=10,
        description="每个关键词的最大重试次数",
    )

    class Config:
        # Example payload registered through `schema_extra`.
        schema_extra = {
            "example": {
                "cookies": [
                    {
                        "cookie_string": "a1=main_user_a1; web_session=main_session; webId=main_webid",
                        "name": "main_user",
                        "priority": 5,
                        "user_info": {"username": "主要用户"},
                    },
                    {
                        "cookie_string": "a1=backup_user_a1; web_session=backup_session; webId=backup_webid",
                        "name": "backup_user",
                        "priority": 3,
                    },
                ],
                "keywords": ["北京旅游", "上海美食", "广州攻略"],
                "max_notes_per_keyword": 20,
                "cookie_rotation": True,
                "max_retries": 3,
                "save_images": True,
                "get_detailed_content": True,
            },
        }
class NoteInfo(BaseModel):
    """Model holding the information of one note."""

    # Required note ID, title and author nickname.
    note_id: str = Field(..., description="笔记ID")
    title: str = Field(..., description="笔记标题")
    author: str = Field(..., description="作者昵称")
    # Optional note content.
    content: Optional[str] = Field(default=None, description="笔记内容")

    # Interaction counters, all defaulting to zero.
    like_count: int = Field(0, description="点赞数")
    comment_count: int = Field(0, description="评论数")
    share_count: int = Field(0, description="分享数")
    collect_count: int = Field(0, description="收藏数")

    # Required link to the note.
    note_url: str = Field(..., description="笔记链接")
    # Image URLs and local paths of saved images; empty lists by default.
    images: List[str] = Field(
        description="图片URL列表",
        default_factory=list,
    )
    saved_image_paths: List[str] = Field(
        description="保存的图片本地路径",
        default_factory=list,
    )

    # Optional publish time (kept as a string) and location.
    publish_time: Optional[str] = Field(default=None, description="发布时间")
    location: Optional[str] = Field(default=None, description="位置信息")
    # Tags; empty list by default.
    tags: List[str] = Field(description="标签列表", default_factory=list)
class KeywordSearchResult(BaseModel):
    """Search result for a single keyword."""

    # Required search keyword.
    keyword: str = Field(..., description="搜索关键词")
    # Required total number of notes found.
    total_notes: int = Field(..., description="搜索到的笔记总数")
    # Required list of notes as NoteInfo items.
    notes: List[NoteInfo] = Field(..., description="笔记列表")
    # Required search time, kept as a string.
    search_time: str = Field(..., description="搜索时间")
    # Required flag telling whether the search succeeded.
    success: bool = Field(..., description="搜索是否成功")
    # Optional error message.
    error_message: Optional[str] = Field(default=None, description="错误信息")
class BatchSearchResponse(BaseModel):
    """Response model for a batch search; every field is required."""

    # Keyword totals: overall, successful and failed searches.
    total_keywords: int = Field(..., description="总关键词数量")
    successful_searches: int = Field(..., description="成功搜索数量")
    failed_searches: int = Field(..., description="失败搜索数量")
    # Total number of notes.
    total_notes: int = Field(..., description="总笔记数量")
    # Search results as KeywordSearchResult items.
    results: List[KeywordSearchResult] = Field(
        ...,
        description="搜索结果列表",
    )
    # Image storage path and output path.
    image_storage_path: str = Field(..., description="图像存储路径")
    output_path: str = Field(..., description="输出路径")
    # Processing time; the description states the unit is seconds.
    processing_time: float = Field(..., description="处理时间(秒)")
    # Free-form result summary.
    summary: Dict[str, Any] = Field(..., description="结果摘要")

    class Config:
        # Example payload registered through `schema_extra`.
        schema_extra = {
            "example": {
                "total_keywords": 3,
                "successful_searches": 2,
                "failed_searches": 1,
                "total_notes": 35,
                "results": [
                    {
                        "keyword": "北京旅游",
                        "total_notes": 20,
                        "notes": [
                            {
                                "note_id": "64a1b2c3d4e5f6g7",
                                "title": "北京三日游攻略",
                                "author": "旅游达人",
                                "content": "详细的北京旅游攻略...",
                                "like_count": 150,
                                "comment_count": 30,
                                "note_url": "https://www.xiaohongshu.com/discovery/item/64a1b2c3d4e5f6g7",
                                "images": ["https://sns-img-hw.xhscdn.com/example1.jpg"],
                                "saved_image_paths": ["data/images/beijing_travel_1.jpg"],
                            },
                        ],
                        "search_time": "2024-01-01 12:00:00",
                        "success": True,
                    },
                ],
                "image_storage_path": "data/images",
                "output_path": "data/output",
                "processing_time": 45.5,
                "summary": {
                    "top_authors": ["旅游达人", "美食博主"],
                    "total_interactions": 5000,
                    "saved_images": 25,
                },
            },
        }