410 lines
17 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
内容整合API模型定义
"""
from typing import List, Dict, Any, Optional, Union
from datetime import datetime
from pydantic import BaseModel, Field
class XHSSearchRequest(BaseModel):
    """Request model for a Xiaohongshu (XHS) note search."""

    # Search keyword; required, 1-100 characters.
    keyword: str = Field(
        ...,
        description="搜索关键词",
        min_length=1,
        max_length=100,
    )
    # Maximum number of notes to fetch; 1-100, defaults to 20.
    max_notes: int = Field(
        20,
        description="最大笔记数量",
        ge=1,
        le=100,
    )
    # Sort order code, 0-4 (0: general, 1: newest, 2: most liked,
    # 3: most commented, 4: most collected); defaults to 0.
    sort_type: int = Field(
        0,
        description="排序类型 (0:综合排序, 1:最新, 2:最多点赞, 3:最多评论, 4:最多收藏)",
        ge=0,
        le=4,
    )
    # Note kind code, 0-2 (0: any, 1: video note, 2: normal note); defaults to 0.
    note_type: int = Field(
        0,
        description="笔记类型 (0:不限, 1:视频笔记, 2:普通笔记)",
        ge=0,
        le=2,
    )
    # Whether media files should be downloaded; defaults to True.
    download_media: bool = Field(True, description="是否下载媒体文件")

    class Config:
        # Sample payload attached to the generated schema.
        schema_extra = {
            "example": {
                "keyword": "上海迪士尼攻略",
                "max_notes": 20,
                "sort_type": 0,
                "note_type": 0,
                "download_media": True,
            },
        }
class IntegrationRequest(BaseModel):
    """Request model for a content-integration task (search plus documents)."""

    # Search keyword; required, 1-100 characters.
    keyword: str = Field(
        ...,
        description="搜索关键词",
        min_length=1,
        max_length=100,
    )
    # Paths of the documents to integrate; required.
    document_paths: List[str] = Field(..., description="文档路径列表")
    # Maximum number of notes; 1-100, defaults to 20.
    max_notes: int = Field(
        20,
        description="最大笔记数量",
        ge=1,
        le=100,
    )
    # Sort type code; 0-4, defaults to 0.
    sort_type: int = Field(
        0,
        description="排序类型",
        ge=0,
        le=4,
    )
    # Note type code; 0-2, defaults to 0.
    note_type: int = Field(
        0,
        description="笔记类型",
        ge=0,
        le=2,
    )
    # Whether media files should be downloaded; defaults to True.
    download_media: bool = Field(True, description="是否下载媒体文件")
    # Output format name; defaults to 'summary'.
    output_format: str = Field("summary", description="输出格式")
    # Whether LLM processing is included; defaults to True.
    include_llm_processing: bool = Field(True, description="是否包含LLM处理")

    class Config:
        # Sample payload attached to the generated schema.
        schema_extra = {
            "example": {
                "keyword": "上海迪士尼攻略",
                "document_paths": ["/path/to/document1.pdf", "/path/to/document2.docx"],
                "max_notes": 20,
                "sort_type": 0,
                "note_type": 0,
                "download_media": True,
                "output_format": "summary",
                "include_llm_processing": True,
            },
        }
class BatchIntegrationRequest(BaseModel):
    """Request model carrying a batch of integration tasks."""

    # The integration tasks to run; required.
    tasks: List[IntegrationRequest] = Field(..., description="整合任务列表")

    class Config:
        # Sample payload attached to the generated schema.
        schema_extra = {
            "example": {
                "tasks": [
                    {
                        "keyword": "上海迪士尼攻略",
                        "document_paths": ["/path/to/document1.pdf"],
                        "max_notes": 20,
                    },
                    {
                        "keyword": "北京环球影城攻略",
                        "document_paths": ["/path/to/document2.pdf"],
                        "max_notes": 15,
                    },
                ],
            },
        }
class CookieManagementRequest(BaseModel):
    """Request model for cookie management."""

    # Cookie name; required, 1-50 characters.
    name: str = Field(
        ...,
        description="Cookie名称",
        min_length=1,
        max_length=50,
    )
    # Raw cookie string; required, at least 10 characters.
    cookie_string: str = Field(
        ...,
        description="Cookie字符串",
        min_length=10,
    )
    # Optional free-form user information; defaults to None.
    user_info: Optional[Dict[str, Any]] = Field(None, description="用户信息")

    class Config:
        # Sample payload attached to the generated schema.
        schema_extra = {
            "example": {
                "name": "user1",
                "cookie_string": "your_cookie_string_here",
                "user_info": {"username": "用户1", "description": "测试用户"},
            },
        }
class ExportRequest(BaseModel):
    """Request model for exporting a task result."""

    # Task identifier; required.
    task_id: str = Field(..., description="任务ID")
    # Output format name; defaults to 'summary'.
    output_format: str = Field("summary", description="输出格式")
    # Optional file name; defaults to None.
    filename: Optional[str] = Field(None, description="文件名")

    class Config:
        # Sample payload attached to the generated schema.
        schema_extra = {
            "example": {
                "task_id": "task_123456",
                "output_format": "summary",
                "filename": "result.txt",
            },
        }
class XHSNoteResponse(BaseModel):
    """Response model describing a single Xiaohongshu note."""

    # --- required identity / content fields ---
    note_id: str = Field(..., description="笔记ID")  # note ID
    title: str = Field(..., description="笔记标题")  # note title
    content: str = Field(..., description="笔记内容")  # note body text
    author: str = Field(..., description="作者昵称")  # author nickname
    author_id: str = Field(..., description="作者ID")  # author ID

    # --- list fields; each defaults to a fresh empty list ---
    tags: List[str] = Field(description="标签列表", default_factory=list)  # tags
    images: List[str] = Field(description="图片列表", default_factory=list)  # images
    videos: List[str] = Field(description="视频列表", default_factory=list)  # videos

    # --- counters; each defaults to 0 ---
    likes: int = Field(0, description="点赞数")  # like count
    comments: int = Field(0, description="评论数")  # comment count
    shares: int = Field(0, description="分享数")  # share count

    # --- required trailing fields ---
    created_time: str = Field(..., description="创建时间")  # creation time, as a string
    note_url: str = Field(..., description="笔记URL")  # note URL
class XHSSearchResponse(BaseModel):
    """Response model for a Xiaohongshu search."""

    # Keyword that was searched; required.
    keyword: str = Field(..., description="搜索关键词")
    # Notes returned by the search; defaults to an empty list.
    notes: List[XHSNoteResponse] = Field(
        description="笔记列表",
        default_factory=list,
    )
    # Total count; required.
    total_count: int = Field(..., description="总数量")
    # Success flag; required.
    success: bool = Field(..., description="是否成功")
    # Error message; defaults to the empty string.
    error_message: str = Field("", description="错误信息")
    # Processing time; defaults to 0.0.
    processing_time: float = Field(0.0, description="处理时间")
class DocumentContentResponse(BaseModel):
    """Response model holding the content of one document."""

    # File path; required.
    file_path: str = Field(..., description="文件路径")
    # Document content; required.
    content: str = Field(..., description="文档内容")
    # File type; required.
    file_type: str = Field(..., description="文件类型")
    # Arbitrary metadata; defaults to an empty dict.
    metadata: Dict[str, Any] = Field(
        description="元数据",
        default_factory=dict,
    )
class IntegratedContentResponse(BaseModel):
    """Response model for integrated document content."""

    # Source documents; defaults to an empty list.
    documents: List[DocumentContentResponse] = Field(
        description="文档列表",
        default_factory=list,
    )
    # Text after integration; required.
    integrated_text: str = Field(..., description="整合后的文本")
    # Summary; required.
    summary: str = Field(..., description="摘要")
    # Key topics; defaults to an empty list.
    key_topics: List[str] = Field(
        description="关键主题",
        default_factory=list,
    )
    # Total length; required.
    total_length: int = Field(..., description="总长度")
class IntegrationResponse(BaseModel):
    """Response model for a content-integration task."""

    # Task ID; required.
    task_id: str = Field(..., description="任务ID")
    # Keyword; required.
    keyword: str = Field(..., description="关键词")
    # Success flag; required.
    success: bool = Field(..., description="是否成功")
    # Xiaohongshu search content, if any; defaults to None.
    xhs_content: Optional[XHSSearchResponse] = Field(
        None,
        description="小红书内容",
    )
    # Integrated document content, if any; defaults to None.
    document_content: Optional[IntegratedContentResponse] = Field(
        None,
        description="文档内容",
    )
    # Integrated summary; defaults to the empty string.
    integrated_summary: str = Field("", description="整合摘要")
    # Processing time; defaults to 0.0.
    processing_time: float = Field(0.0, description="处理时间")
    # Creation time; defaults to datetime.now() evaluated per instance.
    created_time: datetime = Field(
        description="创建时间",
        default_factory=datetime.now,
    )
    # Error message; defaults to the empty string.
    error_message: str = Field("", description="错误信息")

    class Config:
        # Encode datetime values via their isoformat() string.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
class CookieStatsResponse(BaseModel):
    """Response model for cookie statistics."""

    # Total number of cookies; required.
    total_cookies: int = Field(..., description="总Cookie数量")
    # Number of valid cookies; required.
    valid_cookies: int = Field(..., description="有效Cookie数量")
    # Number of invalid cookies; required.
    invalid_cookies: int = Field(..., description="无效Cookie数量")
    # Cookie currently in use, if any; defaults to None.
    current_cookie: Optional[str] = Field(None, description="当前使用的Cookie")
    # Per-cookie detail records; defaults to an empty list.
    cookie_details: List[Dict[str, Any]] = Field(
        description="Cookie详情",
        default_factory=list,
    )
class ServiceStatusResponse(BaseModel):
    """Response model reporting service status."""

    # Service status; required.
    status: str = Field(..., description="服务状态")
    # Startup time; required.
    startup_time: datetime = Field(..., description="启动时间")
    # Output path; required.
    output_path: str = Field(..., description="输出路径")
    # Whether the Xiaohongshu service is available; required.
    xhs_service_available: bool = Field(..., description="小红书服务是否可用")
    # Whether the document service is available; required.
    document_service_available: bool = Field(..., description="文档服务是否可用")
    # Integration statistics; defaults to an empty dict.
    integration_stats: Dict[str, Any] = Field(
        description="整合统计",
        default_factory=dict,
    )

    class Config:
        # Encode datetime values via their isoformat() string.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
class HealthCheckResponse(BaseModel):
    """Response model for a health check."""

    # Health status; required.
    status: str = Field(..., description="健康状态")
    # Check time; defaults to datetime.now() evaluated per instance.
    timestamp: datetime = Field(
        description="检查时间",
        default_factory=datetime.now,
    )
    # Mapping of service name to a boolean state; defaults to an empty dict.
    services: Dict[str, bool] = Field(
        description="服务状态",
        default_factory=dict,
    )

    class Config:
        # Encode datetime values via their isoformat() string.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
class TaskSummaryResponse(BaseModel):
    """Response model summarizing one task."""

    # --- required fields ---
    task_id: str = Field(..., description="任务ID")  # task ID
    keyword: str = Field(..., description="关键词")  # keyword
    success: bool = Field(..., description="是否成功")  # success flag
    processing_time: float = Field(..., description="处理时间")  # processing time
    created_time: datetime = Field(..., description="创建时间")  # creation time

    # --- optional fields with defaults ---
    notes_count: int = Field(0, description="笔记数量")  # number of notes
    documents_count: int = Field(0, description="文档数量")  # number of documents
    content_preview: str = Field("", description="内容预览")  # content preview

    class Config:
        # Encode datetime values via their isoformat() string.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
class ValidationResponse(BaseModel):
    """Response model for document validation."""

    # Validation results as a str -> bool mapping; required.
    validation_results: Dict[str, bool] = Field(..., description="验证结果")
    # Number of valid documents; required.
    valid_count: int = Field(..., description="有效文档数量")
    # Number of invalid documents; required.
    invalid_count: int = Field(..., description="无效文档数量")
    # Invalid documents; defaults to an empty list.
    invalid_documents: List[str] = Field(
        description="无效文档列表",
        default_factory=list,
    )
class ApiResponse(BaseModel):
    """Generic API response envelope."""

    # Success flag; required.
    success: bool = Field(..., description="是否成功")
    # Response message; defaults to the empty string.
    message: str = Field("", description="响应消息")
    # Response payload of any type; defaults to None.
    data: Optional[Any] = Field(None, description="响应数据")
    # Timestamp; defaults to datetime.now() evaluated per instance.
    timestamp: datetime = Field(
        description="时间戳",
        default_factory=datetime.now,
    )

    class Config:
        # Encode datetime values via their isoformat() string.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
class ErrorResponse(BaseModel):
    """Error response model."""

    # Success flag; defaults to False.
    success: bool = Field(False, description="是否成功")
    # Error code; required.
    error_code: str = Field(..., description="错误码")
    # Error message; required.
    error_message: str = Field(..., description="错误信息")
    # Timestamp; defaults to datetime.now() evaluated per instance.
    timestamp: datetime = Field(
        description="时间戳",
        default_factory=datetime.now,
    )

    class Config:
        # Encode datetime values via their isoformat() string.
        json_encoders = {datetime: lambda moment: moment.isoformat()}
class CookieConfig(BaseModel):
    """Configuration model for a single cookie."""

    # Complete cookie string; required.
    cookie_string: str = Field(..., description="完整的Cookie字符串")
    # Optional cookie name / identifier; defaults to None.
    name: Optional[str] = Field(default=None, description="Cookie名称/标识")
    # Optional user information; defaults to None.
    user_info: Optional[Dict[str, Any]] = Field(default=None, description="用户信息")
    # Priority in 1-10 (a larger number means higher priority); defaults to 1.
    priority: int = Field(
        1,
        description="优先级 (1-10, 数字越大优先级越高)",
        ge=1,
        le=10,
    )

    class Config:
        # Sample payload attached to the generated schema.
        schema_extra = {
            "example": {
                "cookie_string": "a1=your_a1_value; web_session=your_session_value; webId=your_webid",
                "name": "user1",
                "user_info": {
                    "username": "用户1",
                    "description": "主要账号",
                },
                "priority": 5,
            },
        }
class BatchSearchRequest(BaseModel):
    """Request model for a batch (multi-keyword) search."""

    # Cookie configuration - required. Three input forms are accepted:
    #   1. a list of detailed CookieConfig entries,
    #   2. a plain str -> str mapping,
    #   3. a single cookie string.
    cookies: Union[List[CookieConfig], Dict[str, str], str] = Field(
        ...,
        description="Cookie配置信息支持多种格式",
    )
    # Keywords to search in batch; required.
    keywords: List[str] = Field(..., description="批量搜索关键词列表")
    # Maximum number of notes per keyword; 1-100, defaults to 20.
    max_notes_per_keyword: int = Field(
        20,
        description="每个关键词的最大笔记数量",
        ge=1,
        le=100,
    )
    # Sort order code, 0-4 (0: general, 1: newest, 2: most liked,
    # 3: most commented, 4: most collected); defaults to 0.
    sort_type: int = Field(
        0,
        description="排序类型 (0:综合排序, 1:最新, 2:最多点赞, 3:最多评论, 4:最多收藏)",
        ge=0,
        le=4,
    )
    # Note kind code, 0-2 (0: any, 1: video note, 2: normal note); defaults to 0.
    note_type: int = Field(
        0,
        description="笔记类型 (0:不限, 1:视频笔记, 2:普通笔记)",
        ge=0,
        le=2,
    )
    # Whether images are saved; defaults to True.
    save_images: bool = Field(True, description="是否保存图片")
    # Image storage path; defaults to "data/images".
    image_storage_path: str = Field("data/images", description="图像存储路径")
    # Whether detailed content is fetched; defaults to True.
    get_detailed_content: bool = Field(True, description="是否获取详细内容")
    # Output path; defaults to "data/output".
    output_path: str = Field("data/output", description="输出路径")
    # Whether cookie rotation is enabled; defaults to True.
    cookie_rotation: bool = Field(True, description="是否启用Cookie轮换")
    # Maximum retries per keyword; 1-10, defaults to 3.
    max_retries: int = Field(
        3,
        description="每个关键词的最大重试次数",
        ge=1,
        le=10,
    )

    class Config:
        # Sample payload attached to the generated schema.
        schema_extra = {
            "example": {
                "cookies": [
                    {
                        "cookie_string": "a1=main_user_a1; web_session=main_session; webId=main_webid",
                        "name": "main_user",
                        "priority": 5,
                        "user_info": {"username": "主要用户"},
                    },
                    {
                        "cookie_string": "a1=backup_user_a1; web_session=backup_session; webId=backup_webid",
                        "name": "backup_user",
                        "priority": 3,
                    },
                ],
                "keywords": [
                    "北京旅游",
                    "上海美食",
                    "广州攻略",
                ],
                "max_notes_per_keyword": 20,
                "cookie_rotation": True,
                "max_retries": 3,
                "save_images": True,
                "get_detailed_content": True,
            },
        }
class NoteInfo(BaseModel):
    """Model describing one note in batch search results."""

    # Note ID; required.
    note_id: str = Field(..., description="笔记ID")
    # Note title; required.
    title: str = Field(..., description="笔记标题")
    # Author nickname; required.
    author: str = Field(..., description="作者昵称")
    # Note content, if available; defaults to None.
    content: Optional[str] = Field(default=None, description="笔记内容")

    # Interaction counters; each defaults to 0.
    like_count: int = Field(0, description="点赞数")  # likes
    comment_count: int = Field(0, description="评论数")  # comments
    share_count: int = Field(0, description="分享数")  # shares
    collect_count: int = Field(0, description="收藏数")  # collections

    # Link to the note; required.
    note_url: str = Field(..., description="笔记链接")
    # Image URLs; defaults to an empty list.
    images: List[str] = Field(
        description="图片URL列表",
        default_factory=list,
    )
    # Local paths of saved images; defaults to an empty list.
    saved_image_paths: List[str] = Field(
        description="保存的图片本地路径",
        default_factory=list,
    )
    # Publish time, if available; defaults to None.
    publish_time: Optional[str] = Field(default=None, description="发布时间")
    # Location information, if available; defaults to None.
    location: Optional[str] = Field(default=None, description="位置信息")
    # Tags; defaults to an empty list.
    tags: List[str] = Field(
        description="标签列表",
        default_factory=list,
    )
class KeywordSearchResult(BaseModel):
    """Search result for a single keyword."""

    # Keyword that was searched; required.
    keyword: str = Field(..., description="搜索关键词")
    # Total number of notes found; required.
    total_notes: int = Field(..., description="搜索到的笔记总数")
    # Notes found; required.
    notes: List[NoteInfo] = Field(..., description="笔记列表")
    # Search time, as a string; required.
    search_time: str = Field(..., description="搜索时间")
    # Whether the search succeeded; required.
    success: bool = Field(..., description="搜索是否成功")
    # Error message, if any; defaults to None.
    error_message: Optional[str] = Field(default=None, description="错误信息")
class BatchSearchResponse(BaseModel):
    """Response model for a batch search; every field is required."""

    # --- aggregate counters ---
    total_keywords: int = Field(..., description="总关键词数量")  # total keywords
    successful_searches: int = Field(..., description="成功搜索数量")  # successful searches
    failed_searches: int = Field(..., description="失败搜索数量")  # failed searches
    total_notes: int = Field(..., description="总笔记数量")  # total notes

    # --- per-keyword results ---
    results: List[KeywordSearchResult] = Field(..., description="搜索结果列表")

    # --- storage locations and timing ---
    image_storage_path: str = Field(..., description="图像存储路径")  # image storage path
    output_path: str = Field(..., description="输出路径")  # output path
    processing_time: float = Field(..., description="处理时间(秒)")  # processing time, in seconds

    # --- free-form result summary ---
    summary: Dict[str, Any] = Field(..., description="结果摘要")

    class Config:
        # Sample payload attached to the generated schema.
        schema_extra = {
            "example": {
                "total_keywords": 3,
                "successful_searches": 2,
                "failed_searches": 1,
                "total_notes": 35,
                "results": [
                    {
                        "keyword": "北京旅游",
                        "total_notes": 20,
                        "notes": [
                            {
                                "note_id": "64a1b2c3d4e5f6g7",
                                "title": "北京三日游攻略",
                                "author": "旅游达人",
                                "content": "详细的北京旅游攻略...",
                                "like_count": 150,
                                "comment_count": 30,
                                "note_url": "https://www.xiaohongshu.com/discovery/item/64a1b2c3d4e5f6g7",
                                "images": [
                                    "https://sns-img-hw.xhscdn.com/example1.jpg",
                                ],
                                "saved_image_paths": [
                                    "data/images/beijing_travel_1.jpg",
                                ],
                            },
                        ],
                        "search_time": "2024-01-01 12:00:00",
                        "success": True,
                    },
                ],
                "image_storage_path": "data/images",
                "output_path": "data/output",
                "processing_time": 45.5,
                "summary": {
                    "top_authors": ["旅游达人", "美食博主"],
                    "total_interactions": 5000,
                    "saved_images": 25,
                },
            },
        }