410 lines
17 KiB
Python
410 lines
17 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
|
|||
|
|
"""
|
|||
|
|
内容整合API模型定义
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
from typing import List, Dict, Any, Optional, Union
|
|||
|
|
from datetime import datetime
|
|||
|
|
from pydantic import BaseModel, Field
|
|||
|
|
|
|||
|
|
|
|||
|
|
class XHSSearchRequest(BaseModel):
    """Xiaohongshu (XHS) search request model."""

    # Search keyword; required, 1-100 characters.
    keyword: str = Field(
        ...,
        min_length=1,
        max_length=100,
        description="搜索关键词",
    )
    # Maximum number of notes to return (1-100).
    max_notes: int = Field(
        default=20,
        ge=1,
        le=100,
        description="最大笔记数量",
    )
    # Sort code: 0 general, 1 newest, 2 most liked, 3 most commented,
    # 4 most collected.
    sort_type: int = Field(
        default=0,
        ge=0,
        le=4,
        description="排序类型 (0:综合排序, 1:最新, 2:最多点赞, 3:最多评论, 4:最多收藏)",
    )
    # Note kind code: 0 any, 1 video notes, 2 regular notes.
    note_type: int = Field(
        default=0,
        ge=0,
        le=2,
        description="笔记类型 (0:不限, 1:视频笔记, 2:普通笔记)",
    )
    # Whether media files should be downloaded.
    download_media: bool = Field(
        default=True,
        description="是否下载媒体文件",
    )

    class Config:
        # Example payload supplied through pydantic's ``schema_extra`` hook.
        schema_extra = {
            "example": {
                "keyword": "上海迪士尼攻略",
                "max_notes": 20,
                "sort_type": 0,
                "note_type": 0,
                "download_media": True,
            },
        }
|
|||
|
|
|
|||
|
|
|
|||
|
|
class IntegrationRequest(BaseModel):
    """Content integration request model."""

    # Search keyword; required, 1-100 characters.
    keyword: str = Field(
        ...,
        min_length=1,
        max_length=100,
        description="搜索关键词",
    )
    # Required list of document paths.
    document_paths: List[str] = Field(
        ...,
        description="文档路径列表",
    )
    # Maximum number of notes (1-100).
    max_notes: int = Field(
        default=20,
        ge=1,
        le=100,
        description="最大笔记数量",
    )
    # Sort code, constrained to 0-4.
    sort_type: int = Field(
        default=0,
        ge=0,
        le=4,
        description="排序类型",
    )
    # Note kind code, constrained to 0-2.
    note_type: int = Field(
        default=0,
        ge=0,
        le=2,
        description="笔记类型",
    )
    # Whether media files should be downloaded.
    download_media: bool = Field(
        default=True,
        description="是否下载媒体文件",
    )
    # Output format name; free-form string, defaults to "summary".
    output_format: str = Field(
        default="summary",
        description="输出格式",
    )
    # Whether LLM processing is included.
    include_llm_processing: bool = Field(
        default=True,
        description="是否包含LLM处理",
    )

    class Config:
        # Example payload supplied through pydantic's ``schema_extra`` hook.
        schema_extra = {
            "example": {
                "keyword": "上海迪士尼攻略",
                "document_paths": [
                    "/path/to/document1.pdf",
                    "/path/to/document2.docx",
                ],
                "max_notes": 20,
                "sort_type": 0,
                "note_type": 0,
                "download_media": True,
                "output_format": "summary",
                "include_llm_processing": True,
            },
        }
|
|||
|
|
|
|||
|
|
|
|||
|
|
class BatchIntegrationRequest(BaseModel):
    """Batch integration request model."""

    # Required list of integration tasks, each an IntegrationRequest.
    tasks: List[IntegrationRequest] = Field(
        ...,
        description="整合任务列表",
    )

    class Config:
        # Example payload with two tasks, supplied via ``schema_extra``.
        schema_extra = {
            "example": {
                "tasks": [
                    {
                        "keyword": "上海迪士尼攻略",
                        "document_paths": ["/path/to/document1.pdf"],
                        "max_notes": 20,
                    },
                    {
                        "keyword": "北京环球影城攻略",
                        "document_paths": ["/path/to/document2.pdf"],
                        "max_notes": 15,
                    },
                ],
            },
        }
|
|||
|
|
|
|||
|
|
|
|||
|
|
class CookieManagementRequest(BaseModel):
    """Cookie management request model."""

    # Cookie name; required, 1-50 characters.
    name: str = Field(
        ...,
        min_length=1,
        max_length=50,
        description="Cookie名称",
    )
    # Raw cookie string; required, at least 10 characters.
    cookie_string: str = Field(
        ...,
        min_length=10,
        description="Cookie字符串",
    )
    # Optional free-form user information.
    user_info: Optional[Dict[str, Any]] = Field(
        default=None,
        description="用户信息",
    )

    class Config:
        # Example payload supplied through pydantic's ``schema_extra`` hook.
        schema_extra = {
            "example": {
                "name": "user1",
                "cookie_string": "your_cookie_string_here",
                "user_info": {
                    "username": "用户1",
                    "description": "测试用户",
                },
            },
        }
|
|||
|
|
|
|||
|
|
|
|||
|
|
class ExportRequest(BaseModel):
    """Export request model."""

    # Required task identifier.
    task_id: str = Field(
        ...,
        description="任务ID",
    )
    # Output format name; free-form string, defaults to "summary".
    output_format: str = Field(
        default="summary",
        description="输出格式",
    )
    # Optional file name.
    filename: Optional[str] = Field(
        default=None,
        description="文件名",
    )

    class Config:
        # Example payload supplied through pydantic's ``schema_extra`` hook.
        schema_extra = {
            "example": {
                "task_id": "task_123456",
                "output_format": "summary",
                "filename": "result.txt",
            },
        }
|
|||
|
|
|
|||
|
|
|
|||
|
|
class XHSNoteResponse(BaseModel):
    """Xiaohongshu note response model."""

    # --- Required identity and content fields ---
    note_id: str = Field(
        ...,
        description="笔记ID",
    )
    title: str = Field(
        ...,
        description="笔记标题",
    )
    content: str = Field(
        ...,
        description="笔记内容",
    )
    # Author nickname.
    author: str = Field(
        ...,
        description="作者昵称",
    )
    author_id: str = Field(
        ...,
        description="作者ID",
    )

    # --- Optional collections; each instance gets its own empty list ---
    tags: List[str] = Field(
        default_factory=list,
        description="标签列表",
    )
    images: List[str] = Field(
        default_factory=list,
        description="图片列表",
    )
    videos: List[str] = Field(
        default_factory=list,
        description="视频列表",
    )

    # --- Engagement counters, all defaulting to zero ---
    likes: int = Field(
        default=0,
        description="点赞数",
    )
    comments: int = Field(
        default=0,
        description="评论数",
    )
    shares: int = Field(
        default=0,
        description="分享数",
    )

    # Creation time, carried as a plain string (no format is enforced here).
    created_time: str = Field(
        ...,
        description="创建时间",
    )
    note_url: str = Field(
        ...,
        description="笔记URL",
    )
|
|||
|
|
|
|||
|
|
|
|||
|
|
class XHSSearchResponse(BaseModel):
    """Xiaohongshu search response model."""

    # Keyword that was searched.
    keyword: str = Field(
        ...,
        description="搜索关键词",
    )
    # Notes found; defaults to a fresh empty list.
    notes: List[XHSNoteResponse] = Field(
        default_factory=list,
        description="笔记列表",
    )
    # Total count (required).
    total_count: int = Field(
        ...,
        description="总数量",
    )
    # Whether the search succeeded (required).
    success: bool = Field(
        ...,
        description="是否成功",
    )
    # Error message; empty string by default.
    error_message: str = Field(
        default="",
        description="错误信息",
    )
    # Processing time; unit is not stated in this model.
    processing_time: float = Field(
        default=0.0,
        description="处理时间",
    )
|
|||
|
|
|
|||
|
|
|
|||
|
|
class DocumentContentResponse(BaseModel):
    """Document content response model."""

    # Path of the file.
    file_path: str = Field(
        ...,
        description="文件路径",
    )
    # Document content.
    content: str = Field(
        ...,
        description="文档内容",
    )
    # File type.
    file_type: str = Field(
        ...,
        description="文件类型",
    )
    # Free-form metadata; defaults to a fresh empty dict.
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="元数据",
    )
|
|||
|
|
|
|||
|
|
|
|||
|
|
class IntegratedContentResponse(BaseModel):
    """Integrated content response model."""

    # Source documents; defaults to a fresh empty list.
    documents: List[DocumentContentResponse] = Field(
        default_factory=list,
        description="文档列表",
    )
    # Text after integration (required).
    integrated_text: str = Field(
        ...,
        description="整合后的文本",
    )
    # Summary (required).
    summary: str = Field(
        ...,
        description="摘要",
    )
    # Key topics; defaults to a fresh empty list.
    key_topics: List[str] = Field(
        default_factory=list,
        description="关键主题",
    )
    # Total length (required).
    total_length: int = Field(
        ...,
        description="总长度",
    )
|
|||
|
|
|
|||
|
|
|
|||
|
|
class IntegrationResponse(BaseModel):
    """Integration response model."""

    # Task identifier.
    task_id: str = Field(
        ...,
        description="任务ID",
    )
    # Keyword.
    keyword: str = Field(
        ...,
        description="关键词",
    )
    # Whether the task succeeded.
    success: bool = Field(
        ...,
        description="是否成功",
    )
    # Xiaohongshu content, when present.
    xhs_content: Optional[XHSSearchResponse] = Field(
        default=None,
        description="小红书内容",
    )
    # Document content, when present.
    document_content: Optional[IntegratedContentResponse] = Field(
        default=None,
        description="文档内容",
    )
    # Integrated summary; empty string by default.
    integrated_summary: str = Field(
        default="",
        description="整合摘要",
    )
    # Processing time; unit is not stated in this model.
    processing_time: float = Field(
        default=0.0,
        description="处理时间",
    )
    # Creation time; defaults to datetime.now() (naive local time) when the
    # instance is built.
    created_time: datetime = Field(
        default_factory=datetime.now,
        description="创建时间",
    )
    # Error message; empty string by default.
    error_message: str = Field(
        default="",
        description="错误信息",
    )

    class Config:
        # Render datetime values through isoformat() when encoding to JSON.
        json_encoders = {
            datetime: lambda moment: moment.isoformat(),
        }
|
|||
|
|
|
|||
|
|
|
|||
|
|
class CookieStatsResponse(BaseModel):
    """Cookie statistics response model."""

    # Total number of cookies.
    total_cookies: int = Field(
        ...,
        description="总Cookie数量",
    )
    # Number of valid cookies.
    valid_cookies: int = Field(
        ...,
        description="有效Cookie数量",
    )
    # Number of invalid cookies.
    invalid_cookies: int = Field(
        ...,
        description="无效Cookie数量",
    )
    # Cookie currently in use, if any.
    current_cookie: Optional[str] = Field(
        default=None,
        description="当前使用的Cookie",
    )
    # Per-cookie details; defaults to a fresh empty list.
    cookie_details: List[Dict[str, Any]] = Field(
        default_factory=list,
        description="Cookie详情",
    )
|
|||
|
|
|
|||
|
|
|
|||
|
|
class ServiceStatusResponse(BaseModel):
    """Service status response model."""

    # Service status.
    status: str = Field(
        ...,
        description="服务状态",
    )
    # Startup time (required; no default is generated).
    startup_time: datetime = Field(
        ...,
        description="启动时间",
    )
    # Output path.
    output_path: str = Field(
        ...,
        description="输出路径",
    )
    # Whether the Xiaohongshu service is available.
    xhs_service_available: bool = Field(
        ...,
        description="小红书服务是否可用",
    )
    # Whether the document service is available.
    document_service_available: bool = Field(
        ...,
        description="文档服务是否可用",
    )
    # Integration statistics; defaults to a fresh empty dict.
    integration_stats: Dict[str, Any] = Field(
        default_factory=dict,
        description="整合统计",
    )

    class Config:
        # Render datetime values through isoformat() when encoding to JSON.
        json_encoders = {
            datetime: lambda moment: moment.isoformat(),
        }
|
|||
|
|
|
|||
|
|
|
|||
|
|
class HealthCheckResponse(BaseModel):
    """Health check response model."""

    # Health status.
    status: str = Field(
        ...,
        description="健康状态",
    )
    # Time of the check; defaults to datetime.now() (naive local time) when
    # the instance is built.
    timestamp: datetime = Field(
        default_factory=datetime.now,
        description="检查时间",
    )
    # Status flag per service name; defaults to a fresh empty dict.
    services: Dict[str, bool] = Field(
        default_factory=dict,
        description="服务状态",
    )

    class Config:
        # Render datetime values through isoformat() when encoding to JSON.
        json_encoders = {
            datetime: lambda moment: moment.isoformat(),
        }
|
|||
|
|
|
|||
|
|
|
|||
|
|
class TaskSummaryResponse(BaseModel):
    """Task summary response model."""

    # Task identifier.
    task_id: str = Field(
        ...,
        description="任务ID",
    )
    # Keyword.
    keyword: str = Field(
        ...,
        description="关键词",
    )
    # Whether the task succeeded.
    success: bool = Field(
        ...,
        description="是否成功",
    )
    # Processing time (required; unit is not stated in this model).
    processing_time: float = Field(
        ...,
        description="处理时间",
    )
    # Creation time (required; no default is generated).
    created_time: datetime = Field(
        ...,
        description="创建时间",
    )
    # Number of notes.
    notes_count: int = Field(
        default=0,
        description="笔记数量",
    )
    # Number of documents.
    documents_count: int = Field(
        default=0,
        description="文档数量",
    )
    # Content preview; empty string by default.
    content_preview: str = Field(
        default="",
        description="内容预览",
    )

    class Config:
        # Render datetime values through isoformat() when encoding to JSON.
        json_encoders = {
            datetime: lambda moment: moment.isoformat(),
        }
|
|||
|
|
|
|||
|
|
|
|||
|
|
class ValidationResponse(BaseModel):
    """Validation response model."""

    # Validation results as a string-to-bool mapping (required).
    validation_results: Dict[str, bool] = Field(
        ...,
        description="验证结果",
    )
    # Number of valid documents.
    valid_count: int = Field(
        ...,
        description="有效文档数量",
    )
    # Number of invalid documents.
    invalid_count: int = Field(
        ...,
        description="无效文档数量",
    )
    # Invalid documents; defaults to a fresh empty list.
    invalid_documents: List[str] = Field(
        default_factory=list,
        description="无效文档列表",
    )
|
|||
|
|
|
|||
|
|
|
|||
|
|
class ApiResponse(BaseModel):
    """Generic API response model."""

    # Whether the call succeeded.
    success: bool = Field(
        ...,
        description="是否成功",
    )
    # Response message; empty string by default.
    message: str = Field(
        default="",
        description="响应消息",
    )
    # Response data of any type, or None.
    data: Optional[Any] = Field(
        default=None,
        description="响应数据",
    )
    # Timestamp; defaults to datetime.now() (naive local time) when the
    # instance is built.
    timestamp: datetime = Field(
        default_factory=datetime.now,
        description="时间戳",
    )

    class Config:
        # Render datetime values through isoformat() when encoding to JSON.
        json_encoders = {
            datetime: lambda moment: moment.isoformat(),
        }
|
|||
|
|
|
|||
|
|
|
|||
|
|
class ErrorResponse(BaseModel):
    """Error response model."""

    # Success flag; defaults to False for this model.
    success: bool = Field(
        default=False,
        description="是否成功",
    )
    # Error code (required).
    error_code: str = Field(
        ...,
        description="错误码",
    )
    # Error message (required).
    error_message: str = Field(
        ...,
        description="错误信息",
    )
    # Timestamp; defaults to datetime.now() (naive local time) when the
    # instance is built.
    timestamp: datetime = Field(
        default_factory=datetime.now,
        description="时间戳",
    )

    class Config:
        # Render datetime values through isoformat() when encoding to JSON.
        json_encoders = {
            datetime: lambda moment: moment.isoformat(),
        }
|
|||
|
|
|
|||
|
|
|
|||
|
|
class CookieConfig(BaseModel):
    """Cookie configuration model."""

    # Full cookie string (required).
    cookie_string: str = Field(
        ...,
        description="完整的Cookie字符串",
    )
    # Optional cookie name / identifier.
    name: Optional[str] = Field(
        default=None,
        description="Cookie名称/标识",
    )
    # Optional free-form user information.
    user_info: Optional[Dict[str, Any]] = Field(
        default=None,
        description="用户信息",
    )
    # Priority from 1 to 10; per the description, larger means higher.
    priority: int = Field(
        default=1,
        ge=1,
        le=10,
        description="优先级 (1-10, 数字越大优先级越高)",
    )

    class Config:
        # Example payload supplied through pydantic's ``schema_extra`` hook.
        schema_extra = {
            "example": {
                "cookie_string": "a1=your_a1_value; web_session=your_session_value; webId=your_webid",
                "name": "user1",
                "user_info": {"username": "用户1", "description": "主要账号"},
                "priority": 5,
            },
        }
|
|||
|
|
|
|||
|
|
|
|||
|
|
class BatchSearchRequest(BaseModel):
    """Batch search request model."""

    # Cookie configuration - required. Three shapes are accepted:
    #   1. a list of detailed CookieConfig entries,
    #   2. a simple string-to-string mapping,
    #   3. a single cookie string.
    cookies: Union[List[CookieConfig], Dict[str, str], str] = Field(
        ...,
        description="Cookie配置信息，支持多种格式",
    )

    # Keywords to search in this batch (required).
    keywords: List[str] = Field(
        ...,
        description="批量搜索关键词列表",
    )
    # Maximum number of notes per keyword (1-100).
    max_notes_per_keyword: int = Field(
        default=20,
        ge=1,
        le=100,
        description="每个关键词的最大笔记数量",
    )
    # Sort code: 0 general, 1 newest, 2 most liked, 3 most commented,
    # 4 most collected.
    sort_type: int = Field(
        default=0,
        ge=0,
        le=4,
        description="排序类型 (0:综合排序, 1:最新, 2:最多点赞, 3:最多评论, 4:最多收藏)",
    )
    # Note kind code: 0 any, 1 video notes, 2 regular notes.
    note_type: int = Field(
        default=0,
        ge=0,
        le=2,
        description="笔记类型 (0:不限, 1:视频笔记, 2:普通笔记)",
    )
    # Whether images are saved.
    save_images: bool = Field(
        default=True,
        description="是否保存图片",
    )
    # Image storage path.
    # NOTE(review): this path arrives in the request body; confirm the
    # consumer validates it before touching the filesystem.
    image_storage_path: str = Field(
        default="data/images",
        description="图像存储路径",
    )
    # Whether detailed content is fetched.
    get_detailed_content: bool = Field(
        default=True,
        description="是否获取详细内容",
    )
    # Output path.
    # NOTE(review): same caveat as image_storage_path - confirm validation.
    output_path: str = Field(
        default="data/output",
        description="输出路径",
    )
    # Whether cookie rotation is enabled.
    cookie_rotation: bool = Field(
        default=True,
        description="是否启用Cookie轮换",
    )
    # Maximum retries per keyword (1-10).
    max_retries: int = Field(
        default=3,
        ge=1,
        le=10,
        description="每个关键词的最大重试次数",
    )

    class Config:
        # Example payload (list-of-CookieConfig form) supplied via
        # pydantic's ``schema_extra`` hook.
        schema_extra = {
            "example": {
                "cookies": [
                    {
                        "cookie_string": "a1=main_user_a1; web_session=main_session; webId=main_webid",
                        "name": "main_user",
                        "priority": 5,
                        "user_info": {"username": "主要用户"},
                    },
                    {
                        "cookie_string": "a1=backup_user_a1; web_session=backup_session; webId=backup_webid",
                        "name": "backup_user",
                        "priority": 3,
                    },
                ],
                "keywords": ["北京旅游", "上海美食", "广州攻略"],
                "max_notes_per_keyword": 20,
                "cookie_rotation": True,
                "max_retries": 3,
                "save_images": True,
                "get_detailed_content": True,
            },
        }
|
|||
|
|
|
|||
|
|
|
|||
|
|
class NoteInfo(BaseModel):
    """Note information model."""

    # --- Required identity fields ---
    note_id: str = Field(
        ...,
        description="笔记ID",
    )
    title: str = Field(
        ...,
        description="笔记标题",
    )
    # Author nickname.
    author: str = Field(
        ...,
        description="作者昵称",
    )

    # Note content; may be absent.
    content: Optional[str] = Field(
        default=None,
        description="笔记内容",
    )

    # --- Engagement counters, all defaulting to zero ---
    like_count: int = Field(
        default=0,
        description="点赞数",
    )
    comment_count: int = Field(
        default=0,
        description="评论数",
    )
    share_count: int = Field(
        default=0,
        description="分享数",
    )
    collect_count: int = Field(
        default=0,
        description="收藏数",
    )

    # Link to the note (required).
    note_url: str = Field(
        ...,
        description="笔记链接",
    )
    # Image URLs; defaults to a fresh empty list.
    images: List[str] = Field(
        default_factory=list,
        description="图片URL列表",
    )
    # Local paths of saved images; defaults to a fresh empty list.
    saved_image_paths: List[str] = Field(
        default_factory=list,
        description="保存的图片本地路径",
    )
    # Publish time as a plain string, if known.
    publish_time: Optional[str] = Field(
        default=None,
        description="发布时间",
    )
    # Location information, if known.
    location: Optional[str] = Field(
        default=None,
        description="位置信息",
    )
    # Tags; defaults to a fresh empty list.
    tags: List[str] = Field(
        default_factory=list,
        description="标签列表",
    )
|
|||
|
|
|
|||
|
|
|
|||
|
|
class KeywordSearchResult(BaseModel):
    """Search result for a single keyword."""

    # Keyword that was searched.
    keyword: str = Field(
        ...,
        description="搜索关键词",
    )
    # Total number of notes found.
    total_notes: int = Field(
        ...,
        description="搜索到的笔记总数",
    )
    # Notes found (required; there is no empty-list default here).
    notes: List[NoteInfo] = Field(
        ...,
        description="笔记列表",
    )
    # Search time, carried as a plain string.
    search_time: str = Field(
        ...,
        description="搜索时间",
    )
    # Whether the search succeeded.
    success: bool = Field(
        ...,
        description="搜索是否成功",
    )
    # Error message, or None.
    error_message: Optional[str] = Field(
        default=None,
        description="错误信息",
    )
|
|||
|
|
|
|||
|
|
|
|||
|
|
class BatchSearchResponse(BaseModel):
    """Batch search response model."""

    # Total number of keywords.
    total_keywords: int = Field(
        ...,
        description="总关键词数量",
    )
    # Number of successful searches.
    successful_searches: int = Field(
        ...,
        description="成功搜索数量",
    )
    # Number of failed searches.
    failed_searches: int = Field(
        ...,
        description="失败搜索数量",
    )
    # Total number of notes.
    total_notes: int = Field(
        ...,
        description="总笔记数量",
    )
    # One KeywordSearchResult per entry.
    results: List[KeywordSearchResult] = Field(
        ...,
        description="搜索结果列表",
    )
    # Image storage path.
    image_storage_path: str = Field(
        ...,
        description="图像存储路径",
    )
    # Output path.
    output_path: str = Field(
        ...,
        description="输出路径",
    )
    # Processing time in seconds, per the description.
    processing_time: float = Field(
        ...,
        description="处理时间(秒)",
    )
    # Free-form result summary.
    summary: Dict[str, Any] = Field(
        ...,
        description="结果摘要",
    )

    class Config:
        # Example payload supplied through pydantic's ``schema_extra`` hook.
        schema_extra = {
            "example": {
                "total_keywords": 3,
                "successful_searches": 2,
                "failed_searches": 1,
                "total_notes": 35,
                "results": [
                    {
                        "keyword": "北京旅游",
                        "total_notes": 20,
                        "notes": [
                            {
                                "note_id": "64a1b2c3d4e5f6g7",
                                "title": "北京三日游攻略",
                                "author": "旅游达人",
                                "content": "详细的北京旅游攻略...",
                                "like_count": 150,
                                "comment_count": 30,
                                "note_url": "https://www.xiaohongshu.com/discovery/item/64a1b2c3d4e5f6g7",
                                "images": ["https://sns-img-hw.xhscdn.com/example1.jpg"],
                                "saved_image_paths": ["data/images/beijing_travel_1.jpg"],
                            },
                        ],
                        "search_time": "2024-01-01 12:00:00",
                        "success": True,
                    },
                ],
                "image_storage_path": "data/images",
                "output_path": "data/output",
                "processing_time": 45.5,
                "summary": {
                    "top_authors": ["旅游达人", "美食博主"],
                    "total_interactions": 5000,
                    "saved_images": 25,
                },
            },
        }
|