160 lines
6.3 KiB
Python
160 lines
6.3 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""
|
|
内容整合API模型
|
|
"""
|
|
|
|
from typing import List, Optional, Dict, Any, Union
|
|
from pydantic import BaseModel, Field, validator
|
|
|
|
|
|
class ContentIntegrationRequest(BaseModel):
    """Request payload for a content-integration run.

    Combines optional local documents with Xiaohongshu (XHS) notes found by
    keyword search. ``document_paths`` may be omitted for a search-only
    request; ``keywords`` and ``cookies`` are always required.
    """

    document_paths: Optional[List[str]] = Field(
        default=None,
        description="文档文件路径列表(可选,纯搜索模式时可为空)",
    )
    keywords: List[str] = Field(..., description="搜索关键词列表", min_length=1)
    cookies: str = Field(..., description="小红书Cookie字符串")

    # XHS search options (the integer codes are explained in each description).
    sort_type: int = Field(
        default=2,
        ge=0,
        le=4,
        description="排序方式: 0综合排序, 1最新, 2最多点赞, 3最多评论, 4最多收藏",
    )
    note_type: int = Field(
        default=2,
        ge=0,
        le=2,
        description="笔记类型: 0不限, 1视频笔记, 2普通笔记",
    )
    note_time: int = Field(
        default=0,
        ge=0,
        le=3,
        description="笔记时间: 0不限, 1一天内, 2一周内, 3半年内",
    )
    note_range: int = Field(
        default=0,
        ge=0,
        le=3,
        description="笔记范围: 0不限, 1已看过, 2未看过, 3已关注",
    )
    pos_distance: int = Field(
        default=0,
        ge=0,
        le=2,
        description="位置距离: 0不限, 1同城, 2附近",
    )
    query_num: int = Field(
        default=10,
        ge=1,
        le=50,
        description="每个关键词搜索的笔记数量",
    )

    # Output options.
    output_path: str = Field(default="data/output", description="输出目录路径")

    @validator('document_paths')
    def validate_document_paths(cls, v):
        """Reject an explicitly supplied but empty path list.

        ``None`` (field omitted) is accepted unchanged.

        Raises:
            ValueError: if ``v`` is an empty list.
        """
        if v is not None and not v:
            raise ValueError("如果提供文档路径,列表不能为空")
        return v

    @validator('keywords')
    def validate_keywords(cls, v):
        """Strip keywords, drop blanks, and de-duplicate in first-seen order.

        Returns:
            The cleaned keyword list.

        Raises:
            ValueError: if the list is empty or contains only blank strings.
        """
        if not v:
            raise ValueError("关键词列表不能为空")
        stripped = (keyword.strip() for keyword in v)
        # dict.fromkeys de-duplicates while keeping insertion order; going
        # through a set would return the keywords in an arbitrary order that
        # can differ between runs.
        cleaned = list(dict.fromkeys(keyword for keyword in stripped if keyword))
        if not cleaned:
            raise ValueError("关键词列表不能全为空")
        return cleaned

    @validator('cookies')
    def validate_cookies(cls, v):
        """Require a non-blank cookie string and return it stripped.

        Raises:
            ValueError: if ``v`` is empty or whitespace only.
        """
        if not v or not v.strip():
            raise ValueError("Cookie不能为空")
        return v.strip()

    class Config:
        schema_extra = {
            "example": {
                "document_paths": [
                    "uploads/travel_guide.pdf",
                    "uploads/attraction_info.docx"
                ],
                "keywords": ["北京旅游", "故宫攻略", "长城一日游"],
                "cookies": "a1=your_cookie_value; web_session=your_session_value",
                "sort_type": 2,
                "note_type": 2,
                "note_time": 0,
                "note_range": 0,
                "pos_distance": 0,
                "query_num": 10,
                "output_path": "data/output"
            }
        }
|
|
|
|
|
|
class DocumentInfo(BaseModel):
    """Describes a single document: its path, its type and its content length."""

    file_path: str = Field(default=..., description="文件路径")
    file_type: str = Field(default=..., description="文件类型")
    content_length: int = Field(default=..., description="内容长度")
|
|
|
|
|
|
class XHSInfo(BaseModel):
    """Summary of Xiaohongshu (XHS) notes: count, authors and interaction total."""

    total_notes: int = Field(default=..., description="笔记总数")
    authors: List[str] = Field(default=..., description="作者列表")
    total_interactions: int = Field(default=..., description="总互动数")
|
|
|
|
|
|
class SearchConfig(BaseModel):
    """Search settings reported back to the caller.

    All six fields are required integers; no range constraints are declared
    on this model.
    """

    sort_type: int = Field(default=..., description="排序方式")
    note_type: int = Field(default=..., description="笔记类型")
    note_time: int = Field(default=..., description="笔记时间")
    note_range: int = Field(default=..., description="笔记范围")
    pos_distance: int = Field(default=..., description="位置距离")
    query_num: int = Field(default=..., description="每个关键词搜索的笔记数量")
|
|
|
|
|
|
class InputSummary(BaseModel):
    """Summary of the inputs: document count, XHS note count and keywords."""

    document_count: int = Field(default=..., description="文档数量")
    xhs_notes_count: int = Field(default=..., description="小红书笔记数量")
    keywords: List[str] = Field(default=..., description="关键词列表")
|
|
|
|
|
|
class DocumentInfoDetail(BaseModel):
    """Detailed document information: per-document entries plus the integrated text length."""

    documents: List[DocumentInfo] = Field(default=..., description="文档列表")
    integrated_text_length: int = Field(default=..., description="整合文本长度")
|
|
|
|
|
|
class ContentIntegrationResponse(BaseModel):
    """Response payload for a content-integration run.

    ``success``, ``timestamp`` and ``processing_time`` are always required.
    The remaining fields are optional and default to ``None``; they are
    grouped below into those used on success and those used on failure.
    """

    success: bool = Field(default=..., description="是否成功")
    timestamp: str = Field(default=..., description="时间戳")
    processing_time: str = Field(default=..., description="处理时间")

    # Fields used when the request succeeded.
    input_summary: Optional[InputSummary] = Field(
        default=None,
        description="输入摘要",
    )
    document_info: Optional[DocumentInfoDetail] = Field(
        default=None,
        description="文档信息",
    )
    xhs_info: Optional[XHSInfo] = Field(
        default=None,
        description="小红书信息",
    )
    integrated_content: Optional[str] = Field(
        default=None,
        description="整合后的内容",
    )
    search_config: Optional[SearchConfig] = Field(
        default=None,
        description="搜索配置",
    )

    # Field used when the request failed.
    error_message: Optional[str] = Field(
        default=None,
        description="错误信息",
    )

    class Config:
        schema_extra = {
            "example": {
                "success": True,
                "timestamp": "20250715_143022",
                "processing_time": "45.32秒",
                "input_summary": {
                    "document_count": 2,
                    "xhs_notes_count": 25,
                    "keywords": ["北京旅游", "故宫攻略"]
                },
                "document_info": {
                    "documents": [
                        {
                            "file_path": "uploads/travel_guide.pdf",
                            "file_type": "pdf",
                            "content_length": 5420
                        }
                    ],
                    "integrated_text_length": 8650
                },
                "xhs_info": {
                    "total_notes": 25,
                    "authors": ["旅游达人小王", "北京导游张三"],
                    "total_interactions": 15420
                },
                "integrated_content": "# 北京旅游全攻略...",
                "search_config": {
                    "sort_type": 2,
                    "note_type": 2,
                    "note_time": 0,
                    "note_range": 0,
                    "pos_distance": 0,
                    "query_num": 10
                }
            }
        }