TravelContentCreator/api/models/content_integration.py

191 lines
7.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
内容整合API模型
"""
from typing import Any, Dict, List, Optional, Union

from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
class Base64Document(BaseModel):
    """A document passed inline as Base64 text.

    All three fields are required plain strings. The model itself does not
    check that ``content`` actually decodes as Base64.
    """

    # Name of the file the content came from.
    filename: str = Field(description="文件名")
    # File content, Base64-encoded.
    content: str = Field(description="Base64编码的文件内容")
    # MIME type of the file.
    mime_type: str = Field(description="文件MIME类型")
class ContentIntegrationRequest(BaseModel):
    """Request payload for a content-integration job.

    A valid request carries ``documents``, or ``keywords`` together with
    ``cookies``, or both. ``keywords`` and ``cookies`` are only accepted as
    a pair. The remaining fields are search options; the meaning of each
    numeric value is listed in the corresponding field description.

    Raises:
        ValueError: (surfaced by pydantic as a validation error) when a
            supplied list is empty, when every keyword is blank, when the
            cookie string is blank, or when the documents/keywords/cookies
            combination is not one of the accepted ones.
    """

    # Example payload attached to the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "documents": [
                    {
                        "filename": "travel_guide.pdf",
                        "content": "base64_encoded_content_here",
                        "mime_type": "application/pdf",
                    }
                ],
                "keywords": ["北京旅游", "故宫攻略", "长城一日游"],
                "cookies": "a1=your_cookie_value; web_session=your_session_value",
                "sort_type": 2,
                "note_type": 2,
                "note_time": 0,
                "note_range": 0,
                "pos_distance": 0,
                "query_num": 10,
            }
        }
    )

    documents: Optional[List[Base64Document]] = Field(default=None, description="Base64编码的文档列表")
    keywords: Optional[List[str]] = Field(default=None, description="搜索关键词列表")
    cookies: Optional[str] = Field(default=None, description="小红书Cookie字符串")

    # Xiaohongshu search options.
    sort_type: Optional[int] = Field(default=2, ge=0, le=4, description="排序方式: 0综合排序, 1最新, 2最多点赞, 3最多评论, 4最多收藏")
    note_type: Optional[int] = Field(default=2, ge=0, le=2, description="笔记类型: 0不限, 1视频笔记, 2普通笔记")
    note_time: Optional[int] = Field(default=0, ge=0, le=3, description="笔记时间: 0不限, 1一天内, 2一周内, 3半年内")
    note_range: Optional[int] = Field(default=0, ge=0, le=3, description="笔记范围: 0不限, 1已看过, 2未看过, 3已关注")
    pos_distance: Optional[int] = Field(default=0, ge=0, le=2, description="位置距离: 0不限, 1同城, 2附近")
    query_num: Optional[int] = Field(default=10, ge=1, le=50, description="每个关键词搜索的笔记数量")

    @field_validator('documents')
    @classmethod
    def validate_documents(cls, v):
        """Reject a document list that was supplied but is empty."""
        if v is not None and not v:
            raise ValueError("如果提供文档,列表不能为空")
        return v

    @field_validator('keywords')
    @classmethod
    def validate_keywords(cls, v):
        """Strip, drop blank entries from, and de-duplicate the keyword list.

        Returns ``None`` unchanged; otherwise returns the cleaned list with
        the first occurrence of each keyword kept in its original position.
        """
        if v is None:
            return v
        if not v:
            raise ValueError("如果提供关键词,列表不能为空")
        stripped = (k.strip() for k in v)
        # dict.fromkeys removes duplicates while preserving first-seen order;
        # going through a set would leave the keyword order arbitrary.
        cleaned = list(dict.fromkeys(k for k in stripped if k))
        if not cleaned:
            raise ValueError("关键词列表不能全为空")
        return cleaned

    @field_validator('cookies')
    @classmethod
    def validate_cookies(cls, v):
        """Strip the cookie string and reject it if nothing remains."""
        if v is None:
            return v
        trimmed = v.strip()
        if not trimmed:
            raise ValueError("如果提供Cookie不能为空")
        return trimmed

    @model_validator(mode='after')
    def validate_request(self) -> "ContentIntegrationRequest":
        """Check the documents/keywords/cookies combination across fields.

        Runs after field validation, so it operates on the validated model
        instance and must return that instance.
        """
        has_documents = self.documents is not None
        has_keywords = self.keywords is not None
        has_cookies = self.cookies is not None

        if not has_documents and not (has_keywords and has_cookies):
            raise ValueError("必须提供文档或(关键词和Cookie)中的至少一项")
        # Past this point documents are present, so only a lone keyword list
        # or a lone cookie string can still be wrong.
        if has_keywords and not has_cookies:
            raise ValueError("提供关键词时必须提供Cookie")
        if has_cookies and not has_keywords:
            raise ValueError("提供Cookie时必须提供关键词")
        return self
class DocumentInfo(BaseModel):
    """Per-document metadata: path, type and content length (all required)."""

    # Path of the file.
    file_path: str = Field(description="文件路径")
    # Type of the file.
    file_type: str = Field(description="文件类型")
    # Length of the content.
    content_length: int = Field(description="内容长度")
class XHSInfo(BaseModel):
    """Aggregate figures for the Xiaohongshu notes (all fields required)."""

    # Total number of notes.
    total_notes: int = Field(description="笔记总数")
    # Authors of the notes.
    authors: List[str] = Field(description="作者列表")
    # Total interaction count.
    total_interactions: int = Field(description="总互动数")
class SearchConfig(BaseModel):
    """Search options in effect, using the same field names as the request.

    Every field here is a required ``int``; no range constraints are
    declared on this model.
    """

    # Sort order.
    sort_type: int = Field(description="排序方式")
    # Note type.
    note_type: int = Field(description="笔记类型")
    # Note time window.
    note_time: int = Field(description="笔记时间")
    # Note range.
    note_range: int = Field(description="笔记范围")
    # Location distance.
    pos_distance: int = Field(description="位置距离")
    # Number of notes searched per keyword.
    query_num: int = Field(description="每个关键词搜索的笔记数量")
class InputSummary(BaseModel):
    """Summary of the inputs: document count, note count and keywords."""

    # Number of documents.
    document_count: int = Field(description="文档数量")
    # Number of Xiaohongshu notes.
    xhs_notes_count: int = Field(description="小红书笔记数量")
    # Keyword list.
    keywords: List[str] = Field(description="关键词列表")
class DocumentInfoDetail(BaseModel):
    """Document details: the per-document entries plus the integrated text length."""

    # One DocumentInfo entry per document.
    documents: List[DocumentInfo] = Field(description="文档列表")
    # Length of the integrated text.
    integrated_text_length: int = Field(description="整合文本长度")
class ContentIntegrationResponse(BaseModel):
    """Response payload for a content-integration job.

    ``success``, ``timestamp`` and ``processing_time`` are always required.
    The result fields and ``error_message`` are all optional and default to
    ``None``; the model does not itself enforce which of them are present
    for a given ``success`` value.
    """

    # Example payload attached to the generated JSON schema. Pydantic v2
    # reads the key ``json_schema_extra``; the v1 name ``schema_extra`` is
    # not applied, so the example would otherwise be missing from the schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "success": True,
                "timestamp": "20250715_143022",
                "processing_time": "45.32秒",
                "input_summary": {
                    "document_count": 2,
                    "xhs_notes_count": 25,
                    "keywords": ["北京旅游", "故宫攻略"],
                },
                "document_info": {
                    "documents": [
                        {
                            "file_path": "uploads/travel_guide.pdf",
                            "file_type": "pdf",
                            "content_length": 5420,
                        }
                    ],
                    "integrated_text_length": 8650,
                },
                "xhs_info": {
                    "total_notes": 25,
                    "authors": ["旅游达人小王", "北京导游张三"],
                    "total_interactions": 15420,
                },
                "integrated_content": "# 北京旅游全攻略...",
                "search_config": {
                    "sort_type": 2,
                    "note_type": 2,
                    "note_time": 0,
                    "note_range": 0,
                    "pos_distance": 0,
                    "query_num": 10,
                },
            }
        }
    )

    success: bool = Field(..., description="是否成功")
    timestamp: str = Field(..., description="时间戳")
    processing_time: str = Field(..., description="处理时间")

    # Fields populated on success.
    input_summary: Optional[InputSummary] = Field(default=None, description="输入摘要")
    document_info: Optional[DocumentInfoDetail] = Field(default=None, description="文档信息")
    xhs_info: Optional[XHSInfo] = Field(default=None, description="小红书信息")
    integrated_content: Optional[str] = Field(default=None, description="整合后的内容")
    search_config: Optional[SearchConfig] = Field(default=None, description="搜索配置")

    # Field populated on failure.
    error_message: Optional[str] = Field(default=None, description="错误信息")