307 lines
11 KiB
Python
Raw Normal View History

2025-07-15 10:59:36 +08:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
文档处理API模型定义
"""
from typing import List, Dict, Any, Optional
from pydantic import BaseModel, Field
class DocumentProcessRequest(BaseModel):
    """文档处理请求模型"""

    # Request model for document processing. (Docstring left as-is: pydantic
    # exposes it as the schema description.)
    # NOTE(review): `Config.schema_extra` is the pydantic v1 spelling; v2
    # renames it to `json_schema_extra` — confirm the installed version.

    # Path of the document file (required).
    file_path: str = Field(..., description="文档文件路径")
    # Attraction conversion format; defaults to "standard".
    attraction_format: str = Field(
        default="standard",
        description="景区转换格式",
    )
    # Product conversion format; defaults to "standard".
    product_format: str = Field(
        default="standard",
        description="产品转换格式",
    )

    class Config:
        # Example payload for the generated schema.
        schema_extra = {
            "example": {
                "file_path": "/path/to/document.pdf",
                "attraction_format": "standard",
                "product_format": "standard",
            },
        }
class BatchProcessRequest(BaseModel):
    """批量文档处理请求模型"""

    # Request model for batch document processing. (Docstring left as-is:
    # pydantic exposes it as the schema description.)

    # List of document file paths (required).
    file_paths: List[str] = Field(..., description="文档文件路径列表")
    # Attraction conversion format; defaults to "standard".
    attraction_format: str = Field(
        default="standard",
        description="景区转换格式",
    )
    # Product conversion format; defaults to "standard".
    product_format: str = Field(
        default="standard",
        description="产品转换格式",
    )

    class Config:
        # Example payload for the generated schema.
        schema_extra = {
            "example": {
                "file_paths": [
                    "/path/to/document1.pdf",
                    "/path/to/document2.docx",
                ],
                "attraction_format": "standard",
                "product_format": "standard",
            },
        }
class TextExtractionRequest(BaseModel):
    """文本提取请求模型"""

    # Request model for text extraction. (Docstring left as-is: pydantic
    # exposes it as the schema description.)

    # Path of the document file (required).
    file_path: str = Field(..., description="文档文件路径")

    class Config:
        # Example payload for the generated schema.
        schema_extra = {
            "example": {
                "file_path": "/path/to/document.pdf",
            },
        }
class TextParsingRequest(BaseModel):
    """文本解析请求模型"""

    # Request model for text parsing. (Docstring left as-is: pydantic
    # exposes it as the schema description.)

    # Text content (required).
    text: str = Field(..., description="文本内容")
    # Optional free-form metadata mapping; defaults to None.
    metadata: Optional[Dict[str, Any]] = Field(
        default=None,
        description="元数据",
    )

    class Config:
        # Example payload for the generated schema.
        schema_extra = {
            "example": {
                "text": "这是一个关于某景区的介绍文档...",
                "metadata": {
                    "title": "景区介绍",
                    "author": "作者",
                },
            },
        }
class DocumentTransformRequest(BaseModel):
    """文档转换请求模型"""

    # Request model for document transformation. (Docstring left as-is:
    # pydantic exposes it as the schema description.)

    # Parsed document data (required); typed as an open mapping, so its keys
    # are not validated by this model.
    parsed_document: Dict[str, Any] = Field(
        ...,
        description="解析后的文档数据",
    )
    # Attraction conversion format; defaults to "standard".
    attraction_format: str = Field(
        default="standard",
        description="景区转换格式",
    )
    # Product conversion format; defaults to "standard".
    product_format: str = Field(
        default="standard",
        description="产品转换格式",
    )

    class Config:
        # Example payload for the generated schema.
        schema_extra = {
            "example": {
                "parsed_document": {
                    "title": "景区介绍",
                    "sections": [],
                    "attractions": [],
                    "products": [],
                    "metadata": {},
                },
                "attraction_format": "standard",
                "product_format": "standard",
            },
        }
class DocumentProcessResponse(BaseModel):
    """文档处理响应模型"""

    # Response model for document processing. (Docstring left as-is:
    # pydantic exposes it as the schema description.)
    # Only `request_id` and `success` are required; every other field
    # defaults to None.

    # Request ID.
    request_id: str = Field(..., description="请求ID")
    # Whether processing succeeded.
    success: bool = Field(..., description="处理是否成功")
    # Source file information.
    source_file: Optional[Dict[str, Any]] = Field(
        default=None,
        description="源文件信息",
    )
    # Text extraction result.
    extraction_result: Optional[Dict[str, Any]] = Field(
        default=None,
        description="文本提取结果",
    )
    # Document parsing result.
    parsing_result: Optional[Dict[str, Any]] = Field(
        default=None,
        description="文档解析结果",
    )
    # Content transformation result.
    transformation_result: Optional[Dict[str, Any]] = Field(
        default=None,
        description="内容转换结果",
    )
    # Processing time as a string (the example below shows a timestamp).
    processing_time: Optional[str] = Field(
        default=None,
        description="处理时间",
    )
    # Error message.
    error: Optional[str] = Field(default=None, description="错误信息")
    # Stage at which processing failed.
    stage: Optional[str] = Field(default=None, description="失败阶段")

    class Config:
        # Example payload for the generated schema.
        schema_extra = {
            "example": {
                "request_id": "document-20240715-123456-a1b2c3d4",
                "success": True,
                "source_file": {
                    "path": "/path/to/document.pdf",
                    "name": "document.pdf",
                    "format": ".pdf",
                },
                "extraction_result": {
                    "text_length": 1500,
                    "metadata": {
                        "total_pages": 5,
                        "title": "景区介绍",
                    },
                },
                "parsing_result": {
                    "title": "某景区介绍",
                    "sections_count": 3,
                    "attractions_found": 2,
                    "products_found": 1,
                },
                "transformation_result": {
                    "request_id": "document-20240715-123456-a1b2c3d4",
                    "transformed_attractions": [],
                    "transformed_products": [],
                    "document_summary": {},
                },
                "processing_time": "2024-07-15 12:34:56",
            },
        }
class BatchProcessResponse(BaseModel):
    """批量处理响应模型"""

    # Response model for batch processing. (Docstring left as-is: pydantic
    # exposes it as the schema description.)
    # All fields are required.

    # Batch request ID.
    batch_request_id: str = Field(..., description="批次请求ID")
    # Total number of files.
    total_files: int = Field(..., description="总文件数")
    # Number of files processed successfully.
    successful_count: int = Field(..., description="成功处理数")
    # Number of files that failed.
    failed_count: int = Field(..., description="失败处理数")
    # List of processing results, each an open mapping.
    results: List[Dict[str, Any]] = Field(..., description="处理结果列表")
    # Processing time as a string (the example below shows a timestamp).
    processing_time: str = Field(..., description="处理时间")

    class Config:
        # Example payload for the generated schema: one success, one failure.
        schema_extra = {
            "example": {
                "batch_request_id": "document-20240715-123456-a1b2c3d4",
                "total_files": 2,
                "successful_count": 1,
                "failed_count": 1,
                "results": [
                    {
                        "request_id": "document-20240715-123457-b2c3d4e5",
                        "success": True,
                        "source_file": {
                            "path": "/path/to/document1.pdf",
                        },
                    },
                    {
                        "request_id": "document-20240715-123458-c3d4e5f6",
                        "success": False,
                        "error": "不支持的文件格式",
                    },
                ],
                "processing_time": "2024-07-15 12:34:56",
            },
        }
class TextExtractionResponse(BaseModel):
    """文本提取响应模型"""

    # Response model for text extraction. (Docstring left as-is: pydantic
    # exposes it as the schema description.)
    # NOTE(review): `metadata` and `source_file` are required even though
    # the model also carries an optional `error`; confirm that failure
    # responses always supply both mappings.

    # Request ID.
    request_id: str = Field(..., description="请求ID")
    # Whether extraction succeeded.
    success: bool = Field(..., description="提取是否成功")
    # Extracted text content; defaults to None.
    text: Optional[str] = Field(default=None, description="提取的文本内容")
    # Document metadata (required).
    metadata: Dict[str, Any] = Field(..., description="文档元数据")
    # Source file information (required).
    source_file: Dict[str, Any] = Field(..., description="源文件信息")
    # Error message; defaults to None.
    error: Optional[str] = Field(default=None, description="错误信息")

    class Config:
        # Example payload for the generated schema.
        schema_extra = {
            "example": {
                "request_id": "document-20240715-123456-a1b2c3d4",
                "success": True,
                "text": "这是从PDF文档中提取的文本内容...",
                "metadata": {
                    "total_pages": 5,
                    "title": "景区介绍",
                    "author": "作者",
                },
                "source_file": {
                    "path": "/path/to/document.pdf",
                    "name": "document.pdf",
                    "format": ".pdf",
                },
            },
        }
class TextParsingResponse(BaseModel):
    """文本解析响应模型"""

    # Response model for text parsing. (Docstring left as-is: pydantic
    # exposes it as the schema description.)

    # Request ID.
    request_id: str = Field(..., description="请求ID")
    # Whether parsing succeeded.
    success: bool = Field(..., description="解析是否成功")
    # Parsed document; defaults to None.
    parsed_document: Optional[Dict[str, Any]] = Field(
        default=None,
        description="解析后的文档",
    )
    # Parsing statistics; defaults to None.
    statistics: Optional[Dict[str, Any]] = Field(
        default=None,
        description="解析统计信息",
    )
    # Error message; defaults to None.
    error: Optional[str] = Field(default=None, description="错误信息")

    class Config:
        # Example payload for the generated schema.
        schema_extra = {
            "example": {
                "request_id": "document-20240715-123456-a1b2c3d4",
                "success": True,
                "parsed_document": {
                    "title": "某景区介绍",
                    "sections": [],
                    "attractions": [],
                    "products": [],
                    "metadata": {},
                },
                "statistics": {
                    "title": "某景区介绍",
                    "sections_count": 3,
                    "attractions_found": 2,
                    "products_found": 1,
                },
            },
        }
class DocumentTransformResponse(BaseModel):
    """文档转换响应模型"""

    # Response model for document transformation. (Docstring left as-is:
    # pydantic exposes it as the schema description.)

    # Request ID.
    request_id: str = Field(..., description="请求ID")
    # Whether the transformation succeeded.
    success: bool = Field(..., description="转换是否成功")
    # Transformation result; defaults to None.
    transformation_result: Optional[Dict[str, Any]] = Field(
        default=None,
        description="转换结果",
    )
    # Error message; defaults to None.
    error: Optional[str] = Field(default=None, description="错误信息")

    class Config:
        # Example payload for the generated schema.
        schema_extra = {
            "example": {
                "request_id": "document-20240715-123456-a1b2c3d4",
                "success": True,
                "transformation_result": {
                    "request_id": "document-20240715-123456-a1b2c3d4",
                    "transformed_attractions": [],
                    "transformed_products": [],
                    "document_summary": {},
                },
            },
        }
class SupportedFormatsResponse(BaseModel):
    """支持格式响应模型"""

    # Response model listing supported formats. (Docstring left as-is:
    # pydantic exposes it as the schema description.)

    # Supported file formats as a str -> str mapping; the example below maps
    # file extensions to display names.
    supported_file_formats: Dict[str, str] = Field(
        ...,
        description="支持的文件格式",
    )
    # Transformation formats as a two-level mapping; the example below is
    # keyed by format family, then by format name.
    transformation_formats: Dict[str, Dict[str, Any]] = Field(
        ...,
        description="转换格式",
    )

    class Config:
        # Example payload for the generated schema.
        schema_extra = {
            "example": {
                "supported_file_formats": {
                    ".pdf": "PDF文档",
                    ".docx": "Word文档",
                    ".txt": "纯文本文件",
                },
                "transformation_formats": {
                    "attraction_formats": {
                        "standard": {
                            "name": "景区标准格式",
                            "description": "包含基本信息、特色介绍、游玩指南等标准化景区资料",
                        },
                    },
                    "product_formats": {
                        "standard": {
                            "name": "产品标准格式",
                            "description": "包含产品基本信息、特色、价格等标准化产品资料",
                        },
                    },
                },
            },
        }
class ProcessingStatisticsResponse(BaseModel):
    """处理统计响应模型"""

    # Response model for processing statistics. (Docstring left as-is:
    # pydantic exposes it as the schema description.)
    # All fields are required.

    # Number of supported formats.
    supported_formats: int = Field(..., description="支持的格式数量")
    # Count of available transformation formats as a str -> int mapping.
    available_transformation_formats: Dict[str, int] = Field(
        ...,
        description="可用转换格式数量",
    )
    # Service status string (the example below uses "active").
    service_status: str = Field(..., description="服务状态")

    class Config:
        # Example payload for the generated schema.
        schema_extra = {
            "example": {
                "supported_formats": 8,
                "available_transformation_formats": {
                    "attraction_formats": 3,
                    "product_formats": 3,
                },
                "service_status": "active",
            },
        }