307 lines
11 KiB
Python
307 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""
Model definitions for the document-processing API.
"""
|
|
|
|
from typing import List, Dict, Any, Optional
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
class DocumentProcessRequest(BaseModel):
    """Request model for processing a single document."""

    # Path of the document file (required).
    file_path: str = Field(..., description="文档文件路径")
    # Attraction / product conversion formats; each defaults to "standard".
    attraction_format: str = Field(
        default="standard",
        description="景区转换格式",
    )
    product_format: str = Field(
        default="standard",
        description="产品转换格式",
    )

    class Config:
        # Example payload supplied through the schema extras.
        schema_extra = {
            "example": {
                "file_path": "/path/to/document.pdf",
                "attraction_format": "standard",
                "product_format": "standard",
            },
        }
|
|
|
|
|
|
class BatchProcessRequest(BaseModel):
    """Request model for processing several documents in one batch."""

    # Paths of the document files (required).
    file_paths: List[str] = Field(..., description="文档文件路径列表")
    # Attraction / product conversion formats; each defaults to "standard".
    attraction_format: str = Field(
        default="standard",
        description="景区转换格式",
    )
    product_format: str = Field(
        default="standard",
        description="产品转换格式",
    )

    class Config:
        # Example payload supplied through the schema extras.
        schema_extra = {
            "example": {
                "file_paths": [
                    "/path/to/document1.pdf",
                    "/path/to/document2.docx",
                ],
                "attraction_format": "standard",
                "product_format": "standard",
            },
        }
|
|
|
|
|
|
class TextExtractionRequest(BaseModel):
    """Request model for extracting text from a document."""

    # Path of the document file (required).
    file_path: str = Field(..., description="文档文件路径")

    class Config:
        # Example payload supplied through the schema extras.
        schema_extra = {
            "example": {"file_path": "/path/to/document.pdf"},
        }
|
|
|
|
|
|
class TextParsingRequest(BaseModel):
    """Request model for parsing raw text."""

    # Text content to parse (required).
    text: str = Field(..., description="文本内容")
    # Free-form metadata; defaults to None when not supplied.
    metadata: Optional[Dict[str, Any]] = Field(
        default=None,
        description="元数据",
    )

    class Config:
        # Example payload supplied through the schema extras.
        schema_extra = {
            "example": {
                "text": "这是一个关于某景区的介绍文档...",
                "metadata": {"title": "景区介绍", "author": "作者"},
            },
        }
|
|
|
|
|
|
class DocumentTransformRequest(BaseModel):
    """Request model for transforming an already-parsed document."""

    # Parsed document data (required).
    parsed_document: Dict[str, Any] = Field(
        ...,
        description="解析后的文档数据",
    )
    # Attraction / product conversion formats; each defaults to "standard".
    attraction_format: str = Field(
        default="standard",
        description="景区转换格式",
    )
    product_format: str = Field(
        default="standard",
        description="产品转换格式",
    )

    class Config:
        # Example payload supplied through the schema extras.
        schema_extra = {
            "example": {
                "parsed_document": {
                    "title": "景区介绍",
                    "sections": [],
                    "attractions": [],
                    "products": [],
                    "metadata": {},
                },
                "attraction_format": "standard",
                "product_format": "standard",
            },
        }
|
|
|
|
|
|
class DocumentProcessResponse(BaseModel):
    """Response model for single-document processing."""

    # Always present: request identifier and overall success flag.
    request_id: str = Field(..., description="请求ID")
    success: bool = Field(..., description="处理是否成功")

    # Per-stage payloads; each defaults to None.
    source_file: Optional[Dict[str, Any]] = Field(
        default=None,
        description="源文件信息",
    )
    extraction_result: Optional[Dict[str, Any]] = Field(
        default=None,
        description="文本提取结果",
    )
    parsing_result: Optional[Dict[str, Any]] = Field(
        default=None,
        description="文档解析结果",
    )
    transformation_result: Optional[Dict[str, Any]] = Field(
        default=None,
        description="内容转换结果",
    )
    processing_time: Optional[str] = Field(
        default=None,
        description="处理时间",
    )

    # Error message and the stage that failed; both default to None.
    error: Optional[str] = Field(default=None, description="错误信息")
    stage: Optional[str] = Field(default=None, description="失败阶段")

    class Config:
        # Example payload supplied through the schema extras.
        schema_extra = {
            "example": {
                "request_id": "document-20240715-123456-a1b2c3d4",
                "success": True,
                "source_file": {
                    "path": "/path/to/document.pdf",
                    "name": "document.pdf",
                    "format": ".pdf",
                },
                "extraction_result": {
                    "text_length": 1500,
                    "metadata": {
                        "total_pages": 5,
                        "title": "景区介绍",
                    },
                },
                "parsing_result": {
                    "title": "某景区介绍",
                    "sections_count": 3,
                    "attractions_found": 2,
                    "products_found": 1,
                },
                "transformation_result": {
                    "request_id": "document-20240715-123456-a1b2c3d4",
                    "transformed_attractions": [],
                    "transformed_products": [],
                    "document_summary": {},
                },
                "processing_time": "2024-07-15 12:34:56",
            },
        }
|
|
|
|
|
|
class BatchProcessResponse(BaseModel):
    """Response model for batch processing."""

    # Identifier of the batch request.
    batch_request_id: str = Field(..., description="批次请求ID")

    # Counters: total files, successfully processed, failed.
    total_files: int = Field(..., description="总文件数")
    successful_count: int = Field(..., description="成功处理数")
    failed_count: int = Field(..., description="失败处理数")

    # List of processing results, one dict per entry.
    results: List[Dict[str, Any]] = Field(..., description="处理结果列表")
    processing_time: str = Field(..., description="处理时间")

    class Config:
        # Example payload supplied through the schema extras: one
        # successful entry and one failed entry.
        schema_extra = {
            "example": {
                "batch_request_id": "document-20240715-123456-a1b2c3d4",
                "total_files": 2,
                "successful_count": 1,
                "failed_count": 1,
                "results": [
                    {
                        "request_id": "document-20240715-123457-b2c3d4e5",
                        "success": True,
                        "source_file": {"path": "/path/to/document1.pdf"},
                    },
                    {
                        "request_id": "document-20240715-123458-c3d4e5f6",
                        "success": False,
                        "error": "不支持的文件格式",
                    },
                ],
                "processing_time": "2024-07-15 12:34:56",
            },
        }
|
|
|
|
|
|
class TextExtractionResponse(BaseModel):
    """Response model for text extraction."""

    # Request identifier and extraction success flag (both required).
    request_id: str = Field(..., description="请求ID")
    success: bool = Field(..., description="提取是否成功")

    # Extracted text; defaults to None.
    text: Optional[str] = Field(default=None, description="提取的文本内容")

    # Document metadata and source-file information (both required).
    metadata: Dict[str, Any] = Field(..., description="文档元数据")
    source_file: Dict[str, Any] = Field(..., description="源文件信息")

    # Error message; defaults to None.
    error: Optional[str] = Field(default=None, description="错误信息")

    class Config:
        # Example payload supplied through the schema extras.
        schema_extra = {
            "example": {
                "request_id": "document-20240715-123456-a1b2c3d4",
                "success": True,
                "text": "这是从PDF文档中提取的文本内容...",
                "metadata": {
                    "total_pages": 5,
                    "title": "景区介绍",
                    "author": "作者",
                },
                "source_file": {
                    "path": "/path/to/document.pdf",
                    "name": "document.pdf",
                    "format": ".pdf",
                },
            },
        }
|
|
|
|
|
|
class TextParsingResponse(BaseModel):
    """Response model for text parsing."""

    # Request identifier and parsing success flag (both required).
    request_id: str = Field(..., description="请求ID")
    success: bool = Field(..., description="解析是否成功")

    # Parsed document and parsing statistics; each defaults to None.
    parsed_document: Optional[Dict[str, Any]] = Field(
        default=None,
        description="解析后的文档",
    )
    statistics: Optional[Dict[str, Any]] = Field(
        default=None,
        description="解析统计信息",
    )

    # Error message; defaults to None.
    error: Optional[str] = Field(default=None, description="错误信息")

    class Config:
        # Example payload supplied through the schema extras.
        schema_extra = {
            "example": {
                "request_id": "document-20240715-123456-a1b2c3d4",
                "success": True,
                "parsed_document": {
                    "title": "某景区介绍",
                    "sections": [],
                    "attractions": [],
                    "products": [],
                    "metadata": {},
                },
                "statistics": {
                    "title": "某景区介绍",
                    "sections_count": 3,
                    "attractions_found": 2,
                    "products_found": 1,
                },
            },
        }
|
|
|
|
|
|
class DocumentTransformResponse(BaseModel):
    """Response model for document transformation."""

    # Request identifier and transformation success flag (both required).
    request_id: str = Field(..., description="请求ID")
    success: bool = Field(..., description="转换是否成功")

    # Transformation result; defaults to None.
    transformation_result: Optional[Dict[str, Any]] = Field(
        default=None,
        description="转换结果",
    )

    # Error message; defaults to None.
    error: Optional[str] = Field(default=None, description="错误信息")

    class Config:
        # Example payload supplied through the schema extras.
        schema_extra = {
            "example": {
                "request_id": "document-20240715-123456-a1b2c3d4",
                "success": True,
                "transformation_result": {
                    "request_id": "document-20240715-123456-a1b2c3d4",
                    "transformed_attractions": [],
                    "transformed_products": [],
                    "document_summary": {},
                },
            },
        }
|
|
|
|
|
|
class SupportedFormatsResponse(BaseModel):
    """Response model listing the supported formats."""

    # Supported file formats as a str -> str mapping (required).
    supported_file_formats: Dict[str, str] = Field(
        ...,
        description="支持的文件格式",
    )
    # Transformation formats as a mapping of dicts (required).
    transformation_formats: Dict[str, Dict[str, Any]] = Field(
        ...,
        description="转换格式",
    )

    class Config:
        # Example payload supplied through the schema extras.
        schema_extra = {
            "example": {
                "supported_file_formats": {
                    ".pdf": "PDF文档",
                    ".docx": "Word文档",
                    ".txt": "纯文本文件",
                },
                "transformation_formats": {
                    "attraction_formats": {
                        "standard": {
                            "name": "景区标准格式",
                            "description": "包含基本信息、特色介绍、游玩指南等标准化景区资料",
                        },
                    },
                    "product_formats": {
                        "standard": {
                            "name": "产品标准格式",
                            "description": "包含产品基本信息、特色、价格等标准化产品资料",
                        },
                    },
                },
            },
        }
|
|
|
|
|
|
class ProcessingStatisticsResponse(BaseModel):
    """Response model for processing statistics."""

    # Number of supported formats (required).
    supported_formats: int = Field(..., description="支持的格式数量")
    # Count of available transformation formats, as a str -> int mapping.
    available_transformation_formats: Dict[str, int] = Field(
        ...,
        description="可用转换格式数量",
    )
    # Service status string (required).
    service_status: str = Field(..., description="服务状态")

    class Config:
        # Example payload supplied through the schema extras.
        schema_extra = {
            "example": {
                "supported_formats": 8,
                "available_transformation_formats": {
                    "attraction_formats": 3,
                    "product_formats": 3,
                },
                "service_status": "active",
            },
        }