#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Pydantic request and response models for the document-processing API.

The class docstrings and ``Field`` descriptions are intentionally left in
their original wording: Pydantic copies both into the generated JSON schema,
so they are runtime text rather than plain documentation.  English
explanations are given in ``#`` comments next to each model and field.
"""

from typing import List, Dict, Any, Optional

from pydantic import BaseModel, Field

# Default value shared by every ``attraction_format`` / ``product_format``
# request field in this module.
_DEFAULT_FORMAT = "standard"


# ---------------------------------------------------------------------------
# Request models
# ---------------------------------------------------------------------------

# Document processing request: one required file path plus the two
# conversion-format selectors, which fall back to "standard".
class DocumentProcessRequest(BaseModel):
    """文档处理请求模型"""

    # Path of the document file (required).
    file_path: str = Field(..., description="文档文件路径")
    # Conversion format for scenic-area (attraction) content.
    attraction_format: str = Field(
        default=_DEFAULT_FORMAT,
        description="景区转换格式",
    )
    # Conversion format for product content.
    product_format: str = Field(
        default=_DEFAULT_FORMAT,
        description="产品转换格式",
    )

    class Config:
        # Example payload attached to the generated schema.
        schema_extra = {
            "example": {
                "file_path": "/path/to/document.pdf",
                "attraction_format": "standard",
                "product_format": "standard",
            },
        }


# Batch document processing request: same format selectors as the
# single-document request, but with a list of file paths.
class BatchProcessRequest(BaseModel):
    """批量文档处理请求模型"""

    # List of document file paths (required).
    file_paths: List[str] = Field(..., description="文档文件路径列表")
    # Conversion format for scenic-area (attraction) content.
    attraction_format: str = Field(
        default=_DEFAULT_FORMAT,
        description="景区转换格式",
    )
    # Conversion format for product content.
    product_format: str = Field(
        default=_DEFAULT_FORMAT,
        description="产品转换格式",
    )

    class Config:
        # Example payload attached to the generated schema.
        schema_extra = {
            "example": {
                "file_paths": [
                    "/path/to/document1.pdf",
                    "/path/to/document2.docx",
                ],
                "attraction_format": "standard",
                "product_format": "standard",
            },
        }


# Text extraction request: carries only the path of the document file.
class TextExtractionRequest(BaseModel):
    """文本提取请求模型"""

    # Path of the document file (required).
    file_path: str = Field(..., description="文档文件路径")

    class Config:
        # Example payload attached to the generated schema.
        schema_extra = {
            "example": {
                "file_path": "/path/to/document.pdf",
            },
        }


# Text parsing request: raw text plus an optional free-form metadata mapping.
class TextParsingRequest(BaseModel):
    """文本解析请求模型"""

    # Text content (required).
    text: str = Field(..., description="文本内容")
    # Optional metadata; defaults to None when omitted.
    metadata: Optional[Dict[str, Any]] = Field(
        default=None,
        description="元数据",
    )

    class Config:
        # Example payload attached to the generated schema.
        schema_extra = {
            "example": {
                "text": "这是一个关于某景区的介绍文档...",
                "metadata": {
                    "title": "景区介绍",
                    "author": "作者",
                },
            },
        }


# Document transformation request: an already-parsed document plus the two
# conversion-format selectors, which fall back to "standard".
class DocumentTransformRequest(BaseModel):
    """文档转换请求模型"""

    # Parsed document data (required); typed only as a str-keyed mapping.
    parsed_document: Dict[str, Any] = Field(
        ...,
        description="解析后的文档数据",
    )
    # Conversion format for scenic-area (attraction) content.
    attraction_format: str = Field(
        default=_DEFAULT_FORMAT,
        description="景区转换格式",
    )
    # Conversion format for product content.
    product_format: str = Field(
        default=_DEFAULT_FORMAT,
        description="产品转换格式",
    )

    class Config:
        # Example payload attached to the generated schema.
        schema_extra = {
            "example": {
                "parsed_document": {
                    "title": "景区介绍",
                    "sections": [],
                    "attractions": [],
                    "products": [],
                    "metadata": {},
                },
                "attraction_format": "standard",
                "product_format": "standard",
            },
        }


# ---------------------------------------------------------------------------
# Response models
# ---------------------------------------------------------------------------

# Document processing response: only ``request_id`` and ``success`` are
# required; every per-stage result and the error fields default to None.
class DocumentProcessResponse(BaseModel):
    """文档处理响应模型"""

    # Request ID (required).
    request_id: str = Field(..., description="请求ID")
    # Whether processing succeeded (required).
    success: bool = Field(..., description="处理是否成功")
    # Source file information.
    source_file: Optional[Dict[str, Any]] = Field(
        default=None,
        description="源文件信息",
    )
    # Text extraction result.
    extraction_result: Optional[Dict[str, Any]] = Field(
        default=None,
        description="文本提取结果",
    )
    # Document parsing result.
    parsing_result: Optional[Dict[str, Any]] = Field(
        default=None,
        description="文档解析结果",
    )
    # Content transformation result.
    transformation_result: Optional[Dict[str, Any]] = Field(
        default=None,
        description="内容转换结果",
    )
    # Processing time, carried as a string.
    processing_time: Optional[str] = Field(
        default=None,
        description="处理时间",
    )
    # Error message.
    error: Optional[str] = Field(default=None, description="错误信息")
    # Stage at which processing failed.
    stage: Optional[str] = Field(default=None, description="失败阶段")

    class Config:
        # Example payload attached to the generated schema.
        schema_extra = {
            "example": {
                "request_id": "document-20240715-123456-a1b2c3d4",
                "success": True,
                "source_file": {
                    "path": "/path/to/document.pdf",
                    "name": "document.pdf",
                    "format": ".pdf",
                },
                "extraction_result": {
                    "text_length": 1500,
                    "metadata": {
                        "total_pages": 5,
                        "title": "景区介绍",
                    },
                },
                "parsing_result": {
                    "title": "某景区介绍",
                    "sections_count": 3,
                    "attractions_found": 2,
                    "products_found": 1,
                },
                "transformation_result": {
                    "request_id": "document-20240715-123456-a1b2c3d4",
                    "transformed_attractions": [],
                    "transformed_products": [],
                    "document_summary": {},
                },
                "processing_time": "2024-07-15 12:34:56",
            },
        }


# Batch processing response: aggregate counters plus a list of per-file
# result mappings.  Every field is required.
class BatchProcessResponse(BaseModel):
    """批量处理响应模型"""

    # Batch request ID.
    batch_request_id: str = Field(..., description="批次请求ID")
    # Total number of files.
    total_files: int = Field(..., description="总文件数")
    # Number of files processed successfully.
    successful_count: int = Field(..., description="成功处理数")
    # Number of files that failed.
    failed_count: int = Field(..., description="失败处理数")
    # List of processing results.
    results: List[Dict[str, Any]] = Field(..., description="处理结果列表")
    # Processing time, carried as a string.
    processing_time: str = Field(..., description="处理时间")

    class Config:
        # Example payload attached to the generated schema.
        schema_extra = {
            "example": {
                "batch_request_id": "document-20240715-123456-a1b2c3d4",
                "total_files": 2,
                "successful_count": 1,
                "failed_count": 1,
                "results": [
                    {
                        "request_id": "document-20240715-123457-b2c3d4e5",
                        "success": True,
                        "source_file": {
                            "path": "/path/to/document1.pdf",
                        },
                    },
                    {
                        "request_id": "document-20240715-123458-c3d4e5f6",
                        "success": False,
                        "error": "不支持的文件格式",
                    },
                ],
                "processing_time": "2024-07-15 12:34:56",
            },
        }


# Text extraction response.  Unlike the other response models, ``metadata``
# and ``source_file`` are required here; ``text`` and ``error`` are optional.
class TextExtractionResponse(BaseModel):
    """文本提取响应模型"""

    # Request ID (required).
    request_id: str = Field(..., description="请求ID")
    # Whether extraction succeeded (required).
    success: bool = Field(..., description="提取是否成功")
    # Extracted text content.
    text: Optional[str] = Field(default=None, description="提取的文本内容")
    # Document metadata (required).
    metadata: Dict[str, Any] = Field(..., description="文档元数据")
    # Source file information (required).
    source_file: Dict[str, Any] = Field(..., description="源文件信息")
    # Error message.
    error: Optional[str] = Field(default=None, description="错误信息")

    class Config:
        # Example payload attached to the generated schema.
        schema_extra = {
            "example": {
                "request_id": "document-20240715-123456-a1b2c3d4",
                "success": True,
                "text": "这是从PDF文档中提取的文本内容...",
                "metadata": {
                    "total_pages": 5,
                    "title": "景区介绍",
                    "author": "作者",
                },
                "source_file": {
                    "path": "/path/to/document.pdf",
                    "name": "document.pdf",
                    "format": ".pdf",
                },
            },
        }


# Text parsing response: the parsed document and its statistics are both
# optional mappings that default to None.
class TextParsingResponse(BaseModel):
    """文本解析响应模型"""

    # Request ID (required).
    request_id: str = Field(..., description="请求ID")
    # Whether parsing succeeded (required).
    success: bool = Field(..., description="解析是否成功")
    # Parsed document.
    parsed_document: Optional[Dict[str, Any]] = Field(
        default=None,
        description="解析后的文档",
    )
    # Parsing statistics.
    statistics: Optional[Dict[str, Any]] = Field(
        default=None,
        description="解析统计信息",
    )
    # Error message.
    error: Optional[str] = Field(default=None, description="错误信息")

    class Config:
        # Example payload attached to the generated schema.
        schema_extra = {
            "example": {
                "request_id": "document-20240715-123456-a1b2c3d4",
                "success": True,
                "parsed_document": {
                    "title": "某景区介绍",
                    "sections": [],
                    "attractions": [],
                    "products": [],
                    "metadata": {},
                },
                "statistics": {
                    "title": "某景区介绍",
                    "sections_count": 3,
                    "attractions_found": 2,
                    "products_found": 1,
                },
            },
        }


# Document transformation response: the transformation result and the error
# message are optional and default to None.
class DocumentTransformResponse(BaseModel):
    """文档转换响应模型"""

    # Request ID (required).
    request_id: str = Field(..., description="请求ID")
    # Whether transformation succeeded (required).
    success: bool = Field(..., description="转换是否成功")
    # Transformation result.
    transformation_result: Optional[Dict[str, Any]] = Field(
        default=None,
        description="转换结果",
    )
    # Error message.
    error: Optional[str] = Field(default=None, description="错误信息")

    class Config:
        # Example payload attached to the generated schema.
        schema_extra = {
            "example": {
                "request_id": "document-20240715-123456-a1b2c3d4",
                "success": True,
                "transformation_result": {
                    "request_id": "document-20240715-123456-a1b2c3d4",
                    "transformed_attractions": [],
                    "transformed_products": [],
                    "document_summary": {},
                },
            },
        }


# Supported formats response: a str-to-str mapping of file formats and a
# nested mapping of transformation formats.  Both fields are required.
class SupportedFormatsResponse(BaseModel):
    """支持格式响应模型"""

    # Supported file formats.
    supported_file_formats: Dict[str, str] = Field(
        ...,
        description="支持的文件格式",
    )
    # Transformation formats.
    transformation_formats: Dict[str, Dict[str, Any]] = Field(
        ...,
        description="转换格式",
    )

    class Config:
        # Example payload attached to the generated schema.
        schema_extra = {
            "example": {
                "supported_file_formats": {
                    ".pdf": "PDF文档",
                    ".docx": "Word文档",
                    ".txt": "纯文本文件",
                },
                "transformation_formats": {
                    "attraction_formats": {
                        "standard": {
                            "name": "景区标准格式",
                            "description": "包含基本信息、特色介绍、游玩指南等标准化景区资料",
                        },
                    },
                    "product_formats": {
                        "standard": {
                            "name": "产品标准格式",
                            "description": "包含产品基本信息、特色、价格等标准化产品资料",
                        },
                    },
                },
            },
        }


# Processing statistics response: format counts and a service status string.
# Every field is required.
class ProcessingStatisticsResponse(BaseModel):
    """处理统计响应模型"""

    # Number of supported formats.
    supported_formats: int = Field(..., description="支持的格式数量")
    # Number of available transformation formats, as a str-to-int mapping.
    available_transformation_formats: Dict[str, int] = Field(
        ...,
        description="可用转换格式数量",
    )
    # Service status.
    service_status: str = Field(..., description="服务状态")

    class Config:
        # Example payload attached to the generated schema.
        schema_extra = {
            "example": {
                "supported_formats": 8,
                "available_transformation_formats": {
                    "attraction_formats": 3,
                    "product_formats": 3,
                },
                "service_status": "active",
            },
        }