#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
File Service
文件处理服务 - API v2
处理大文件上传、下载、流式传输等
"""

import hashlib
import logging
import os
from pathlib import Path
from typing import Dict, Any, Optional, AsyncGenerator, BinaryIO, List
from urllib.parse import quote

import aiofiles
from fastapi import UploadFile
from fastapi.responses import StreamingResponse

logger = logging.getLogger(__name__)


class FileService:
    """File handling service.

    Saves uploads to a local directory in fixed-size chunks (enforcing a
    per-extension size limit while streaming), streams stored files back
    as download responses, and reports / deletes / lists stored files.
    """

    # Extension (lower-case, no dot) -> MIME type used for downloads and
    # file listings. Unknown extensions map to application/octet-stream.
    _MEDIA_TYPES = {
        # Documents
        'pdf': 'application/pdf',
        'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        'txt': 'text/plain',
        'csv': 'text/csv',
        'json': 'application/json',
        'xml': 'application/xml',
        'html': 'text/html',
        # Images
        'jpg': 'image/jpeg',
        'jpeg': 'image/jpeg',
        'png': 'image/png',
        'gif': 'image/gif',
        'webp': 'image/webp',
        # Video
        'mp4': 'video/mp4',
        'avi': 'video/x-msvideo',
        'mov': 'video/quicktime',
    }

    def __init__(self, upload_dir: str = "uploads", chunk_size: int = 8192):
        """Create the service and make sure the upload directory exists.

        Args:
            upload_dir: Directory uploads are written to. It is created,
                together with any missing parent directories.
            chunk_size: Number of bytes read/written per step when
                streaming (default 8 KiB).
        """
        self.upload_dir = Path(upload_dir)
        # parents=True so a nested path such as "data/uploads" works on a
        # fresh checkout instead of raising FileNotFoundError.
        self.upload_dir.mkdir(parents=True, exist_ok=True)
        self.chunk_size = chunk_size

        mib = 1024 * 1024
        # Per-extension upload size limits, in bytes.
        self.max_file_sizes = {
            # Documents
            'pdf': 50 * mib,
            'docx': 20 * mib,
            'xlsx': 20 * mib,
            'txt': 10 * mib,
            'csv': 10 * mib,
            'json': 10 * mib,
            'xml': 10 * mib,
            'html': 10 * mib,
            # Images
            'jpg': 10 * mib,
            'jpeg': 10 * mib,
            'png': 10 * mib,
            'gif': 5 * mib,
            'webp': 10 * mib,
            # Video (larger allowance)
            'mp4': 100 * mib,
            'avi': 100 * mib,
            'mov': 100 * mib,
        }

        # Limit applied to extensions missing from max_file_sizes.
        self.default_max_size = 50 * mib

    def get_file_extension(self, filename: str) -> str:
        """Return the lower-cased extension of *filename* without the dot.

        Returns an empty string when the name has no extension.
        """
        return Path(filename).suffix.lower().lstrip('.')

    def validate_file(self, file: "UploadFile") -> tuple[bool, str]:
        """Check that an upload has a file name and an extension.

        The size is not checked here: it is enforced by
        ``save_upload_file`` while the content is being streamed.

        Returns:
            ``(is_valid, error_message)``; the message is empty when valid.
        """
        if not file.filename:
            return False, "文件名不能为空"

        if not self.get_file_extension(file.filename):
            return False, "文件必须有扩展名"

        return True, ""

    async def save_upload_file(
        self,
        file: "UploadFile",
        custom_filename: Optional[str] = None,
        validate_size: bool = True
    ) -> Dict[str, Any]:
        """Stream an upload to disk chunk by chunk.

        The stored name is ``<8 hex chars of md5(name)>_<name>``. The prefix
        is derived from the name only, so uploading the same name again
        overwrites the previously stored file.

        Args:
            file: The uploaded file.
            custom_filename: Name to store the file under instead of
                ``file.filename``. Only its final path component is used.
            validate_size: When true, abort as soon as the streamed size
                exceeds the limit for the file's extension.

        Returns:
            A dict with the original and stored names, path, size,
            extension, content type, MD5 hex digest of the content and
            the upload directory.

        Raises:
            ValueError: If validation fails, the name is unusable, or the
                size limit is exceeded.
        """
        is_valid, error_msg = self.validate_file(file)
        if not is_valid:
            raise ValueError(error_msg)

        # The name comes from the client: keep only the last path component
        # (treating "\\" as a separator as well) so it cannot address
        # anything outside upload_dir.
        requested_name = custom_filename or file.filename
        filename = Path(requested_name.replace("\\", "/")).name
        if filename in ("", ".."):
            raise ValueError("文件名无效")
        ext = self.get_file_extension(filename)

        name_prefix = hashlib.md5(filename.encode()).hexdigest()[:8]
        unique_filename = f"{name_prefix}_{filename}"
        file_path = self.upload_dir / unique_filename

        total_size = 0
        max_size = self.max_file_sizes.get(ext, self.default_max_size)
        content_digest = hashlib.md5()

        try:
            async with aiofiles.open(file_path, 'wb') as f:
                while chunk := await file.read(self.chunk_size):
                    total_size += len(chunk)
                    if validate_size and total_size > max_size:
                        # The partial file is removed by the handler below,
                        # after the `async with` has closed it.
                        raise ValueError(f"文件大小超过限制 ({max_size / (1024*1024):.1f}MB)")

                    content_digest.update(chunk)
                    await f.write(chunk)

            # Rewind so the caller can read the upload again.
            await file.seek(0)

            file_info = {
                "original_filename": file.filename,
                "saved_filename": unique_filename,
                "file_path": str(file_path),
                "file_size": total_size,
                "file_extension": ext,
                "content_type": file.content_type,
                "file_hash": content_digest.hexdigest(),
                "upload_dir": str(self.upload_dir)
            }

            logger.info(f"文件保存成功: {unique_filename} (大小: {total_size} 字节)")
            return file_info

        except Exception as e:
            # Do not leave a partially written file behind.
            await self._cleanup_file(file_path)
            logger.error(f"文件保存失败: {e}")
            raise

    async def _cleanup_file(self, file_path: Path):
        """Best-effort removal of *file_path*; failures are only logged."""
        try:
            file_path.unlink(missing_ok=True)
        except Exception as e:
            logger.warning(f"清理文件失败: {e}")

    async def read_file_chunks(self, file_path: str) -> AsyncGenerator[bytes, None]:
        """Yield the content of *file_path* in ``chunk_size`` pieces.

        Args:
            file_path: Path of the file to read.

        Yields:
            Consecutive blocks of the file's bytes.

        Raises:
            Exception: Any error from opening or reading the file is
                logged and re-raised.
        """
        try:
            async with aiofiles.open(file_path, 'rb') as f:
                while chunk := await f.read(self.chunk_size):
                    yield chunk
        except Exception as e:
            logger.error(f"读取文件失败: {e}")
            raise

    @staticmethod
    def _content_disposition(filename: str) -> str:
        """Build an ``attachment`` Content-Disposition value for *filename*.

        Emits a quoted ASCII fallback plus a percent-encoded UTF-8
        ``filename*`` parameter, so names containing spaces, quotes,
        control characters or non-ASCII text produce a valid header.
        """
        fallback = "".join(
            ch if ch.isascii() and ch.isprintable() and ch not in '"\\' else "_"
            for ch in filename
        )
        return (
            f'attachment; filename="{fallback}"; '
            f"filename*=UTF-8''{quote(filename, safe='')}"
        )

    def create_streaming_response(
        self,
        file_path: str,
        filename: Optional[str] = None,
        media_type: Optional[str] = None
    ) -> "StreamingResponse":
        """Create a chunked download response for a stored file.

        Args:
            file_path: Path of the file to send.
            filename: Download name offered to the client; defaults to
                the file's own name.
            media_type: MIME type; defaults to a lookup by extension.

        Returns:
            A ``StreamingResponse`` with Content-Disposition and
            Content-Length headers set.

        Raises:
            FileNotFoundError: If *file_path* is not an existing regular
                file.
        """
        file_path_obj = Path(file_path)

        # is_file() rather than exists(): a directory cannot be streamed and
        # would otherwise only fail once the response has started.
        if not file_path_obj.is_file():
            raise FileNotFoundError(f"文件不存在: {file_path}")

        if not media_type:
            media_type = self._get_media_type(self.get_file_extension(file_path))

        if not filename:
            filename = file_path_obj.name

        headers = {
            "Content-Disposition": self._content_disposition(filename),
            "Content-Length": str(file_path_obj.stat().st_size)
        }

        return StreamingResponse(
            self.read_file_chunks(file_path),
            media_type=media_type,
            headers=headers
        )

    def _get_media_type(self, ext: str) -> str:
        """Return the MIME type for *ext*, or application/octet-stream."""
        return self._MEDIA_TYPES.get(ext, 'application/octet-stream')

    def _describe_file(self, path: Path, stat_result: os.stat_result) -> Dict[str, Any]:
        """Build the metadata dict shared by get_file_info and list_files."""
        ext = self.get_file_extension(path.name)
        return {
            "filename": path.name,
            "file_path": str(path),
            "file_size": stat_result.st_size,
            "file_extension": ext,
            "media_type": self._get_media_type(ext),
            "created_time": stat_result.st_ctime,
            "modified_time": stat_result.st_mtime
        }

    async def get_file_info(self, file_path: str) -> Optional[Dict[str, Any]]:
        """Return metadata for *file_path*, or ``None`` if it does not exist.

        The dict holds the name, path, size in bytes, extension, MIME type
        and the ``st_ctime`` / ``st_mtime`` timestamps.
        """
        file_path_obj = Path(file_path)

        if not file_path_obj.exists():
            return None

        return self._describe_file(file_path_obj, file_path_obj.stat())

    async def delete_file(self, file_path: str) -> bool:
        """Delete *file_path*.

        Returns:
            ``True`` if the file existed and was removed; ``False`` if it
            did not exist or removal failed (the failure is logged).
        """
        try:
            file_path_obj = Path(file_path)
            if not file_path_obj.exists():
                logger.warning(f"文件不存在: {file_path}")
                return False

            file_path_obj.unlink()
            logger.info(f"文件删除成功: {file_path}")
            return True
        except Exception as e:
            logger.error(f"文件删除失败: {e}")
            return False

    def list_files(self, directory: Optional[str] = None) -> List[Dict[str, Any]]:
        """List the regular files directly inside a directory.

        Args:
            directory: Directory to list; defaults to the upload directory.

        Returns:
            One metadata dict per file (same keys as ``get_file_info``).
            Empty when the directory does not exist or is not a directory.
            Entries whose metadata cannot be read are logged and skipped.
        """
        target_dir = Path(directory) if directory else self.upload_dir

        if not target_dir.is_dir():
            return []

        files = []
        for entry in target_dir.iterdir():
            if not entry.is_file():
                continue
            try:
                files.append(self._describe_file(entry, entry.stat()))
            except Exception as e:
                logger.warning(f"获取文件信息失败: {entry}, 错误: {e}")

        return files