395 lines
27 KiB
JSON
395 lines
27 KiB
JSON
{
|
||
"file_path": "travel-algorithms/travel_algorithms/document_processing/document_processor.py",
|
||
"file_size": 7229,
|
||
"line_count": 200,
|
||
"functions": [
|
||
{
|
||
"name": "__init__",
|
||
"line_start": 28,
|
||
"line_end": 40,
|
||
"args": [
|
||
{
|
||
"name": "self"
|
||
},
|
||
{
|
||
"name": "config",
|
||
"type_hint": "AlgorithmConfig"
|
||
}
|
||
],
|
||
"return_type": null,
|
||
"docstring": "初始化文档处理器\n\nArgs:\n config: 算法配置",
|
||
"is_async": false,
|
||
"decorators": [],
|
||
"code": " def __init__(self, config: AlgorithmConfig):\n \"\"\"\n 初始化文档处理器\n \n Args:\n config: 算法配置\n \"\"\"\n self.config = config\n self.text_extractor = TextExtractor(config.document_processing)\n self.content_integrator = ContentIntegrator(config.document_processing)\n self.content_transformer = ContentTransformer(config)\n \n logger.info(\"统一文档处理器初始化完成\")",
|
||
"code_hash": "4b9ee82331745ab38dca2eda7d5ab72e"
|
||
},
|
||
{
|
||
"name": "_extract_documents",
|
||
"line_start": 108,
|
||
"line_end": 134,
|
||
"args": [
|
||
{
|
||
"name": "self"
|
||
},
|
||
{
|
||
"name": "sources",
|
||
"type_hint": "Union[str, Path, List[Union[str, Path]]]"
|
||
}
|
||
],
|
||
"return_type": "List[ExtractedDocument]",
|
||
"docstring": "提取文档内容",
|
||
"is_async": false,
|
||
"decorators": [],
|
||
"code": " def _extract_documents(self, sources: Union[str, Path, List[Union[str, Path]]]) -> List[ExtractedDocument]:\n \"\"\"提取文档内容\"\"\"\n if isinstance(sources, (str, Path)):\n sources = [sources]\n \n extracted_docs = []\n \n for source in sources:\n source_path = Path(source)\n \n try:\n if source_path.is_file():\n # 处理单个文件\n doc = self.text_extractor.extract_from_file(source_path)\n extracted_docs.append(doc)\n elif source_path.is_dir():\n # 处理目录中的所有文件\n dir_docs = self.text_extractor.extract_from_directory(source_path)\n extracted_docs.extend(dir_docs)\n else:\n logger.warning(f\"无效的文档源: {source_path}\")\n \n except Exception as e:\n logger.error(f\"处理文档源失败 {source_path}: {e}\")\n continue\n \n return extracted_docs",
|
||
"code_hash": "b4ce9ee5159b47d86dd213744fc8f6df"
|
||
},
|
||
{
|
||
"name": "integrate_only",
|
||
"line_start": 143,
|
||
"line_end": 145,
|
||
"args": [
|
||
{
|
||
"name": "self"
|
||
},
|
||
{
|
||
"name": "documents",
|
||
"type_hint": "List[ExtractedDocument]"
|
||
}
|
||
],
|
||
"return_type": "IntegratedContent",
|
||
"docstring": "仅执行内容整合",
|
||
"is_async": false,
|
||
"decorators": [],
|
||
"code": " def integrate_only(self, documents: List[ExtractedDocument]) -> IntegratedContent:\n \"\"\"仅执行内容整合\"\"\"\n return self.content_integrator.integrate_documents(documents)",
|
||
"code_hash": "368f39a7d8dab90bd6e5235af61765d6"
|
||
},
|
||
{
|
||
"name": "get_supported_formats",
|
||
"line_start": 162,
|
||
"line_end": 167,
|
||
"args": [
|
||
{
|
||
"name": "self"
|
||
}
|
||
],
|
||
"return_type": "Dict[str, str]",
|
||
"docstring": "获取支持的文件格式和转换格式",
|
||
"is_async": false,
|
||
"decorators": [],
|
||
"code": " def get_supported_formats(self) -> Dict[str, str]:\n \"\"\"获取支持的文件格式和转换格式\"\"\"\n return {\n 'input_formats': self.text_extractor.get_supported_formats(),\n 'output_formats': self.content_transformer.get_supported_formats()\n }",
|
||
"code_hash": "fba6eb3a4149d4d2f972168ce616591b"
|
||
},
|
||
{
|
||
"name": "get_processing_stats",
|
||
"line_start": 169,
|
||
"line_end": 176,
|
||
"args": [
|
||
{
|
||
"name": "self"
|
||
}
|
||
],
|
||
"return_type": "Dict[str, Any]",
|
||
"docstring": "获取处理器统计信息",
|
||
"is_async": false,
|
||
"decorators": [],
|
||
"code": " def get_processing_stats(self) -> Dict[str, Any]:\n \"\"\"获取处理器统计信息\"\"\"\n return {\n 'extractor_stats': self.text_extractor.get_extraction_stats(),\n 'integrator_stats': self.content_integrator.get_integration_stats(),\n 'transformer_stats': self.content_transformer.get_transformation_stats(),\n 'supported_formats': self.get_supported_formats()\n }",
|
||
"code_hash": "a854e400e8958fa3168b34474aeee365"
|
||
}
|
||
],
|
||
"classes": [
|
||
{
|
||
"name": "DocumentProcessor",
|
||
"line_start": 22,
|
||
"line_end": 201,
|
||
"bases": [],
|
||
"methods": [
|
||
{
|
||
"name": "__init__",
|
||
"line_start": 28,
|
||
"line_end": 40,
|
||
"args": [
|
||
{
|
||
"name": "self"
|
||
},
|
||
{
|
||
"name": "config",
|
||
"type_hint": "AlgorithmConfig"
|
||
}
|
||
],
|
||
"return_type": null,
|
||
"docstring": "初始化文档处理器\n\nArgs:\n config: 算法配置",
|
||
"is_async": false,
|
||
"decorators": [],
|
||
"code": " def __init__(self, config: AlgorithmConfig):\n \"\"\"\n 初始化文档处理器\n \n Args:\n config: 算法配置\n \"\"\"\n self.config = config\n self.text_extractor = TextExtractor(config.document_processing)\n self.content_integrator = ContentIntegrator(config.document_processing)\n self.content_transformer = ContentTransformer(config)\n \n logger.info(\"统一文档处理器初始化完成\")",
|
||
"code_hash": "4b9ee82331745ab38dca2eda7d5ab72e"
|
||
},
|
||
{
|
||
"name": "process_documents",
|
||
"line_start": 42,
|
||
"line_end": 106,
|
||
"args": [
|
||
{
|
||
"name": "self"
|
||
},
|
||
{
|
||
"name": "sources",
|
||
"type_hint": "Union[str, Path, List[Union[str, Path]]]"
|
||
},
|
||
{
|
||
"name": "target_format",
|
||
"type_hint": "str"
|
||
},
|
||
{
|
||
"name": "custom_prompt",
|
||
"type_hint": "Optional[str]"
|
||
},
|
||
{
|
||
"name": "additional_requirements",
|
||
"type_hint": "Optional[str]"
|
||
}
|
||
],
|
||
"return_type": "Dict[str, Any]",
|
||
"docstring": "完整的文档处理流水线\n\nArgs:\n sources: 文档源(文件路径、目录路径或路径列表)\n target_format: 目标转换格式\n custom_prompt: 自定义提示词\n additional_requirements: 额外要求\n \nReturns:\n 包含所有处理结果的字典",
|
||
"is_async": true,
|
||
"decorators": [],
|
||
"code": " async def process_documents(\n self,\n sources: Union[str, Path, List[Union[str, Path]]],\n target_format: str = 'summary',\n custom_prompt: Optional[str] = None,\n additional_requirements: Optional[str] = None\n ) -> Dict[str, Any]:\n \"\"\"\n 完整的文档处理流水线\n \n Args:\n sources: 文档源(文件路径、目录路径或路径列表)\n target_format: 目标转换格式\n custom_prompt: 自定义提示词\n additional_requirements: 额外要求\n \n Returns:\n 包含所有处理结果的字典\n \"\"\"\n try:\n logger.info(f\"开始文档处理流水线,目标格式: {target_format}\")\n \n # 第一步:提取文档内容\n extracted_docs = self._extract_documents(sources)\n \n if not extracted_docs:\n raise DocumentProcessingError(\"没有成功提取的文档\")\n \n # 第二步:整合文档内容\n integrated_content = self.content_integrator.integrate_documents(extracted_docs)\n \n # 第三步:转换为目标格式\n transformed_content = await self.content_transformer.transform_content(\n integrated_content=integrated_content,\n format_type=target_format,\n custom_prompt=custom_prompt,\n additional_requirements=additional_requirements\n )\n \n # 整理最终结果\n result = {\n 'processing_summary': {\n 'total_documents': len(extracted_docs),\n 'successful_extractions': len([d for d in extracted_docs if not d.error_info]),\n 'target_format': target_format,\n 'processing_completed': True\n },\n 'extracted_documents': [doc.get_summary() for doc in extracted_docs],\n 'integrated_content': integrated_content.to_dict(),\n 'transformed_content': transformed_content.to_dict(),\n 'final_output': transformed_content.transformed_text,\n 'quality_metrics': {\n 'extraction_success_rate': len([d for d in extracted_docs if not d.error_info]) / len(extracted_docs),\n 'content_integration_quality': len(integrated_content.key_topics) / max(1, integrated_content.document_count),\n 'transformation_quality': transformed_content.quality_score\n }\n }\n \n logger.info(f\"文档处理流水线完成,最终输出长度: {len(transformed_content.transformed_text)}\")\n return result\n \n except Exception as e:\n error_msg = f\"文档处理流水线失败: {str(e)}\"\n logger.error(error_msg, exc_info=True)\n raise DocumentProcessingError(error_msg)",
|
||
"code_hash": "f7fb77852390521c0eb54badeae1b5a1"
|
||
},
|
||
{
|
||
"name": "_extract_documents",
|
||
"line_start": 108,
|
||
"line_end": 134,
|
||
"args": [
|
||
{
|
||
"name": "self"
|
||
},
|
||
{
|
||
"name": "sources",
|
||
"type_hint": "Union[str, Path, List[Union[str, Path]]]"
|
||
}
|
||
],
|
||
"return_type": "List[ExtractedDocument]",
|
||
"docstring": "提取文档内容",
|
||
"is_async": false,
|
||
"decorators": [],
|
||
"code": " def _extract_documents(self, sources: Union[str, Path, List[Union[str, Path]]]) -> List[ExtractedDocument]:\n \"\"\"提取文档内容\"\"\"\n if isinstance(sources, (str, Path)):\n sources = [sources]\n \n extracted_docs = []\n \n for source in sources:\n source_path = Path(source)\n \n try:\n if source_path.is_file():\n # 处理单个文件\n doc = self.text_extractor.extract_from_file(source_path)\n extracted_docs.append(doc)\n elif source_path.is_dir():\n # 处理目录中的所有文件\n dir_docs = self.text_extractor.extract_from_directory(source_path)\n extracted_docs.extend(dir_docs)\n else:\n logger.warning(f\"无效的文档源: {source_path}\")\n \n except Exception as e:\n logger.error(f\"处理文档源失败 {source_path}: {e}\")\n continue\n \n return extracted_docs",
|
||
"code_hash": "b4ce9ee5159b47d86dd213744fc8f6df"
|
||
},
|
||
{
|
||
"name": "extract_only",
|
||
"line_start": 136,
|
||
"line_end": 141,
|
||
"args": [
|
||
{
|
||
"name": "self"
|
||
},
|
||
{
|
||
"name": "sources",
|
||
"type_hint": "Union[str, Path, List[Union[str, Path]]]"
|
||
}
|
||
],
|
||
"return_type": "List[ExtractedDocument]",
|
||
"docstring": "仅执行文档提取",
|
||
"is_async": true,
|
||
"decorators": [],
|
||
"code": " async def extract_only(\n self, \n sources: Union[str, Path, List[Union[str, Path]]]\n ) -> List[ExtractedDocument]:\n \"\"\"仅执行文档提取\"\"\"\n return self._extract_documents(sources)",
|
||
"code_hash": "4d805849738623cdd16c37152c1e3371"
|
||
},
|
||
{
|
||
"name": "integrate_only",
|
||
"line_start": 143,
|
||
"line_end": 145,
|
||
"args": [
|
||
{
|
||
"name": "self"
|
||
},
|
||
{
|
||
"name": "documents",
|
||
"type_hint": "List[ExtractedDocument]"
|
||
}
|
||
],
|
||
"return_type": "IntegratedContent",
|
||
"docstring": "仅执行内容整合",
|
||
"is_async": false,
|
||
"decorators": [],
|
||
"code": " def integrate_only(self, documents: List[ExtractedDocument]) -> IntegratedContent:\n \"\"\"仅执行内容整合\"\"\"\n return self.content_integrator.integrate_documents(documents)",
|
||
"code_hash": "368f39a7d8dab90bd6e5235af61765d6"
|
||
},
|
||
{
|
||
"name": "transform_only",
|
||
"line_start": 147,
|
||
"line_end": 160,
|
||
"args": [
|
||
{
|
||
"name": "self"
|
||
},
|
||
{
|
||
"name": "integrated_content",
|
||
"type_hint": "IntegratedContent"
|
||
},
|
||
{
|
||
"name": "target_format",
|
||
"type_hint": "str"
|
||
},
|
||
{
|
||
"name": "custom_prompt",
|
||
"type_hint": "Optional[str]"
|
||
},
|
||
{
|
||
"name": "additional_requirements",
|
||
"type_hint": "Optional[str]"
|
||
}
|
||
],
|
||
"return_type": "TransformedContent",
|
||
"docstring": "仅执行内容转换",
|
||
"is_async": true,
|
||
"decorators": [],
|
||
"code": " async def transform_only(\n self,\n integrated_content: IntegratedContent,\n target_format: str = 'summary',\n custom_prompt: Optional[str] = None,\n additional_requirements: Optional[str] = None\n ) -> TransformedContent:\n \"\"\"仅执行内容转换\"\"\"\n return await self.content_transformer.transform_content(\n integrated_content=integrated_content,\n format_type=target_format,\n custom_prompt=custom_prompt,\n additional_requirements=additional_requirements\n )",
|
||
"code_hash": "ef7e84b598ade4399922e642f056fbe8"
|
||
},
|
||
{
|
||
"name": "get_supported_formats",
|
||
"line_start": 162,
|
||
"line_end": 167,
|
||
"args": [
|
||
{
|
||
"name": "self"
|
||
}
|
||
],
|
||
"return_type": "Dict[str, str]",
|
||
"docstring": "获取支持的文件格式和转换格式",
|
||
"is_async": false,
|
||
"decorators": [],
|
||
"code": " def get_supported_formats(self) -> Dict[str, str]:\n \"\"\"获取支持的文件格式和转换格式\"\"\"\n return {\n 'input_formats': self.text_extractor.get_supported_formats(),\n 'output_formats': self.content_transformer.get_supported_formats()\n }",
|
||
"code_hash": "fba6eb3a4149d4d2f972168ce616591b"
|
||
},
|
||
{
|
||
"name": "get_processing_stats",
|
||
"line_start": 169,
|
||
"line_end": 176,
|
||
"args": [
|
||
{
|
||
"name": "self"
|
||
}
|
||
],
|
||
"return_type": "Dict[str, Any]",
|
||
"docstring": "获取处理器统计信息",
|
||
"is_async": false,
|
||
"decorators": [],
|
||
"code": " def get_processing_stats(self) -> Dict[str, Any]:\n \"\"\"获取处理器统计信息\"\"\"\n return {\n 'extractor_stats': self.text_extractor.get_extraction_stats(),\n 'integrator_stats': self.content_integrator.get_integration_stats(),\n 'transformer_stats': self.content_transformer.get_transformation_stats(),\n 'supported_formats': self.get_supported_formats()\n }",
|
||
"code_hash": "a854e400e8958fa3168b34474aeee365"
|
||
},
|
||
{
|
||
"name": "batch_process_directories",
|
||
"line_start": 178,
|
||
"line_end": 201,
|
||
"args": [
|
||
{
|
||
"name": "self"
|
||
},
|
||
{
|
||
"name": "directories",
|
||
"type_hint": "List[Union[str, Path]]"
|
||
},
|
||
{
|
||
"name": "target_format",
|
||
"type_hint": "str"
|
||
}
|
||
],
|
||
"return_type": "Dict[str, Dict[str, Any]]",
|
||
"docstring": "批量处理多个目录",
|
||
"is_async": true,
|
||
"decorators": [],
|
||
"code": " async def batch_process_directories(\n self,\n directories: List[Union[str, Path]],\n target_format: str = 'summary'\n ) -> Dict[str, Dict[str, Any]]:\n \"\"\"批量处理多个目录\"\"\"\n results = {}\n \n for i, directory in enumerate(directories):\n try:\n logger.info(f\"批量处理目录 {i+1}/{len(directories)}: {directory}\")\n \n result = await self.process_documents(\n sources=directory,\n target_format=target_format\n )\n \n results[str(directory)] = result\n \n except Exception as e:\n logger.error(f\"批量处理目录失败 {directory}: {e}\")\n results[str(directory)] = {'error': str(e)}\n \n return results ",
|
||
"code_hash": "0c58d9c4d0f4d546cb77a89d2eb374ec"
|
||
}
|
||
],
|
||
"docstring": "统一文档处理器\n提供完整的文档处理流水线:提取 -> 整合 -> 转换",
|
||
"decorators": [],
|
||
"code": "class DocumentProcessor:\n \"\"\"\n 统一文档处理器\n 提供完整的文档处理流水线:提取 -> 整合 -> 转换\n \"\"\"\n \n def __init__(self, config: AlgorithmConfig):\n \"\"\"\n 初始化文档处理器\n \n Args:\n config: 算法配置\n \"\"\"\n self.config = config\n self.text_extractor = TextExtractor(config.document_processing)\n self.content_integrator = ContentIntegrator(config.document_processing)\n self.content_transformer = ContentTransformer(config)\n \n logger.info(\"统一文档处理器初始化完成\")\n \n async def process_documents(\n self,\n sources: Union[str, Path, List[Union[str, Path]]],\n target_format: str = 'summary',\n custom_prompt: Optional[str] = None,\n additional_requirements: Optional[str] = None\n ) -> Dict[str, Any]:\n \"\"\"\n 完整的文档处理流水线\n \n Args:\n sources: 文档源(文件路径、目录路径或路径列表)\n target_format: 目标转换格式\n custom_prompt: 自定义提示词\n additional_requirements: 额外要求\n \n Returns:\n 包含所有处理结果的字典\n \"\"\"\n try:\n logger.info(f\"开始文档处理流水线,目标格式: {target_format}\")\n \n # 第一步:提取文档内容\n extracted_docs = self._extract_documents(sources)\n \n if not extracted_docs:\n raise DocumentProcessingError(\"没有成功提取的文档\")\n \n # 第二步:整合文档内容\n integrated_content = self.content_integrator.integrate_documents(extracted_docs)\n \n # 第三步:转换为目标格式\n transformed_content = await self.content_transformer.transform_content(\n integrated_content=integrated_content,\n format_type=target_format,\n custom_prompt=custom_prompt,\n additional_requirements=additional_requirements\n )\n \n # 整理最终结果\n result = {\n 'processing_summary': {\n 'total_documents': len(extracted_docs),\n 'successful_extractions': len([d for d in extracted_docs if not d.error_info]),\n 'target_format': target_format,\n 'processing_completed': True\n },\n 'extracted_documents': [doc.get_summary() for doc in extracted_docs],\n 'integrated_content': integrated_content.to_dict(),\n 'transformed_content': transformed_content.to_dict(),\n 'final_output': transformed_content.transformed_text,\n 'quality_metrics': {\n 'extraction_success_rate': len([d for d in extracted_docs if not d.error_info]) / len(extracted_docs),\n 'content_integration_quality': len(integrated_content.key_topics) / max(1, integrated_content.document_count),\n 'transformation_quality': transformed_content.quality_score\n }\n }\n \n logger.info(f\"文档处理流水线完成,最终输出长度: {len(transformed_content.transformed_text)}\")\n return result\n \n except Exception as e:\n error_msg = f\"文档处理流水线失败: {str(e)}\"\n logger.error(error_msg, exc_info=True)\n raise DocumentProcessingError(error_msg)\n \n def _extract_documents(self, sources: Union[str, Path, List[Union[str, Path]]]) -> List[ExtractedDocument]:\n \"\"\"提取文档内容\"\"\"\n if isinstance(sources, (str, Path)):\n sources = [sources]\n \n extracted_docs = []\n \n for source in sources:\n source_path = Path(source)\n \n try:\n if source_path.is_file():\n # 处理单个文件\n doc = self.text_extractor.extract_from_file(source_path)\n extracted_docs.append(doc)\n elif source_path.is_dir():\n # 处理目录中的所有文件\n dir_docs = self.text_extractor.extract_from_directory(source_path)\n extracted_docs.extend(dir_docs)\n else:\n logger.warning(f\"无效的文档源: {source_path}\")\n \n except Exception as e:\n logger.error(f\"处理文档源失败 {source_path}: {e}\")\n continue\n \n return extracted_docs\n \n async def extract_only(\n self, \n sources: Union[str, Path, List[Union[str, Path]]]\n ) -> List[ExtractedDocument]:\n \"\"\"仅执行文档提取\"\"\"\n return self._extract_documents(sources)\n \n def integrate_only(self, documents: List[ExtractedDocument]) -> IntegratedContent:\n \"\"\"仅执行内容整合\"\"\"\n return self.content_integrator.integrate_documents(documents)\n \n async def transform_only(\n self,\n integrated_content: IntegratedContent,\n target_format: str = 'summary',\n custom_prompt: Optional[str] = None,\n additional_requirements: Optional[str] = None\n ) -> TransformedContent:\n \"\"\"仅执行内容转换\"\"\"\n return await self.content_transformer.transform_content(\n integrated_content=integrated_content,\n format_type=target_format,\n custom_prompt=custom_prompt,\n additional_requirements=additional_requirements\n )\n \n def get_supported_formats(self) -> Dict[str, str]:\n \"\"\"获取支持的文件格式和转换格式\"\"\"\n return {\n 'input_formats': self.text_extractor.get_supported_formats(),\n 'output_formats': self.content_transformer.get_supported_formats()\n }\n \n def get_processing_stats(self) -> Dict[str, Any]:\n \"\"\"获取处理器统计信息\"\"\"\n return {\n 'extractor_stats': self.text_extractor.get_extraction_stats(),\n 'integrator_stats': self.content_integrator.get_integration_stats(),\n 'transformer_stats': self.content_transformer.get_transformation_stats(),\n 'supported_formats': self.get_supported_formats()\n }\n \n async def batch_process_directories(\n self,\n directories: List[Union[str, Path]],\n target_format: str = 'summary'\n ) -> Dict[str, Dict[str, Any]]:\n \"\"\"批量处理多个目录\"\"\"\n results = {}\n \n for i, directory in enumerate(directories):\n try:\n logger.info(f\"批量处理目录 {i+1}/{len(directories)}: {directory}\")\n \n result = await self.process_documents(\n sources=directory,\n target_format=target_format\n )\n \n results[str(directory)] = result\n \n except Exception as e:\n logger.error(f\"批量处理目录失败 {directory}: {e}\")\n results[str(directory)] = {'error': str(e)}\n \n return results ",
|
||
"code_hash": "cc7099758d3cfc656cba79490f676eae"
|
||
}
|
||
],
|
||
"imports": [
|
||
{
|
||
"type": "import",
|
||
"modules": [
|
||
"logging"
|
||
],
|
||
"aliases": []
|
||
},
|
||
{
|
||
"type": "from_import",
|
||
"module": "typing",
|
||
"names": [
|
||
"List",
|
||
"Dict",
|
||
"Any",
|
||
"Optional",
|
||
"Union"
|
||
],
|
||
"aliases": [],
|
||
"level": 0
|
||
},
|
||
{
|
||
"type": "from_import",
|
||
"module": "pathlib",
|
||
"names": [
|
||
"Path"
|
||
],
|
||
"aliases": [],
|
||
"level": 0
|
||
},
|
||
{
|
||
"type": "from_import",
|
||
"module": "text_extractor",
|
||
"names": [
|
||
"TextExtractor",
|
||
"ExtractedDocument"
|
||
],
|
||
"aliases": [],
|
||
"level": 1
|
||
},
|
||
{
|
||
"type": "from_import",
|
||
"module": "content_integrator",
|
||
"names": [
|
||
"ContentIntegrator",
|
||
"IntegratedContent"
|
||
],
|
||
"aliases": [],
|
||
"level": 1
|
||
},
|
||
{
|
||
"type": "from_import",
|
||
"module": "content_transformer",
|
||
"names": [
|
||
"ContentTransformer",
|
||
"TransformedContent"
|
||
],
|
||
"aliases": [],
|
||
"level": 1
|
||
},
|
||
{
|
||
"type": "from_import",
|
||
"module": "config",
|
||
"names": [
|
||
"AlgorithmConfig"
|
||
],
|
||
"aliases": [],
|
||
"level": 2
|
||
},
|
||
{
|
||
"type": "from_import",
|
||
"module": "exceptions",
|
||
"names": [
|
||
"DocumentProcessingError"
|
||
],
|
||
"aliases": [],
|
||
"level": 2
|
||
}
|
||
],
|
||
"constants": [],
|
||
"docstring": "Document Processor\n统一文档处理器 - 集成文本提取、内容整合和格式转换功能",
|
||
"content_hash": "ce51a3f63e5798e85ac277283280b7dd"
|
||
} |