322 lines
8.3 KiB
Python
Raw Permalink Normal View History

2025-07-31 15:35:23 +08:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Travel Algorithms Package

AI-driven algorithm package for generating travel content.

This package provides a complete set of AI algorithms for generating
travel-related content, including:
- Topic generation
- Content creation
- Content review
- Poster generation
- Document processing

Features:
- Configuration-driven design with dynamic prompt loading
- JSON repair and formatting
- Support for multiple AI models and parameters
- Flexible output management
- Comprehensive error handling
"""
__version__ = "1.0.0"
__author__ = "Travel Content Creator Team"
# 核心配置和服务
from .config import (
AlgorithmConfig,
AIModelConfig,
PromptConfig,
ContentGenerationConfig,
PosterGenerationConfig,
DocumentProcessingConfig,
OutputConfig,
ResourceConfig,
TaskModelConfig
)
from .core import (
AIService,
OutputManager,
PromptManager,
JSONProcessor
)
# 算法模块
from .content_generation import (
TopicGenerator,
ContentGenerator,
ContentJudger
)
# 海报生成算法
from .poster_generation import (
PosterGenerator,
TextGenerator,
TemplateManager
)
# 文档处理算法
from .document_processing import (
DocumentProcessor,
TextExtractor,
ContentIntegrator,
ContentTransformer,
ExtractedDocument,
IntegratedContent,
TransformedContent
)
# 网页爬虫算法
from .web_crawling import (
XHSCrawler,
XHSNote,
XHSSearchResult,
SearchConfig,
KeywordAnalyzer,
KeywordResult,
ContentAnalyzer,
AnalysisResult
)
# 异常类
from .exceptions import (
AlgorithmError,
ConfigError,
AIServiceError,
ContentGenerationError,
PosterGenerationError,
DocumentProcessingError,
ResourceNotFoundError,
ValidationError
)
# Public API of the package.
#
# Every entry must be a name that is actually bound in this module: a star
# import (`from <package> import *`) looks each entry up as a module attribute
# and raises AttributeError on the first one that is missing.
#
# NOTE(review): "WebCrawlingConfig", "KeywordAnalysisConfig",
# "ContentAnalysisConfig" and "CrawlingError" used to be listed here but are
# never imported into this module, which broke star imports. Re-add them
# together with the matching imports if those classes exist in `.config` /
# `.exceptions`.
__all__ = [
    # Version information
    "__version__",
    "__author__",
    # Configuration classes
    "AlgorithmConfig",
    "AIModelConfig",
    "PromptConfig",
    "ContentGenerationConfig",
    "PosterGenerationConfig",
    "DocumentProcessingConfig",
    "OutputConfig",
    "ResourceConfig",
    "TaskModelConfig",
    # Core services
    "AIService",
    "OutputManager",
    "PromptManager",
    "JSONProcessor",
    # Algorithm modules
    "TopicGenerator",
    "ContentGenerator",
    "ContentJudger",
    "PosterGenerator",
    "TextGenerator",
    "TemplateManager",
    "DocumentProcessor",
    "TextExtractor",
    "ContentIntegrator",
    "ContentTransformer",
    "XHSCrawler",
    "KeywordAnalyzer",
    "ContentAnalyzer",
    # Data models
    "ExtractedDocument",
    "IntegratedContent",
    "TransformedContent",
    "XHSNote",
    "XHSSearchResult",
    "SearchConfig",
    "KeywordResult",
    "AnalysisResult",
    # Exception classes
    "AlgorithmError",
    "ConfigError",
    "AIServiceError",
    "ContentGenerationError",
    "PosterGenerationError",
    "DocumentProcessingError",
    "ResourceNotFoundError",
    "ValidationError",
    # Convenience factories
    "create_default_config",
    "create_content_pipeline",
    "create_poster_pipeline",
    "create_document_pipeline",
    "create_crawling_pipeline",
]
def create_default_config(
    resource_base_directory: str = None,
    ai_model: str = "qwq-plus",
    **kwargs
) -> AlgorithmConfig:
    """
    Create an algorithm configuration from a few common overrides.

    Args:
        resource_base_directory: Base directory for resources (optional).
            Ignored when falsy (None or an empty string).
        ai_model: Name of the AI model. The value "qwq-plus" is treated as
            "no override": no "ai_model" section is passed on in that case.
        **kwargs: Additional configuration sections, keyed by section name.
            A dict-valued section is merged on top of the section produced
            by the explicit arguments (keys in kwargs win on conflict);
            any other value replaces the section outright.

    Returns:
        The configuration instance built by AlgorithmConfig.from_dict().
    """
    config_dict = {}
    if resource_base_directory:
        config_dict["resources"] = {
            "resource_base_directory": resource_base_directory
        }
    if ai_model != "qwq-plus":
        config_dict["ai_model"] = {
            "model": ai_model
        }

    # Apply the remaining overrides. A plain dict.update() would replace a
    # whole section and silently drop the explicit arguments set above
    # (e.g. resources={...} discarding resource_base_directory), so
    # dict-valued sections are merged one level deep instead.
    for section, override in kwargs.items():
        existing = config_dict.get(section)
        if isinstance(existing, dict) and isinstance(override, dict):
            config_dict[section] = {**existing, **override}
        else:
            config_dict[section] = override

    return AlgorithmConfig.from_dict(config_dict)
def create_content_pipeline(config: AlgorithmConfig = None) -> dict:
    """
    Assemble the components of the content-generation pipeline.

    Args:
        config: Algorithm configuration (optional). When None, the result of
            create_default_config() is used.

    Returns:
        A dict with the configuration under "config" and one newly
        constructed instance under each of "topic_generator",
        "content_generator", "content_judger", "ai_service",
        "output_manager", "prompt_manager" and "json_processor".
    """
    cfg = create_default_config() if config is None else config

    # Components are constructed in the order of the keys they are stored under.
    pipeline = {"config": cfg}
    pipeline["topic_generator"] = TopicGenerator(cfg)
    pipeline["content_generator"] = ContentGenerator(cfg)
    pipeline["content_judger"] = ContentJudger(cfg)
    pipeline["ai_service"] = AIService(cfg.ai_model)
    pipeline["output_manager"] = OutputManager(cfg.output)
    pipeline["prompt_manager"] = PromptManager(cfg.prompts, cfg.resources)
    pipeline["json_processor"] = JSONProcessor(
        enable_repair=cfg.content_generation.enable_json_repair,
        max_repair_attempts=cfg.content_generation.json_repair_attempts,
    )
    return pipeline
def create_poster_pipeline(config: AlgorithmConfig = None) -> dict:
    """
    Assemble the components of the poster-generation pipeline.

    Args:
        config: Algorithm configuration (optional). When None, the result of
            create_default_config() is used.

    Returns:
        A dict with the configuration under "config" and one newly
        constructed instance under each of "poster_generator",
        "text_generator", "template_manager", "ai_service",
        "output_manager", "prompt_manager" and "json_processor".
        The text generator receives its own AIService, PromptManager and
        JSONProcessor instances; they are distinct objects from the ones
        stored under the shared keys.
    """
    cfg = create_default_config() if config is None else config

    pipeline = {"config": cfg}
    pipeline["poster_generator"] = PosterGenerator(cfg)

    # Dedicated collaborators for the text generator, built in argument order.
    text_ai_service = AIService(cfg.ai_model)
    text_prompt_manager = PromptManager(cfg.prompts, cfg.resources)
    text_json_processor = JSONProcessor(
        enable_repair=cfg.content_generation.enable_json_repair,
        max_repair_attempts=cfg.content_generation.json_repair_attempts,
    )
    pipeline["text_generator"] = TextGenerator(
        cfg,
        text_ai_service,
        text_prompt_manager,
        text_json_processor,
    )

    pipeline["template_manager"] = TemplateManager(
        cfg.poster_generation, cfg.resources
    )

    # Shared services exposed to the caller.
    pipeline["ai_service"] = AIService(cfg.ai_model)
    pipeline["output_manager"] = OutputManager(cfg.output)
    pipeline["prompt_manager"] = PromptManager(cfg.prompts, cfg.resources)
    pipeline["json_processor"] = JSONProcessor(
        enable_repair=cfg.content_generation.enable_json_repair,
        max_repair_attempts=cfg.content_generation.json_repair_attempts,
    )
    return pipeline
def create_document_pipeline(config: AlgorithmConfig = None) -> dict:
    """
    Assemble the components of the document-processing pipeline.

    Args:
        config: Algorithm configuration (optional). When None, the result of
            create_default_config() is used.

    Returns:
        A dict with the configuration under "config" and one newly
        constructed instance under each of "document_processor",
        "text_extractor", "content_integrator", "content_transformer",
        "ai_service", "output_manager", "prompt_manager" and
        "json_processor".
    """
    if config is None:
        config = create_default_config()

    # Document-specific components first, then the shared services; this is
    # also the order of the keys in the returned dict.
    document_processor = DocumentProcessor(config)
    text_extractor = TextExtractor(config.document_processing)
    content_integrator = ContentIntegrator(config.document_processing)
    content_transformer = ContentTransformer(config)
    ai_service = AIService(config.ai_model)
    output_manager = OutputManager(config.output)
    prompt_manager = PromptManager(config.prompts, config.resources)
    json_processor = JSONProcessor(
        enable_repair=config.content_generation.enable_json_repair,
        max_repair_attempts=config.content_generation.json_repair_attempts,
    )

    return {
        "config": config,
        "document_processor": document_processor,
        "text_extractor": text_extractor,
        "content_integrator": content_integrator,
        "content_transformer": content_transformer,
        "ai_service": ai_service,
        "output_manager": output_manager,
        "prompt_manager": prompt_manager,
        "json_processor": json_processor,
    }
def create_crawling_pipeline(config: AlgorithmConfig = None) -> dict:
    """
    Assemble the components of the crawling and analysis pipeline.

    Args:
        config: Algorithm configuration (optional). When None, the result of
            create_default_config() is used.

    Returns:
        A dict with the configuration under "config" and one newly
        constructed instance under each of "xhs_crawler",
        "keyword_analyzer", "content_analyzer", "ai_service",
        "output_manager", "prompt_manager" and "json_processor".
    """
    cfg = config
    if cfg is None:
        cfg = create_default_config()

    pipeline = {"config": cfg}

    # Crawling and analysis components.
    pipeline["xhs_crawler"] = XHSCrawler(cfg.web_crawling)
    pipeline["keyword_analyzer"] = KeywordAnalyzer(cfg)
    pipeline["content_analyzer"] = ContentAnalyzer(cfg)

    # Shared services.
    pipeline["ai_service"] = AIService(cfg.ai_model)
    pipeline["output_manager"] = OutputManager(cfg.output)
    pipeline["prompt_manager"] = PromptManager(cfg.prompts, cfg.resources)
    pipeline["json_processor"] = JSONProcessor(
        enable_repair=cfg.content_generation.enable_json_repair,
        max_repair_attempts=cfg.content_generation.json_repair_attempts,
    )
    return pipeline
# Logging setup: attach a NullHandler to the package logger so the library
# emits no log output by itself; the host application decides how (and
# whether) logging is configured.
import logging
logging.getLogger(__name__).addHandler(logging.NullHandler())