322 lines
8.3 KiB
Python
322 lines
8.3 KiB
Python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Travel Algorithms Package
AI-driven algorithm package for travel content generation.

This package provides a complete set of AI algorithms for generating
travel-related content, including:
- Topic generation
- Content creation
- Content review
- Poster generation
- Document processing

Features:
- Configuration-driven design with support for dynamic prompt loading
- JSON repair and formatting
- Support for multiple AI models and parameters
- Flexible output management
- Comprehensive error handling
"""

# Package metadata; both names are re-exported through ``__all__``.
__version__ = "1.0.0"
__author__ = "Travel Content Creator Team"
|
||
|
||
# 核心配置和服务
|
||
from .config import (
|
||
AlgorithmConfig,
|
||
AIModelConfig,
|
||
PromptConfig,
|
||
ContentGenerationConfig,
|
||
PosterGenerationConfig,
|
||
DocumentProcessingConfig,
|
||
OutputConfig,
|
||
ResourceConfig,
|
||
TaskModelConfig
|
||
)
|
||
|
||
from .core import (
|
||
AIService,
|
||
OutputManager,
|
||
PromptManager,
|
||
JSONProcessor
|
||
)
|
||
|
||
# 算法模块
|
||
from .content_generation import (
|
||
TopicGenerator,
|
||
ContentGenerator,
|
||
ContentJudger
|
||
)
|
||
|
||
# 海报生成算法
|
||
from .poster_generation import (
|
||
PosterGenerator,
|
||
TextGenerator,
|
||
TemplateManager
|
||
)
|
||
|
||
# 文档处理算法
|
||
from .document_processing import (
|
||
DocumentProcessor,
|
||
TextExtractor,
|
||
ContentIntegrator,
|
||
ContentTransformer,
|
||
ExtractedDocument,
|
||
IntegratedContent,
|
||
TransformedContent
|
||
)
|
||
|
||
# 网页爬虫算法
|
||
from .web_crawling import (
|
||
XHSCrawler,
|
||
XHSNote,
|
||
XHSSearchResult,
|
||
SearchConfig,
|
||
KeywordAnalyzer,
|
||
KeywordResult,
|
||
ContentAnalyzer,
|
||
AnalysisResult
|
||
)
|
||
|
||
# 异常类
|
||
from .exceptions import (
|
||
AlgorithmError,
|
||
ConfigError,
|
||
AIServiceError,
|
||
ContentGenerationError,
|
||
PosterGenerationError,
|
||
DocumentProcessingError,
|
||
ResourceNotFoundError,
|
||
ValidationError
|
||
)
|
||
|
||
# Public API of the package.
#
# Every name listed here must be bound in this module, otherwise
# ``from <package> import *`` raises AttributeError.
#
# NOTE(review): "WebCrawlingConfig", "KeywordAnalysisConfig",
# "ContentAnalysisConfig" and "CrawlingError" used to be listed but are not
# imported or defined in this module, which broke star-imports. Re-add them
# here together with the matching imports if they are meant to be public.
__all__ = [
    # Version information
    "__version__",
    "__author__",

    # Configuration classes
    "AlgorithmConfig",
    "AIModelConfig",
    "PromptConfig",
    "ContentGenerationConfig",
    "PosterGenerationConfig",
    "DocumentProcessingConfig",
    "OutputConfig",
    "ResourceConfig",
    "TaskModelConfig",

    # Core services
    "AIService",
    "OutputManager",
    "PromptManager",
    "JSONProcessor",

    # Algorithm modules
    "TopicGenerator",
    "ContentGenerator",
    "ContentJudger",
    "PosterGenerator",
    "TextGenerator",
    "TemplateManager",
    "DocumentProcessor",
    "TextExtractor",
    "ContentIntegrator",
    "ContentTransformer",
    "XHSCrawler",
    "KeywordAnalyzer",
    "ContentAnalyzer",

    # Data models
    "ExtractedDocument",
    "IntegratedContent",
    "TransformedContent",
    "XHSNote",
    "XHSSearchResult",
    "SearchConfig",
    "KeywordResult",
    "AnalysisResult",

    # Exception classes
    "AlgorithmError",
    "ConfigError",
    "AIServiceError",
    "ContentGenerationError",
    "PosterGenerationError",
    "DocumentProcessingError",
    "ResourceNotFoundError",
    "ValidationError",

    # Convenience factory functions (defined below in this module)
    "create_default_config",
    "create_content_pipeline",
    "create_poster_pipeline",
    "create_document_pipeline",
    "create_crawling_pipeline",
]
|
||
|
||
|
||
def create_default_config(
    resource_base_directory: str = None,
    ai_model: str = "qwq-plus",
    **kwargs
) -> AlgorithmConfig:
    """
    Create a configuration from defaults plus optional overrides.

    Args:
        resource_base_directory: Base directory for resources (optional).
            When falsy, no ``"resources"`` section is generated here.
        ai_model: Name of the AI model. An ``"ai_model"`` section is only
            generated when this differs from ``"qwq-plus"``.
        **kwargs: Additional top-level configuration sections. A dict-valued
            override for a section generated from the arguments above is
            merged into that section (keys in the override win); any other
            override replaces the section wholesale.

    Returns:
        The instance produced by ``AlgorithmConfig.from_dict`` for the
        assembled dictionary.
    """
    config_dict = {}

    if resource_base_directory:
        config_dict["resources"] = {
            "resource_base_directory": resource_base_directory
        }

    if ai_model != "qwq-plus":
        config_dict["ai_model"] = {
            "model": ai_model
        }

    # Apply the remaining overrides. Merging instead of a plain
    # ``dict.update`` keeps an explicitly passed ``resource_base_directory``
    # or ``ai_model`` from being silently dropped when the caller also
    # supplies extra keys for the same section.
    for section, override in kwargs.items():
        generated = config_dict.get(section)
        if isinstance(generated, dict) and isinstance(override, dict):
            config_dict[section] = {**generated, **override}
        else:
            config_dict[section] = override

    return AlgorithmConfig.from_dict(config_dict)
|
||
|
||
|
||
def create_content_pipeline(config: AlgorithmConfig = None) -> dict:
    """
    Assemble the components of the content generation pipeline.

    Args:
        config: Algorithm configuration. When ``None``,
            ``create_default_config()`` is called to obtain one.

    Returns:
        A dictionary with the keys ``config``, ``topic_generator``,
        ``content_generator``, ``content_judger``, ``ai_service``,
        ``output_manager``, ``prompt_manager`` and ``json_processor``.
    """
    config = create_default_config() if config is None else config

    # Components are created one by one, in the same order as the keys.
    pipeline = {"config": config}
    pipeline["topic_generator"] = TopicGenerator(config)
    pipeline["content_generator"] = ContentGenerator(config)
    pipeline["content_judger"] = ContentJudger(config)
    pipeline["ai_service"] = AIService(config.ai_model)
    pipeline["output_manager"] = OutputManager(config.output)
    pipeline["prompt_manager"] = PromptManager(config.prompts, config.resources)

    # JSON repair behaviour is taken from the content generation settings.
    generation = config.content_generation
    pipeline["json_processor"] = JSONProcessor(
        enable_repair=generation.enable_json_repair,
        max_repair_attempts=generation.json_repair_attempts,
    )
    return pipeline
|
||
|
||
|
||
def create_poster_pipeline(config: AlgorithmConfig = None) -> dict:
    """
    Assemble the components of the poster generation pipeline.

    Args:
        config: Algorithm configuration. When ``None``,
            ``create_default_config()`` is called to obtain one.

    Returns:
        A dictionary with the keys ``config``, ``poster_generator``,
        ``text_generator``, ``template_manager``, ``ai_service``,
        ``output_manager``, ``prompt_manager`` and ``json_processor``.
    """
    if config is None:
        config = create_default_config()

    def _new_json_processor():
        # A fresh processor per call, configured from content generation settings.
        return JSONProcessor(
            enable_repair=config.content_generation.enable_json_repair,
            max_repair_attempts=config.content_generation.json_repair_attempts,
        )

    poster_generator = PosterGenerator(config)

    # The text generator gets its own service instances; the ones exposed
    # under the top-level keys below are separate objects.
    text_generator = TextGenerator(
        config,
        AIService(config.ai_model),
        PromptManager(config.prompts, config.resources),
        _new_json_processor(),
    )

    template_manager = TemplateManager(config.poster_generation, config.resources)

    return {
        "config": config,
        "poster_generator": poster_generator,
        "text_generator": text_generator,
        "template_manager": template_manager,
        "ai_service": AIService(config.ai_model),
        "output_manager": OutputManager(config.output),
        "prompt_manager": PromptManager(config.prompts, config.resources),
        "json_processor": _new_json_processor(),
    }
|
||
|
||
|
||
def create_document_pipeline(config: AlgorithmConfig = None) -> dict:
    """
    Assemble the components of the document processing pipeline.

    Args:
        config: Algorithm configuration. When ``None``,
            ``create_default_config()`` is called to obtain one.

    Returns:
        A dictionary with the keys ``config``, ``document_processor``,
        ``text_extractor``, ``content_integrator``, ``content_transformer``,
        ``ai_service``, ``output_manager``, ``prompt_manager`` and
        ``json_processor``.
    """
    if config is None:
        config = create_default_config()

    # Keyword arguments are evaluated left to right, so the components are
    # constructed in the same order as the resulting dictionary keys.
    return dict(
        config=config,
        document_processor=DocumentProcessor(config),
        text_extractor=TextExtractor(config.document_processing),
        content_integrator=ContentIntegrator(config.document_processing),
        content_transformer=ContentTransformer(config),
        ai_service=AIService(config.ai_model),
        output_manager=OutputManager(config.output),
        prompt_manager=PromptManager(config.prompts, config.resources),
        json_processor=JSONProcessor(
            enable_repair=config.content_generation.enable_json_repair,
            max_repair_attempts=config.content_generation.json_repair_attempts,
        ),
    )
|
||
|
||
|
||
def create_crawling_pipeline(config: AlgorithmConfig = None) -> dict:
    """
    Assemble the components of the crawling and analysis pipeline.

    Args:
        config: Algorithm configuration. When ``None``,
            ``create_default_config()`` is called to obtain one.

    Returns:
        A dictionary with the keys ``config``, ``xhs_crawler``,
        ``keyword_analyzer``, ``content_analyzer``, ``ai_service``,
        ``output_manager``, ``prompt_manager`` and ``json_processor``.
    """
    if config is None:
        config = create_default_config()

    # Crawler and analyzers first, then the shared services.
    xhs_crawler = XHSCrawler(config.web_crawling)
    keyword_analyzer = KeywordAnalyzer(config)
    content_analyzer = ContentAnalyzer(config)

    ai_service = AIService(config.ai_model)
    output_manager = OutputManager(config.output)
    prompt_manager = PromptManager(config.prompts, config.resources)
    json_processor = JSONProcessor(
        enable_repair=config.content_generation.enable_json_repair,
        max_repair_attempts=config.content_generation.json_repair_attempts,
    )

    return {
        "config": config,
        "xhs_crawler": xhs_crawler,
        "keyword_analyzer": keyword_analyzer,
        "content_analyzer": content_analyzer,
        "ai_service": ai_service,
        "output_manager": output_manager,
        "prompt_manager": prompt_manager,
        "json_processor": json_processor,
    }
|
||
|
||
# Logging setup: register a NullHandler on this package's logger.
import logging

logging.getLogger(__name__).addHandler(
    logging.NullHandler()
)