322 lines
8.3 KiB
Python
322 lines
8.3 KiB
Python
|
|
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Travel Algorithms Package
AI-driven algorithm package for travel content generation.

This package provides a complete set of AI algorithms for generating
travel-related content, including:
- Topic generation
- Content creation
- Content review
- Poster generation
- Document processing

Features:
- Configuration-driven design with support for dynamic prompt loading
- JSON repair and formatting
- Support for multiple AI models and parameters
- Flexible output management
- Comprehensive error handling
"""

# Package metadata.
__version__ = "1.0.0"
__author__ = "Travel Content Creator Team"
|
|||
|
|
|
|||
|
|
# Core configuration and services
|
|||
|
|
from .config import (
|
|||
|
|
AlgorithmConfig,
|
|||
|
|
AIModelConfig,
|
|||
|
|
PromptConfig,
|
|||
|
|
ContentGenerationConfig,
|
|||
|
|
PosterGenerationConfig,
|
|||
|
|
DocumentProcessingConfig,
|
|||
|
|
OutputConfig,
|
|||
|
|
ResourceConfig,
|
|||
|
|
TaskModelConfig
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
from .core import (
|
|||
|
|
AIService,
|
|||
|
|
OutputManager,
|
|||
|
|
PromptManager,
|
|||
|
|
JSONProcessor
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# Algorithm modules
|
|||
|
|
from .content_generation import (
|
|||
|
|
TopicGenerator,
|
|||
|
|
ContentGenerator,
|
|||
|
|
ContentJudger
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# Poster generation algorithms
|
|||
|
|
from .poster_generation import (
|
|||
|
|
PosterGenerator,
|
|||
|
|
TextGenerator,
|
|||
|
|
TemplateManager
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# Document processing algorithms
|
|||
|
|
from .document_processing import (
|
|||
|
|
DocumentProcessor,
|
|||
|
|
TextExtractor,
|
|||
|
|
ContentIntegrator,
|
|||
|
|
ContentTransformer,
|
|||
|
|
ExtractedDocument,
|
|||
|
|
IntegratedContent,
|
|||
|
|
TransformedContent
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# Web crawling algorithms
|
|||
|
|
from .web_crawling import (
|
|||
|
|
XHSCrawler,
|
|||
|
|
XHSNote,
|
|||
|
|
XHSSearchResult,
|
|||
|
|
SearchConfig,
|
|||
|
|
KeywordAnalyzer,
|
|||
|
|
KeywordResult,
|
|||
|
|
ContentAnalyzer,
|
|||
|
|
AnalysisResult
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# Exception classes
|
|||
|
|
from .exceptions import (
|
|||
|
|
AlgorithmError,
|
|||
|
|
ConfigError,
|
|||
|
|
AIServiceError,
|
|||
|
|
ContentGenerationError,
|
|||
|
|
PosterGenerationError,
|
|||
|
|
DocumentProcessingError,
|
|||
|
|
ResourceNotFoundError,
|
|||
|
|
ValidationError
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
# Public API exported by ``from <package> import *``.
#
# NOTE: every name listed here must be bound in this module, otherwise a star
# import raises AttributeError. "WebCrawlingConfig", "KeywordAnalysisConfig",
# "ContentAnalysisConfig" and "CrawlingError" used to be listed without being
# imported or defined here; list them again only together with their imports.
__all__ = [
    # Version information
    "__version__",
    "__author__",

    # Configuration classes
    "AlgorithmConfig",
    "AIModelConfig",
    "PromptConfig",
    "ContentGenerationConfig",
    "PosterGenerationConfig",
    "DocumentProcessingConfig",
    "OutputConfig",
    "ResourceConfig",
    "TaskModelConfig",

    # Core services
    "AIService",
    "OutputManager",
    "PromptManager",
    "JSONProcessor",

    # Algorithm modules
    "TopicGenerator",
    "ContentGenerator",
    "ContentJudger",
    "PosterGenerator",
    "TextGenerator",
    "TemplateManager",
    "DocumentProcessor",
    "TextExtractor",
    "ContentIntegrator",
    "ContentTransformer",
    "XHSCrawler",
    "KeywordAnalyzer",
    "ContentAnalyzer",

    # Data models
    "ExtractedDocument",
    "IntegratedContent",
    "TransformedContent",
    "XHSNote",
    "XHSSearchResult",
    "SearchConfig",
    "KeywordResult",
    "AnalysisResult",

    # Exception classes
    "AlgorithmError",
    "ConfigError",
    "AIServiceError",
    "ContentGenerationError",
    "PosterGenerationError",
    "DocumentProcessingError",
    "ResourceNotFoundError",
    "ValidationError",

    # Convenience factory functions
    "create_default_config",
    "create_content_pipeline",
    "create_poster_pipeline",
    "create_document_pipeline",
    "create_crawling_pipeline",
]
|
|||
|
|
|
|||
|
|
|
|||
|
|
def create_default_config(
    resource_base_directory: str = None,
    ai_model: str = "qwq-plus",
    **kwargs
) -> AlgorithmConfig:
    """
    Build an ``AlgorithmConfig`` from a few common settings plus overrides.

    Args:
        resource_base_directory: Base directory for resources (optional).
            Falsy values (``None``, ``""``) are ignored.
        ai_model: Name of the AI model.
        **kwargs: Additional top-level configuration sections. A section that
            is a dict is merged key by key into the section built from the
            explicit arguments (the override wins on conflicting keys); any
            other value replaces the section.

    Returns:
        The configuration instance produced by ``AlgorithmConfig.from_dict``.
    """
    config_dict = {}

    if resource_base_directory:
        config_dict["resources"] = {
            "resource_base_directory": resource_base_directory
        }

    # The model is only written when it differs from the signature default;
    # otherwise the value is left to AlgorithmConfig.from_dict.
    # NOTE(review): presumably that default is also "qwq-plus" - confirm.
    if ai_model != "qwq-plus":
        config_dict["ai_model"] = {
            "model": ai_model
        }

    # Apply the remaining overrides. A plain ``dict.update`` would replace a
    # whole section and silently drop ``resource_base_directory`` / ``ai_model``
    # whenever the caller also passes ``resources=...`` / ``ai_model={...}``,
    # so dict sections are merged one level deep instead.
    for section, override in kwargs.items():
        existing = config_dict.get(section)
        if isinstance(existing, dict) and isinstance(override, dict):
            config_dict[section] = {**existing, **override}
        else:
            config_dict[section] = override

    return AlgorithmConfig.from_dict(config_dict)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def create_content_pipeline(config: AlgorithmConfig = None) -> dict:
    """
    Assemble the components of the content-generation pipeline.

    Args:
        config: Algorithm configuration. When ``None``, the result of
            ``create_default_config()`` is used.

    Returns:
        A dict with the keys ``config``, ``topic_generator``,
        ``content_generator``, ``content_judger``, ``ai_service``,
        ``output_manager``, ``prompt_manager`` and ``json_processor``.
    """
    cfg = create_default_config() if config is None else config

    # Components are created in the same order as they appear in the result.
    topic_generator = TopicGenerator(cfg)
    content_generator = ContentGenerator(cfg)
    content_judger = ContentJudger(cfg)
    ai_service = AIService(cfg.ai_model)
    output_manager = OutputManager(cfg.output)
    prompt_manager = PromptManager(cfg.prompts, cfg.resources)
    # JSON repair behaviour is driven by the content-generation settings.
    json_processor = JSONProcessor(
        enable_repair=cfg.content_generation.enable_json_repair,
        max_repair_attempts=cfg.content_generation.json_repair_attempts,
    )

    return {
        "config": cfg,
        "topic_generator": topic_generator,
        "content_generator": content_generator,
        "content_judger": content_judger,
        "ai_service": ai_service,
        "output_manager": output_manager,
        "prompt_manager": prompt_manager,
        "json_processor": json_processor,
    }
|
|||
|
|
|
|||
|
|
|
|||
|
|
def create_poster_pipeline(config: AlgorithmConfig = None) -> dict:
    """
    Assemble the components of the poster-generation pipeline.

    The AI service, prompt manager and JSON processor are each built once;
    the instances returned in the dict are the same ones handed to the
    ``TextGenerator``, instead of separately constructed duplicates.

    Args:
        config: Algorithm configuration. When ``None``, the result of
            ``create_default_config()`` is used.

    Returns:
        A dict with the keys ``config``, ``poster_generator``,
        ``text_generator``, ``template_manager``, ``ai_service``,
        ``output_manager``, ``prompt_manager`` and ``json_processor``.
    """
    if config is None:
        config = create_default_config()

    poster_generator = PosterGenerator(config)

    # Shared services: used by the text generator and exposed to the caller.
    ai_service = AIService(config.ai_model)
    prompt_manager = PromptManager(config.prompts, config.resources)
    # JSON repair behaviour is driven by the content-generation settings.
    json_processor = JSONProcessor(
        enable_repair=config.content_generation.enable_json_repair,
        max_repair_attempts=config.content_generation.json_repair_attempts,
    )

    text_generator = TextGenerator(
        config,
        ai_service,
        prompt_manager,
        json_processor,
    )
    template_manager = TemplateManager(config.poster_generation, config.resources)
    output_manager = OutputManager(config.output)

    return {
        "config": config,
        "poster_generator": poster_generator,
        "text_generator": text_generator,
        "template_manager": template_manager,
        "ai_service": ai_service,
        "output_manager": output_manager,
        "prompt_manager": prompt_manager,
        "json_processor": json_processor,
    }
|
|||
|
|
|
|||
|
|
|
|||
|
|
def create_document_pipeline(config: AlgorithmConfig = None) -> dict:
    """
    Assemble the components of the document-processing pipeline.

    Args:
        config: Algorithm configuration. When ``None``, the result of
            ``create_default_config()`` is used.

    Returns:
        A dict with the keys ``config``, ``document_processor``,
        ``text_extractor``, ``content_integrator``, ``content_transformer``,
        ``ai_service``, ``output_manager``, ``prompt_manager`` and
        ``json_processor``.
    """
    cfg = config if config is not None else create_default_config()

    pipeline = {"config": cfg}

    # Document-specific components; the extractor and integrator only receive
    # the document-processing section, the other two the whole configuration.
    pipeline["document_processor"] = DocumentProcessor(cfg)
    pipeline["text_extractor"] = TextExtractor(cfg.document_processing)
    pipeline["content_integrator"] = ContentIntegrator(cfg.document_processing)
    pipeline["content_transformer"] = ContentTransformer(cfg)

    # Shared core services.
    pipeline["ai_service"] = AIService(cfg.ai_model)
    pipeline["output_manager"] = OutputManager(cfg.output)
    pipeline["prompt_manager"] = PromptManager(cfg.prompts, cfg.resources)
    pipeline["json_processor"] = JSONProcessor(
        enable_repair=cfg.content_generation.enable_json_repair,
        max_repair_attempts=cfg.content_generation.json_repair_attempts,
    )

    return pipeline
|
|||
|
|
|
|||
|
|
|
|||
|
|
def create_crawling_pipeline(config: AlgorithmConfig = None) -> dict:
    """
    Assemble the components of the crawling and analysis pipeline.

    Args:
        config: Algorithm configuration. When ``None``, the result of
            ``create_default_config()`` is used.

    Returns:
        A dict with the keys ``config``, ``xhs_crawler``,
        ``keyword_analyzer``, ``content_analyzer``, ``ai_service``,
        ``output_manager``, ``prompt_manager`` and ``json_processor``.
    """
    cfg = create_default_config() if config is None else config

    # Keyword arguments are evaluated left to right, so the components are
    # created in the same order as the keys of the returned dict.
    return dict(
        config=cfg,
        # The crawler only receives the web-crawling section of the config.
        xhs_crawler=XHSCrawler(cfg.web_crawling),
        keyword_analyzer=KeywordAnalyzer(cfg),
        content_analyzer=ContentAnalyzer(cfg),
        ai_service=AIService(cfg.ai_model),
        output_manager=OutputManager(cfg.output),
        prompt_manager=PromptManager(cfg.prompts, cfg.resources),
        json_processor=JSONProcessor(
            enable_repair=cfg.content_generation.enable_json_repair,
            max_repair_attempts=cfg.content_generation.json_repair_attempts,
        ),
    )
|
|||
|
|
|
|||
|
|
# Logging setup: attach a NullHandler to this package's logger so the package
# itself installs no output handler; the application that imports it decides
# how (and whether) log records are emitted.
import logging
logging.getLogger(__name__).addHandler(logging.NullHandler())
|