2025-07-31 15:35:23 +08:00

322 lines
8.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Travel Algorithms Package
AI-driven algorithm package for generating travel content.
This package provides a set of AI algorithms for generating travel-related
content, including:
- Topic generation
- Content creation
- Content review
- Poster generation
- Document processing
- Web crawling and analysis (XHS crawler, keyword and content analyzers)
Features:
- Configuration-driven design with support for dynamic prompt loading
- JSON repair and formatting
- Support for multiple AI models and parameters
- Flexible output management
- Comprehensive error handling
"""
__version__ = "1.0.0"
__author__ = "Travel Content Creator Team"
# Core configuration and services
from .config import (
AlgorithmConfig,
AIModelConfig,
PromptConfig,
ContentGenerationConfig,
PosterGenerationConfig,
DocumentProcessingConfig,
OutputConfig,
ResourceConfig,
TaskModelConfig
)
from .core import (
AIService,
OutputManager,
PromptManager,
JSONProcessor
)
# Content generation algorithms
from .content_generation import (
TopicGenerator,
ContentGenerator,
ContentJudger
)
# Poster generation algorithms
from .poster_generation import (
PosterGenerator,
TextGenerator,
TemplateManager
)
# Document processing algorithms
from .document_processing import (
DocumentProcessor,
TextExtractor,
ContentIntegrator,
ContentTransformer,
ExtractedDocument,
IntegratedContent,
TransformedContent
)
# Web crawling algorithms
from .web_crawling import (
XHSCrawler,
XHSNote,
XHSSearchResult,
SearchConfig,
KeywordAnalyzer,
KeywordResult,
ContentAnalyzer,
AnalysisResult
)
# Exception classes
from .exceptions import (
AlgorithmError,
ConfigError,
AIServiceError,
ContentGenerationError,
PosterGenerationError,
DocumentProcessingError,
ResourceNotFoundError,
ValidationError
)
# Public API of the package (what ``from <package> import *`` exports).
#
# Every entry must be bound in this module's namespace; a missing one makes
# the star-import fail with AttributeError. "WebCrawlingConfig",
# "KeywordAnalysisConfig", "ContentAnalysisConfig" and "CrawlingError" were
# previously listed without being imported or defined in this module, so they
# are not exported here. Re-add a name only together with its import.
__all__ = [
    # Version information
    "__version__",
    "__author__",
    # Configuration classes
    "AlgorithmConfig",
    "AIModelConfig",
    "PromptConfig",
    "ContentGenerationConfig",
    "PosterGenerationConfig",
    "DocumentProcessingConfig",
    "OutputConfig",
    "ResourceConfig",
    "TaskModelConfig",
    # Core services
    "AIService",
    "OutputManager",
    "PromptManager",
    "JSONProcessor",
    # Algorithm modules
    "TopicGenerator",
    "ContentGenerator",
    "ContentJudger",
    "PosterGenerator",
    "TextGenerator",
    "TemplateManager",
    "DocumentProcessor",
    "TextExtractor",
    "ContentIntegrator",
    "ContentTransformer",
    "XHSCrawler",
    "KeywordAnalyzer",
    "ContentAnalyzer",
    # Data models
    "ExtractedDocument",
    "IntegratedContent",
    "TransformedContent",
    "XHSNote",
    "XHSSearchResult",
    "SearchConfig",
    "KeywordResult",
    "AnalysisResult",
    # Exception classes
    "AlgorithmError",
    "ConfigError",
    "AIServiceError",
    "ContentGenerationError",
    "PosterGenerationError",
    "DocumentProcessingError",
    "ResourceNotFoundError",
    "ValidationError",
    # Convenience factories
    "create_default_config",
    "create_content_pipeline",
    "create_poster_pipeline",
    "create_document_pipeline",
    "create_crawling_pipeline",
]
def create_default_config(
    resource_base_directory: str = None,
    ai_model: str = "qwq-plus",
    **kwargs
) -> AlgorithmConfig:
    """
    Create a configuration from a few convenience arguments.

    Args:
        resource_base_directory: Base directory for resources (optional).
            When truthy it is stored as
            ``resources.resource_base_directory``.
        ai_model: AI model name. It is written to ``ai_model.model`` only
            when it differs from ``"qwq-plus"``; otherwise no ``ai_model``
            section is added by this argument.
        **kwargs: Additional top-level configuration sections. When a
            section given here is a dict and the same section was already
            produced from the arguments above, the two are merged one level
            deep with the values from ``kwargs`` taking precedence. Any
            other value replaces the section as a whole.

    Returns:
        The instance returned by ``AlgorithmConfig.from_dict``.
    """
    config_dict = {}
    if resource_base_directory:
        config_dict["resources"] = {
            "resource_base_directory": resource_base_directory
        }
    if ai_model != "qwq-plus":
        config_dict["ai_model"] = {
            "model": ai_model
        }

    # Apply the overrides. A plain ``dict.update`` would replace a whole
    # section and silently drop the explicit arguments handled above (e.g.
    # passing ``resources={...}`` would lose ``resource_base_directory``),
    # so dict sections are merged key by key instead.
    for section, override in kwargs.items():
        existing = config_dict.get(section)
        if isinstance(existing, dict) and isinstance(override, dict):
            config_dict[section] = {**existing, **override}
        else:
            config_dict[section] = override

    return AlgorithmConfig.from_dict(config_dict)
def create_content_pipeline(config: AlgorithmConfig = None) -> dict:
    """
    Assemble the components of the content generation pipeline.

    Args:
        config: Algorithm configuration (optional). When ``None``, the
            result of ``create_default_config()`` is used.

    Returns:
        Dict holding the configuration and one instance of each component,
        keyed by component name.
    """
    cfg = create_default_config() if config is None else config

    # Components are created in the same order as the keys appear.
    pipeline = {"config": cfg}
    pipeline["topic_generator"] = TopicGenerator(cfg)
    pipeline["content_generator"] = ContentGenerator(cfg)
    pipeline["content_judger"] = ContentJudger(cfg)
    pipeline["ai_service"] = AIService(cfg.ai_model)
    pipeline["output_manager"] = OutputManager(cfg.output)
    pipeline["prompt_manager"] = PromptManager(cfg.prompts, cfg.resources)

    generation = cfg.content_generation
    pipeline["json_processor"] = JSONProcessor(
        enable_repair=generation.enable_json_repair,
        max_repair_attempts=generation.json_repair_attempts,
    )
    return pipeline
def create_poster_pipeline(config: AlgorithmConfig = None) -> dict:
    """
    Assemble the components of the poster generation pipeline.

    The AI service, prompt manager and JSON processor are each created once
    and shared: the instances injected into ``TextGenerator`` are the same
    objects returned under ``"ai_service"``, ``"prompt_manager"`` and
    ``"json_processor"``.

    Args:
        config: Algorithm configuration (optional). When ``None``, the
            result of ``create_default_config()`` is used.

    Returns:
        Dict holding the configuration and the poster generation components,
        keyed by component name.
    """
    if config is None:
        config = create_default_config()

    # Build the shared services once instead of constructing a second,
    # disconnected copy of each for the returned dict.
    ai_service = AIService(config.ai_model)
    prompt_manager = PromptManager(config.prompts, config.resources)
    json_processor = JSONProcessor(
        enable_repair=config.content_generation.enable_json_repair,
        max_repair_attempts=config.content_generation.json_repair_attempts,
    )

    return {
        "config": config,
        "poster_generator": PosterGenerator(config),
        "text_generator": TextGenerator(
            config,
            ai_service,
            prompt_manager,
            json_processor,
        ),
        "template_manager": TemplateManager(config.poster_generation, config.resources),
        "ai_service": ai_service,
        "output_manager": OutputManager(config.output),
        "prompt_manager": prompt_manager,
        "json_processor": json_processor,
    }
def create_document_pipeline(config: AlgorithmConfig = None) -> dict:
    """
    Assemble the components of the document processing pipeline.

    Args:
        config: Algorithm configuration (optional). When ``None``, the
            result of ``create_default_config()`` is used.

    Returns:
        Dict holding the configuration and the document processing
        components, keyed by component name.
    """
    if config is None:
        config = create_default_config()

    components = {
        "config": config,
        "document_processor": DocumentProcessor(config),
    }

    # The extractor and the integrator take the document-processing section.
    doc_settings = config.document_processing
    components["text_extractor"] = TextExtractor(doc_settings)
    components["content_integrator"] = ContentIntegrator(doc_settings)
    components["content_transformer"] = ContentTransformer(config)

    components["ai_service"] = AIService(config.ai_model)
    components["output_manager"] = OutputManager(config.output)
    components["prompt_manager"] = PromptManager(config.prompts, config.resources)

    generation = config.content_generation
    components["json_processor"] = JSONProcessor(
        enable_repair=generation.enable_json_repair,
        max_repair_attempts=generation.json_repair_attempts,
    )
    return components
def create_crawling_pipeline(config: AlgorithmConfig = None) -> dict:
    """
    Assemble the components of the crawling and analysis pipeline.

    Args:
        config: Algorithm configuration (optional). When ``None``, the
            result of ``create_default_config()`` is used.

    Returns:
        Dict holding the configuration and the crawling/analysis components,
        keyed by component name.
    """
    active = config if config is not None else create_default_config()

    # Keyword arguments are evaluated left to right, so the components are
    # constructed in the order they are listed.
    return dict(
        config=active,
        xhs_crawler=XHSCrawler(active.web_crawling),
        keyword_analyzer=KeywordAnalyzer(active),
        content_analyzer=ContentAnalyzer(active),
        ai_service=AIService(active.ai_model),
        output_manager=OutputManager(active.output),
        prompt_manager=PromptManager(active.prompts, active.resources),
        json_processor=JSONProcessor(
            enable_repair=active.content_generation.enable_json_repair,
            max_repair_attempts=active.content_generation.json_repair_attempts,
        ),
    )
# Logging setup: attach a NullHandler to this module's logger.
import logging

_package_logger = logging.getLogger(__name__)
_package_logger.addHandler(logging.NullHandler())