#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Travel Algorithms Package

AI-driven algorithm package for travel content generation.

This package provides a complete set of AI algorithms for generating
travel-related content, including:
- Topic generation
- Content creation
- Content review
- Poster generation
- Document processing

Features:
- Configuration-driven design with dynamic prompt loading
- JSON repair and formatting
- Support for multiple AI models and parameters
- Flexible output management
- Complete error handling
"""

import logging
from typing import Any, Dict, Optional

# Core configuration and services
from .config import (
    AlgorithmConfig,
    AIModelConfig,
    PromptConfig,
    ContentGenerationConfig,
    PosterGenerationConfig,
    DocumentProcessingConfig,
    OutputConfig,
    ResourceConfig,
    TaskModelConfig,
)
from .core import (
    AIService,
    OutputManager,
    PromptManager,
    JSONProcessor,
)

# Content generation algorithms
from .content_generation import (
    TopicGenerator,
    ContentGenerator,
    ContentJudger,
)

# Poster generation algorithms
from .poster_generation import (
    PosterGenerator,
    TextGenerator,
    TemplateManager,
)

# Document processing algorithms
from .document_processing import (
    DocumentProcessor,
    TextExtractor,
    ContentIntegrator,
    ContentTransformer,
    ExtractedDocument,
    IntegratedContent,
    TransformedContent,
)

# Web crawling algorithms
from .web_crawling import (
    XHSCrawler,
    XHSNote,
    XHSSearchResult,
    SearchConfig,
    KeywordAnalyzer,
    KeywordResult,
    ContentAnalyzer,
    AnalysisResult,
)

# Exception classes
from .exceptions import (
    AlgorithmError,
    ConfigError,
    AIServiceError,
    ContentGenerationError,
    PosterGenerationError,
    DocumentProcessingError,
    ResourceNotFoundError,
    ValidationError,
)

__version__ = "1.0.0"
__author__ = "Travel Content Creator Team"

# Model name used as the default for create_default_config().
_DEFAULT_AI_MODEL = "qwq-plus"

# Public API.
# NOTE(review): "WebCrawlingConfig", "KeywordAnalysisConfig",
# "ContentAnalysisConfig" and "CrawlingError" used to be listed here although
# this module never imports or defines them, which made
# `from <package> import *` raise AttributeError. Re-add each name together
# with its import once the defining module is confirmed.
__all__ = [
    # Version information
    "__version__",
    "__author__",
    # Configuration classes
    "AlgorithmConfig",
    "AIModelConfig",
    "PromptConfig",
    "ContentGenerationConfig",
    "PosterGenerationConfig",
    "DocumentProcessingConfig",
    "OutputConfig",
    "ResourceConfig",
    "TaskModelConfig",
    # Core services
    "AIService",
    "OutputManager",
    "PromptManager",
    "JSONProcessor",
    # Algorithm modules
    "TopicGenerator",
    "ContentGenerator",
    "ContentJudger",
    "PosterGenerator",
    "TextGenerator",
    "TemplateManager",
    "DocumentProcessor",
    "TextExtractor",
    "ContentIntegrator",
    "ContentTransformer",
    "XHSCrawler",
    "KeywordAnalyzer",
    "ContentAnalyzer",
    # Data models
    "ExtractedDocument",
    "IntegratedContent",
    "TransformedContent",
    "XHSNote",
    "XHSSearchResult",
    "SearchConfig",
    "KeywordResult",
    "AnalysisResult",
    # Exception classes
    "AlgorithmError",
    "ConfigError",
    "AIServiceError",
    "ContentGenerationError",
    "PosterGenerationError",
    "DocumentProcessingError",
    "ResourceNotFoundError",
    "ValidationError",
    # Convenience helpers
    "create_default_config",
    "create_content_pipeline",
    "create_poster_pipeline",
    "create_document_pipeline",
    "create_crawling_pipeline",
]


def create_default_config(
    resource_base_directory: Optional[str] = None,
    ai_model: str = _DEFAULT_AI_MODEL,
    **kwargs: Any
) -> AlgorithmConfig:
    """
    Build an AlgorithmConfig from a small set of overrides.

    Args:
        resource_base_directory: Base directory for resources (optional).
            A falsy value (None or "") leaves the "resources" section unset.
        ai_model: Name of the AI model. The "ai_model" section is only
            emitted when this differs from "qwq-plus".
        **kwargs: Additional top-level configuration sections. When a
            section is a dict and collides with one built from the explicit
            arguments, the two are merged one level deep and the keys given
            in ``kwargs`` win; otherwise the ``kwargs`` value replaces it.

    Returns:
        The instance produced by ``AlgorithmConfig.from_dict``.
    """
    config_dict: Dict[str, Any] = {}

    if resource_base_directory:
        config_dict["resources"] = {
            "resource_base_directory": resource_base_directory
        }

    if ai_model != _DEFAULT_AI_MODEL:
        config_dict["ai_model"] = {"model": ai_model}

    # Merge instead of dict.update(): a plain update would replace the whole
    # section and silently discard the explicit arguments handled above.
    for section, override in kwargs.items():
        existing = config_dict.get(section)
        if isinstance(existing, dict) and isinstance(override, dict):
            config_dict[section] = {**existing, **override}
        else:
            config_dict[section] = override

    return AlgorithmConfig.from_dict(config_dict)


def _create_shared_services(config: AlgorithmConfig) -> Dict[str, Any]:
    """Build the service objects that every pipeline dictionary exposes."""
    return {
        "ai_service": AIService(config.ai_model),
        "output_manager": OutputManager(config.output),
        "prompt_manager": PromptManager(config.prompts, config.resources),
        "json_processor": JSONProcessor(
            enable_repair=config.content_generation.enable_json_repair,
            max_repair_attempts=config.content_generation.json_repair_attempts,
        ),
    }


def create_content_pipeline(
    config: Optional[AlgorithmConfig] = None
) -> Dict[str, Any]:
    """
    Create the content generation pipeline.

    Args:
        config: Algorithm configuration (optional; when None, the result of
            ``create_default_config()`` is used).

    Returns:
        Dictionary with the keys "config", "topic_generator",
        "content_generator", "content_judger", "ai_service",
        "output_manager", "prompt_manager" and "json_processor".
    """
    if config is None:
        config = create_default_config()

    pipeline: Dict[str, Any] = {
        "config": config,
        "topic_generator": TopicGenerator(config),
        "content_generator": ContentGenerator(config),
        "content_judger": ContentJudger(config),
    }
    pipeline.update(_create_shared_services(config))
    return pipeline


def create_poster_pipeline(
    config: Optional[AlgorithmConfig] = None
) -> Dict[str, Any]:
    """
    Create the poster generation pipeline.

    Args:
        config: Algorithm configuration (optional; when None, the result of
            ``create_default_config()`` is used).

    Returns:
        Dictionary with the keys "config", "poster_generator",
        "text_generator", "template_manager", "ai_service",
        "output_manager", "prompt_manager" and "json_processor".
    """
    if config is None:
        config = create_default_config()

    poster_generator = PosterGenerator(config)

    # Build the services once and hand the same instances to TextGenerator,
    # so the "ai_service" / "prompt_manager" / "json_processor" returned to
    # the caller are the ones the text generator actually uses.
    services = _create_shared_services(config)

    pipeline: Dict[str, Any] = {
        "config": config,
        "poster_generator": poster_generator,
        "text_generator": TextGenerator(
            config,
            services["ai_service"],
            services["prompt_manager"],
            services["json_processor"],
        ),
        "template_manager": TemplateManager(
            config.poster_generation, config.resources
        ),
    }
    pipeline.update(services)
    return pipeline


def create_document_pipeline(
    config: Optional[AlgorithmConfig] = None
) -> Dict[str, Any]:
    """
    Create the document processing pipeline.

    Args:
        config: Algorithm configuration (optional; when None, the result of
            ``create_default_config()`` is used).

    Returns:
        Dictionary with the keys "config", "document_processor",
        "text_extractor", "content_integrator", "content_transformer",
        "ai_service", "output_manager", "prompt_manager" and
        "json_processor".
    """
    if config is None:
        config = create_default_config()

    pipeline: Dict[str, Any] = {
        "config": config,
        "document_processor": DocumentProcessor(config),
        "text_extractor": TextExtractor(config.document_processing),
        "content_integrator": ContentIntegrator(config.document_processing),
        "content_transformer": ContentTransformer(config),
    }
    pipeline.update(_create_shared_services(config))
    return pipeline


def create_crawling_pipeline(
    config: Optional[AlgorithmConfig] = None
) -> Dict[str, Any]:
    """
    Create the crawling and analysis pipeline.

    Args:
        config: Algorithm configuration (optional; when None, the result of
            ``create_default_config()`` is used).

    Returns:
        Dictionary with the keys "config", "xhs_crawler",
        "keyword_analyzer", "content_analyzer", "ai_service",
        "output_manager", "prompt_manager" and "json_processor".
    """
    if config is None:
        config = create_default_config()

    pipeline: Dict[str, Any] = {
        "config": config,
        "xhs_crawler": XHSCrawler(config.web_crawling),
        "keyword_analyzer": KeywordAnalyzer(config),
        "content_analyzer": ContentAnalyzer(config),
    }
    pipeline.update(_create_shared_services(config))
    return pipeline


# Logging setup: attach a NullHandler to the package logger and leave any
# further logging configuration to the application.
logging.getLogger(__name__).addHandler(logging.NullHandler())