#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Content transformer module.

Converts parsed, integrated document content into standardized scenic-spot
and product material formats.

NOTE: every formatter defined in this module is a fixed Markdown template;
no LLM call is made here.
"""

import logging
from typing import Dict, Any, Optional, List, Callable
from dataclasses import dataclass
from datetime import datetime
import uuid

from .content_integrator import IntegratedContent
from core.ai.ai_agent import AIAgent
from core.config.manager import ConfigManager
from utils.file_io import OutputManager

logger = logging.getLogger(__name__)


# Markdown templates used by the built-in formatters. They are filled with
# ``str.format``; only the template text itself is parsed for placeholders, so
# braces inside the substituted document content are safe.
_ATTRACTION_STANDARD_TEMPLATE = """
# 景点信息整理

## 基本信息
- 文档来源: {document_count}个文档
- 主要主题: {key_topics}

## 详细内容
{combined_content}

## 内容摘要
{content_summary}

---
*基于提供的文档整理,如需更多信息请参考原始文档*
"""

_PRODUCT_SALES_TEMPLATE = """
# 产品销售资料

## 产品特色
基于{document_count}个文档的信息整理:

{content_summary}

## 详细介绍
{combined_content}

## 关键卖点
{key_topics}

---
*内容整理自提供的文档资料*
"""

_TRAVEL_GUIDE_TEMPLATE = """
# 旅游指南

## 概述
{content_summary}

## 详细信息
{combined_content}

## 重要提示
- 信息来源: {document_count}个文档
- 关键主题: {key_topics}

---
*本指南基于提供的文档整理,出行前请核实最新信息*
"""

_BLOG_POST_TEMPLATE = """
# 博客文章

## 前言
本文基于{document_count}个文档资料整理而成。

## 主要内容
{combined_content}

## 总结
{content_summary}

## 相关主题
{key_topics}

---
*本文内容整理自多个文档资料*
"""

_SUMMARY_TEMPLATE = """
# 文档内容摘要

## 文档统计
- 文档数量: {document_count}
- 文档类型: {document_types}
- 内容长度: {content_length}字符

## 内容摘要
{content_summary}

## 关键主题
{key_topics}

## 完整内容
{combined_content}
"""


@dataclass
class TransformedContent:
    """Result of rendering an ``IntegratedContent`` into one output format."""

    # The integrated content the text was produced from.
    original_content: IntegratedContent
    # The rendered output text.
    transformed_text: str
    # Name of the format that produced ``transformed_text``.
    format_type: str
    # Bookkeeping about the transformation (see ``transform_content``).
    transformation_metadata: Dict[str, Any]
    # Local (naive) timestamp taken when the result object was created.
    transformed_at: datetime


class ContentTransformer:
    """Content transformer - renders integrated content into a named format."""

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Create a transformer.

        Args:
            config: Optional configuration mapping; stored as ``self.config``
                (an empty dict when omitted or falsy).
        """
        self.config = config or {}
        # Format name -> formatter. Every formatter is called as
        # ``formatter(integrated_content, custom_prompt)`` and returns text.
        self.supported_formats: Dict[
            str, Callable[[IntegratedContent, Optional[str]], str]
        ] = {
            'attraction_standard': self._transform_to_attraction_standard,
            'product_sales': self._transform_to_product_sales,
            'travel_guide': self._transform_to_travel_guide,
            'blog_post': self._transform_to_blog_post,
            'summary': self._transform_to_summary,
        }

    def transform_content(self,
                          integrated_content: IntegratedContent,
                          format_type: str = 'summary',
                          custom_prompt: Optional[str] = None) -> TransformedContent:
        """Render ``integrated_content`` using the formatter for ``format_type``.

        Args:
            integrated_content: The integrated content to render.
            format_type: Name of a registered format.
            custom_prompt: Optional prompt forwarded to the formatter. The
                built-in template formatters ignore it; formatters registered
                through ``add_custom_format`` may use it.

        Returns:
            TransformedContent: The rendered text together with its metadata.

        Raises:
            ValueError: If ``format_type`` is not a registered format.
        """
        if format_type not in self.supported_formats:
            raise ValueError(f"不支持的格式类型: {format_type}")

        logger.info("开始转换内容,格式: %s", format_type)

        # Run the formatter registered for this format.
        transform_func = self.supported_formats[format_type]
        transformed_text = transform_func(integrated_content, custom_prompt)

        # 'custom_prompt_used' records only that a prompt was supplied; whether
        # it influenced the output depends on the formatter.
        transformation_metadata = {
            'format_type': format_type,
            'source_document_count': integrated_content.document_count,
            'source_content_length': integrated_content.total_content_length,
            'transformed_content_length': len(transformed_text),
            'key_topics_used': integrated_content.key_topics,
            'custom_prompt_used': custom_prompt is not None,
        }

        return TransformedContent(
            original_content=integrated_content,
            transformed_text=transformed_text,
            format_type=format_type,
            transformation_metadata=transformation_metadata,
            transformed_at=datetime.now(),
        )

    @staticmethod
    def _render_topics(topics, limit: Optional[int] = None,
                       bullet: Optional[str] = None) -> str:
        """Render key topics as inline text or as a bulleted list.

        Args:
            topics: Iterable of topics; ``None`` is treated as empty.
            limit: Maximum number of topics to include (``None`` = all).
            bullet: When given, each topic goes on its own line prefixed by
                ``"<bullet> "``; otherwise topics are joined with ``", "``.

        Returns:
            The rendered topic text (empty string when there are no topics).
        """
        # Coerce to str so a non-string topic cannot break ``str.join``.
        selected = [str(topic) for topic in list(topics or [])[:limit]]
        if bullet is None:
            return ", ".join(selected)
        return "\n".join(f"{bullet} {topic}" for topic in selected)

    def _transform_to_attraction_standard(self, content: IntegratedContent,
                                          custom_prompt: Optional[str] = None) -> str:
        """Render the attraction-standard format (first 5 topics, inline).

        ``custom_prompt`` is accepted for signature compatibility and unused.
        """
        return _ATTRACTION_STANDARD_TEMPLATE.format(
            document_count=content.document_count,
            key_topics=self._render_topics(content.key_topics, limit=5),
            combined_content=content.combined_content,
            content_summary=content.content_summary,
        )

    def _transform_to_product_sales(self, content: IntegratedContent,
                                    custom_prompt: Optional[str] = None) -> str:
        """Render the product-sales format (first 8 topics as selling points).

        ``custom_prompt`` is accepted for signature compatibility and unused.
        """
        key_points = self._render_topics(content.key_topics, limit=8, bullet="•")
        return _PRODUCT_SALES_TEMPLATE.format(
            document_count=content.document_count,
            content_summary=content.content_summary,
            combined_content=content.combined_content,
            key_topics=key_points,
        )

    def _transform_to_travel_guide(self, content: IntegratedContent,
                                   custom_prompt: Optional[str] = None) -> str:
        """Render the travel-guide format (first 5 topics, inline).

        ``custom_prompt`` is accepted for signature compatibility and unused.
        """
        return _TRAVEL_GUIDE_TEMPLATE.format(
            content_summary=content.content_summary,
            combined_content=content.combined_content,
            document_count=content.document_count,
            key_topics=self._render_topics(content.key_topics, limit=5),
        )

    def _transform_to_blog_post(self, content: IntegratedContent,
                                custom_prompt: Optional[str] = None) -> str:
        """Render the blog-post format (first 10 topics as a dash list).

        ``custom_prompt`` is accepted for signature compatibility and unused.
        """
        topics_list = self._render_topics(content.key_topics, limit=10, bullet="-")
        return _BLOG_POST_TEMPLATE.format(
            document_count=content.document_count,
            combined_content=content.combined_content,
            content_summary=content.content_summary,
            key_topics=topics_list,
        )

    def _transform_to_summary(self, content: IntegratedContent,
                              custom_prompt: Optional[str] = None) -> str:
        """Render the summary format (statistics, summary, all topics, full text).

        ``custom_prompt`` is accepted for signature compatibility and unused.
        """
        # e.g. "pdf(2个), docx(1个)"; a missing mapping renders as empty text.
        doc_types = ", ".join(
            f"{k}({v}个)" for k, v in (content.document_types or {}).items()
        )
        topics_list = self._render_topics(content.key_topics, bullet="•")
        return _SUMMARY_TEMPLATE.format(
            document_count=content.document_count,
            document_types=doc_types,
            content_length=content.total_content_length,
            content_summary=content.content_summary,
            key_topics=topics_list,
            combined_content=content.combined_content,
        )

    def get_supported_formats(self) -> List[str]:
        """Return the names of all currently registered formats."""
        return list(self.supported_formats)

    def add_custom_format(self, format_name: str, transform_func):
        """Register (or replace) a format.

        Args:
            format_name: Name under which the formatter is registered. An
                existing entry with the same name is overwritten.
            transform_func: Callable invoked as
                ``transform_func(integrated_content, custom_prompt)`` that
                returns the rendered text.

        Raises:
            TypeError: If ``transform_func`` is not callable.
        """
        # Fail at registration time instead of later inside transform_content.
        if not callable(transform_func):
            raise TypeError(f"转换函数必须是可调用对象: {format_name}")
        self.supported_formats[format_name] = transform_func
        logger.info("添加自定义格式: %s", format_name)