diff --git a/api/routers/__pycache__/content_integration.cpython-312.pyc b/api/routers/__pycache__/content_integration.cpython-312.pyc index dfc1ec3..e677333 100644 Binary files a/api/routers/__pycache__/content_integration.cpython-312.pyc and b/api/routers/__pycache__/content_integration.cpython-312.pyc differ diff --git a/api/routers/content_integration.py b/api/routers/content_integration.py index 284a70d..d248d45 100644 --- a/api/routers/content_integration.py +++ b/api/routers/content_integration.py @@ -71,15 +71,22 @@ async def integrate_content(request: ContentIntegrationRequest) -> ContentIntegr document_info=result["document_info"], xhs_info=result["xhs_info"], integrated_content=result["integrated_content"], - search_config=result["search_config"] + search_config=result["search_config"], + error_message=None # 成功时无错误信息 ) logger.info(f"内容整合成功,处理时间:{result['processing_time']}") else: + from datetime import datetime response = ContentIntegrationResponse( success=False, - timestamp=result["timestamp"], - processing_time=result["processing_time"], - error_message=result["error_message"] + timestamp=result.get("timestamp", datetime.now().strftime("%Y%m%d_%H%M%S")), + processing_time=result.get("processing_time", "0秒"), + input_summary=result.get("input_summary"), + document_info=result.get("document_info"), + xhs_info=result.get("xhs_info"), + integrated_content=result.get("integrated_content"), + search_config=result.get("search_config"), + error_message=result.get("error_message") ) logger.error(f"内容整合失败:{result['error_message']}") diff --git a/core/__init__.py b/core/__init__.py index 72365ef..a179214 100644 --- a/core/__init__.py +++ b/core/__init__.py @@ -8,13 +8,8 @@ Core Module 提供内容整合的核心服务,通过适配器模式与xhs_spider和document模块交互 """ -# 导入主要服务 -from .content_integration_service import ( - ContentIntegrationService, - ProcessingConfig, - ProcessingResult, - ProcessingStats -) +# 注意:ContentIntegrationService已移至api.services模块 +# 这里不再导入,以避免与API服务冲突 # 导入适配器 from .xhs_adapter import XHSAdapter, XHSNote, XHSSearchResult @@ -30,14 +25,6 @@ __author__ = "TravelContentCreator Team" # 导出所有公共接口 __all__ = [ - # 主要服务 - 'ContentIntegrationService', - - # 数据模型 - 'ProcessingConfig', - 'ProcessingResult', - 'ProcessingStats', - # 适配器 'XHSAdapter', 'DocumentAdapter', @@ -57,26 +44,4 @@ __all__ = [ '__author__' ] -# 便捷函数 -def create_integration_service(cookie_config_path: str = "cookies.json", - media_storage_path: str = "media", - enable_logging: bool = True) -> ContentIntegrationService: - """ - 创建内容整合服务实例的便捷函数 - - Args: - cookie_config_path: Cookie配置文件路径 - media_storage_path: 媒体存储路径 - enable_logging: 是否启用日志 - - Returns: - ContentIntegrationService: 内容整合服务实例 - """ - return ContentIntegrationService( - cookie_config_path=cookie_config_path, - media_storage_path=media_storage_path, - enable_logging=enable_logging - ) - -# 添加到导出列表 -__all__.append('create_integration_service') +# 注意:ContentIntegrationService及相关模型已移至api.services模块 diff --git a/core/__pycache__/__init__.cpython-312.pyc b/core/__pycache__/__init__.cpython-312.pyc index 72cac26..2959084 100644 Binary files a/core/__pycache__/__init__.cpython-312.pyc and b/core/__pycache__/__init__.cpython-312.pyc differ diff --git a/core/content_integration_service.py b/core/content_integration_service.py deleted file mode 100644 index be2b805..0000000 --- a/core/content_integration_service.py +++ /dev/null @@ -1,349 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -""" -Content Integration Service -内容整合服务 - -core模块的主要服务,通过适配器与xhs_spider和document模块交互 -""" - -import asyncio -from typing import Dict, List, Optional, Any -from dataclasses import dataclass, field -from datetime import datetime -import logging -import uuid - -from .xhs_adapter import XHSAdapter, XHSSearchResult -from .document_adapter import DocumentAdapter, IntegratedContent -from .cookie_manager import CookieManager -from .media_manager import ImageStorageManager - -logger = logging.getLogger(__name__) - -@dataclass -class ProcessingConfig: - """处理配置""" - keyword: str - document_paths: Optional[List[str]] = None - max_notes: int = 20 - output_format: str = "summary" - download_media: bool = True - custom_prompt: Optional[str] = None - -@dataclass -class ProcessingResult: - """处理结果""" - task_id: str - config: ProcessingConfig - success: bool - xhs_result: Optional[XHSSearchResult] = None - document_result: Optional[IntegratedContent] = None - final_content: str = "" - error_message: str = "" - processing_time: float = 0.0 - created_time: datetime = field(default_factory=datetime.now) - -@dataclass -class ProcessingStats: - """处理统计信息""" - total_tasks: int = 0 - successful_tasks: int = 0 - failed_tasks: int = 0 - total_processing_time: float = 0.0 - average_processing_time: float = 0.0 - total_notes_processed: int = 0 - total_documents_processed: int = 0 - start_time: datetime = field(default_factory=datetime.now) - last_updated: datetime = field(default_factory=datetime.now) - -class ContentIntegrationService: - """内容整合服务""" - - def __init__(self, - cookie_config_path: str = "cookies.json", - media_storage_path: str = "media", - enable_logging: bool = True): - """ - 初始化服务 - - Args: - cookie_config_path: Cookie配置文件路径 - media_storage_path: 媒体存储路径 - enable_logging: 是否启用日志 - """ - self.cookie_config_path = cookie_config_path - self.media_storage_path = media_storage_path - self.enable_logging = enable_logging - - # 初始化适配器 - self.xhs_adapter = XHSAdapter(cookie_config_path) - self.document_adapter = DocumentAdapter() - - # 初始化管理器 - self.cookie_manager = CookieManager(cookie_config_path) - self.media_manager = ImageStorageManager(media_storage_path) - - # 结果存储和统计 - self.results: Dict[str, ProcessingResult] = {} - self.stats = ProcessingStats() - - logger.info("内容整合服务初始化完成") - - async def process_content(self, - keyword: str, - document_paths: Optional[List[str]] = None, - output_format: str = "summary", - max_notes: int = 20, - custom_prompt: Optional[str] = None) -> ProcessingResult: - """ - 处理内容整合 - - Args: - keyword: 搜索关键词 - document_paths: 文档路径列表 - output_format: 输出格式 - max_notes: 最大笔记数量 - custom_prompt: 自定义提示词 - - Returns: - ProcessingResult: 处理结果 - """ - task_id = str(uuid.uuid4()) - start_time = datetime.now() - - config = ProcessingConfig( - keyword=keyword, - document_paths=document_paths, - max_notes=max_notes, - output_format=output_format, - custom_prompt=custom_prompt - ) - - result = ProcessingResult( - task_id=task_id, - config=config, - success=False - ) - - try: - # 1. 搜索小红书内容(如果有关键词) - xhs_result = None - if keyword: - logger.info(f"开始搜索小红书内容: {keyword}") - xhs_result = self.xhs_adapter.search_notes( - keyword=keyword, - max_notes=max_notes, - - ) - result.xhs_result = xhs_result - logger.info(f"小红书搜索完成,找到 {len(xhs_result.notes)} 条笔记") - - # 2. 处理文档内容(如果有文档) - document_result = None - if document_paths: - logger.info(f"开始处理文档: {len(document_paths)} 个文件") - document_result = self.document_adapter.integrate_documents(document_paths) - result.document_result = document_result - logger.info(f"文档处理完成,总长度: {document_result.total_length}") - - # 3. 整合内容 - final_content = self._integrate_content( - xhs_result=xhs_result, - document_result=document_result, - output_format=output_format, - custom_prompt=custom_prompt - ) - result.final_content = final_content - - # 4. 下载媒体文件(如果需要) - if config.download_media and xhs_result: - await self._download_media(xhs_result) - - result.success = True - logger.info(f"任务 {task_id} 处理完成") - - except Exception as e: - result.error_message = str(e) - logger.error(f"任务 {task_id} 处理失败: {e}") - - # 计算处理时间 - result.processing_time = (datetime.now() - start_time).total_seconds() - - # 存储结果和更新统计 - self.results[task_id] = result - self._update_statistics(result) - - return result - - async def batch_process(self, - tasks: List[Dict[str, Any]], - output_format: str = "summary") -> List[ProcessingResult]: - """ - 批量处理任务 - - Args: - tasks: 任务列表,每个任务包含keyword和document_paths - output_format: 输出格式 - - Returns: - List[ProcessingResult]: 处理结果列表 - """ - results = [] - - # 并发处理任务 - async def process_task(task_config): - return await self.process_content( - keyword=task_config.get("keyword", ""), - document_paths=task_config.get("document_paths"), - output_format=output_format, - max_notes=task_config.get("max_notes", 20), - custom_prompt=task_config.get("custom_prompt") - ) - - # 使用asyncio.gather并发执行 - tasks_coroutines = [process_task(task) for task in tasks] - results = await asyncio.gather(*tasks_coroutines, return_exceptions=True) - - # 处理异常结果 - processed_results = [] - for i, result in enumerate(results): - if isinstance(result, Exception): - error_result = ProcessingResult( - task_id=str(uuid.uuid4()), - config=ProcessingConfig(keyword=tasks[i].get("keyword", "")), - success=False, - error_message=str(result) - ) - processed_results.append(error_result) - else: - processed_results.append(result) - - return processed_results - - def _integrate_content(self, - xhs_result: Optional[XHSSearchResult], - document_result: Optional[IntegratedContent], - output_format: str, - custom_prompt: Optional[str]) -> str: - """ - 整合内容 - - Args: - xhs_result: 小红书搜索结果 - document_result: 文档处理结果 - output_format: 输出格式 - custom_prompt: 自定义提示词 - - Returns: - str: 整合后的内容 - """ - content_parts = [] - - # 添加小红书内容 - if xhs_result and xhs_result.notes: - xhs_content = "\n".join([ - f"标题: {note.title}\n内容: {note.content}\n作者: {note.author}\n" - for note in xhs_result.notes[:5] # 只取前5条 - ]) - content_parts.append(f"=== 小红书相关内容 ===\n{xhs_content}") - - # 添加文档内容 - if document_result and document_result.integrated_text: - content_parts.append(f"=== 文档相关内容 ===\n{document_result.integrated_text}") - - # 合并内容 - if not content_parts: - return "没有找到相关内容" - - combined_content = "\n\n".join(content_parts) - - # 根据输出格式转换内容 - if self.document_adapter.is_available(): - try: - formatted_content = self.document_adapter.transform_content( - combined_content, - output_format - ) - return formatted_content - except Exception as e: - logger.error(f"内容格式转换失败: {e}") - - # 简单格式化 - return self._simple_format(combined_content, output_format) - - def _simple_format(self, content: str, output_format: str) -> str: - """简单的内容格式化""" - if output_format == "summary": - return f"摘要:\n{content[:500]}..." - elif output_format == "blog_post": - return f"# 博客文章\n\n{content}" - elif output_format == "travel_guide": - return f"# 旅游攻略\n\n{content}" - else: - return content - - async def _download_media(self, xhs_result: XHSSearchResult): - """下载媒体文件""" - try: - for note in xhs_result.notes: - # 下载图片 - for image_url in note.images: - await self.media_manager.download_image(image_url, note.note_id) - - # 下载视频 - for video_url in note.videos: - await self.media_manager.download_video(video_url, note.note_id) - - except Exception as e: - logger.error(f"媒体下载失败: {e}") - - def _update_statistics(self, result: ProcessingResult): - """更新统计信息""" - self.stats.total_tasks += 1 - self.stats.total_processing_time += result.processing_time - - if result.success: - self.stats.successful_tasks += 1 - if result.xhs_result: - self.stats.total_notes_processed += len(result.xhs_result.notes) - if result.document_result: - self.stats.total_documents_processed += len(result.document_result.documents) - else: - self.stats.failed_tasks += 1 - - # 计算平均处理时间 - if self.stats.total_tasks > 0: - self.stats.average_processing_time = ( - self.stats.total_processing_time / self.stats.total_tasks - ) - - self.stats.last_updated = datetime.now() - - def get_statistics(self) -> ProcessingStats: - """获取处理统计信息""" - return self.stats - - def get_result(self, task_id: str) -> Optional[ProcessingResult]: - """获取处理结果""" - return self.results.get(task_id) - - def get_all_results(self) -> Dict[str, ProcessingResult]: - """获取所有处理结果""" - return self.results.copy() - - def clear_results(self): - """清空处理结果""" - self.results.clear() - - def get_status(self) -> Dict[str, Any]: - """获取服务状态""" - return { - "xhs_adapter": self.xhs_adapter.get_status(), - "document_adapter": self.document_adapter.get_status(), - "cookie_manager": self.cookie_manager.get_status() if hasattr(self.cookie_manager, 'get_status') else {}, - "media_manager": self.media_manager.get_status() if hasattr(self.media_manager, 'get_status') else {}, - "statistics": self.stats, - "total_results": len(self.results) - } \ No newline at end of file diff --git a/resource/prompt/integration/system.txt b/resource/prompt/integration/system.txt index a4cb144..abab06f 100644 --- a/resource/prompt/integration/system.txt +++ b/resource/prompt/integration/system.txt @@ -21,24 +21,47 @@ { "attractions": [ { - "name": "景区/酒店名称", + "name": "景区名称", + "address": "景区详细地址", + "trafficInfo": "交通指南详细信息", + "description": "景区空泛描述信息", + "advantage": "景区最大优势", + "highlight": "景区亮点描述", "products": [ { - "product_name": "产品套餐名称", - "usage_rules": "产品使用规则详细说明", + "product_name": "产品名称", + "originPrice": "原始价格", + "realPrice": "实际价格", + "packageInfo": "产品详情完整描述", + "salesPeriod": "销售周期信息", + "stock": "库存数量", + "usage_rules": { + "rules": "使用规则详细说明", + "surcharge": "加价说明", + "reservation": "预约规则", + "refund": "退改政策", + "discounts": "优惠内容" + }, "transportation": { - "guide": "交通指南", + "guide": "交通指南详细信息", "address": "详细地址" }, - "price": "产品价格信息", - "key_advantages": "产品最突出优势", + "key_advantages": "产品最大优势描述", + "highlights": "产品亮点详细描述", "detailed_description": [ - "1. 详细描述项目1", - "2. 详细描述项目2", - "..." + "1. 游泳池尺寸:长xx米,宽xx米,深xx米", + "2. 泳池特色:如无边泳池、恒温泳池等", + "3. 开放时间:xx:xx-xx:xx", + "4. 适合人群:儿童/成人等", + "5. 安全设施:救生员配置等", + "6. 周边景点:xxx景点距离xx米", + "7. 风景描述:如海景、山景等", + "8. 小贴士:需要携带的物品等", + "9. 游玩路线建议", + "10. 推荐游玩时长" ] } ] } ] -} \ No newline at end of file +} \ No newline at end of file diff --git a/utils/__pycache__/prompts.cpython-312.pyc b/utils/__pycache__/prompts.cpython-312.pyc index b3683d3..4e00896 100644 Binary files a/utils/__pycache__/prompts.cpython-312.pyc and b/utils/__pycache__/prompts.cpython-312.pyc differ diff --git a/utils/prompts.py b/utils/prompts.py index 04b06f6..92909ab 100644 --- a/utils/prompts.py +++ b/utils/prompts.py @@ -342,7 +342,7 @@ class ContentPromptBuilder(BasePromptBuilder): style_filename = topic.get("style", "") style_content = self._find_and_read_file(style_filename, self.resource_config.style.paths) or "通用风格" - demand_filename = topic.get("target_audience", "") + demand_filename = topic.get("targetAudience", "") demand_content = self._find_and_read_file(demand_filename, self.resource_config.demand.paths) or "通用用户画像" object_name = topic.get("object", "")