diff --git a/tests/test_integration_simple.py b/tests/test_integration_simple.py
new file mode 100644
index 0000000..dd8486f
--- /dev/null
+++ b/tests/test_integration_simple.py
@@ -0,0 +1,688 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+简化版离线整合测试脚本
+使用已有数据测试integration模块的不同效果
+调用真实AI进行内容整合
+"""
+
+import os
+import json
+import asyncio
+from typing import Dict, List, Optional, Any
+from datetime import datetime
+from pathlib import Path
+from dataclasses import dataclass
+
+# This file sits in tests/, so the repo root (assumed to hold core/ and utils/) is one level up.
+import sys
+project_root = Path(__file__).resolve().parent.parent
+sys.path.insert(0, str(project_root))
+
+# 导入核心模块
+from core.ai.ai_agent import AIAgent
+from core.config import ConfigManager, AIModelConfig
+from utils.prompts import PromptTemplate
+
+@dataclass
+class SimpleNote:
+    """Simplified note data model."""
+    note_id: str
+    title: str
+    content: str  # may be empty; _format_xhs_notes then prints a placeholder
+    author: str
+    likes: int  # filled from the 'like_count' key by _load_data
+    comments: int  # filled from the 'comment_count' key by _load_data
+    shares: int  # filled from the 'share_count' key by _load_data
+    note_url: str
+    images: List[str]  # only the number of entries is used when formatting
+    
+@dataclass 
+class SimpleSearchResult:
+    """Simplified search result."""
+    keyword: str
+    notes: List[SimpleNote]
+    total_count: int  # get_search_result sets this to len(notes)
+    success: bool  # get_search_result always passes True
+
+class OfflineIntegrationTester:
+    """离线整合测试器"""
+    
+    def __init__(self, data_file: str):
+        """
+        Initialize the tester: load config, build the AI agent and prompt template, load the data.
+
+        Args:
+            data_file: path to the offline data file
+        """
+        self.data_file = data_file
+        self.raw_data = None  # parsed JSON dump; assigned by _load_data
+        self.processed_notes = []  # SimpleNote objects appended by _load_data
+
+        # Initialize the config manager and the AI agent
+        self.config_manager = ConfigManager()
+        self.config_manager.load_from_directory("config", server_mode=True)  # relative path; presumably resolved against the CWD
+
+        # Fetch the AI config and build the AI agent from it
+        ai_config = self.config_manager.get_config('ai_model', AIModelConfig)
+        self.ai_agent = AIAgent(ai_config)
+
+        # Load the integration prompt templates
+        self.prompt_template = PromptTemplate(
+            system_prompt_path="resource/prompt/integration/system.txt",
+            user_prompt_path="resource/prompt/integration/user.txt"
+        )
+
+        # Load the data
+        self._load_data()
+
+        print(f"✅ 离线测试器初始化完成，加载了 {len(self.processed_notes)} 条笔记数据")
+        print(f"🤖 AI代理已初始化，模型: {ai_config.model}")
+    
+    def _load_data(self):
+        """Load the offline JSON file and turn the first result's notes into SimpleNote objects.
+
+        Any exception is printed and leaves ``processed_notes`` as an empty list.
+        """
+        try:
+            with open(self.data_file, 'r', encoding='utf-8') as handle:
+                self.raw_data = json.load(handle)
+            # Nothing to convert when 'results' is absent or empty.
+            if 'results' not in self.raw_data or not self.raw_data['results']:
+                return
+            first_result = self.raw_data['results'][0]  # only the first search result is used
+            for raw in first_result.get('notes', []):
+                self.processed_notes.append(SimpleNote(
+                    note_id=raw.get('note_id', ''),
+                    title=raw.get('title', ''),
+                    content=raw.get('content', ''),
+                    author=raw.get('author', ''),
+                    likes=raw.get('like_count', 0),
+                    comments=raw.get('comment_count', 0),
+                    shares=raw.get('share_count', 0),
+                    note_url=raw.get('note_url', ''),
+                    images=raw.get('images', []),
+                ))
+        except Exception as err:
+            print(f"❌ 数据加载失败: {err}")
+            self.processed_notes = []
+    
+    def get_search_result(self, keyword: Optional[str] = None) -> SimpleSearchResult:
+        """Wrap the loaded notes in a SimpleSearchResult; keyword defaults to the dump's first one."""
+        if not keyword:
+            # Tolerate a dump without a usable 'results' list instead of raising, mirroring _load_data.
+            results = self.raw_data.get('results') if isinstance(self.raw_data, dict) else None
+            first = results[0] if isinstance(results, list) and results else None
+            keyword = first.get('keyword', '测试关键词') if isinstance(first, dict) else '测试关键词'
+        return SimpleSearchResult(
+            keyword=keyword, notes=self.processed_notes,
+            total_count=len(self.processed_notes), success=True,
+        )
+    
+    def create_mock_documents(self, content_type: str = "travel_guide") -> Dict[str, str]:
+        """Return the mock documents (file name -> text) for ``content_type``."""
+        # Three document sets keyed by content type; every value maps a file name to its full text.
+        mock_docs = {
+            "travel_guide": {
+                "上海馥桂萌宠园游玩攻略.txt": """上海馥桂萌宠园游玩攻略
+
+一、园区简介
+馥桂萌宠园位于上海市嘉定区，是一个集萌宠互动、户外休闲、亲子娱乐为一体的综合性主题公园。
+
+二、开放信息
+- 开放时间：9:00-17:00（周一至周日）
+- 门票价格：成人票80元，儿童票50元（1.2米以下免费）
+- 地址：上海市嘉定区朱桥镇
+- 交通：地铁11号线嘉定西站转公交
+
+三、主要区域
+1. 萌宠互动区：可与羊驼、袋鼠、小兔子等动物亲密接触
+2. 户外游乐区：秋千、滑梯等儿童设施
+3. 餐饮休息区：提供简餐和饮品
+4. 桂花林区：春秋季节桂花飘香，适合拍照
+
+四、游玩建议
+- 建议游玩时间：3-4小时
+- 最佳游玩季节：春秋两季
+- 携带物品：防晒用品、湿巾、零食
+- 注意事项：爱护动物，文明游园""",
+
+                "萌宠园交通指南.md": """# 馥桂萌宠园交通指南
+
+## 公共交通
+### 地铁+公交
+1. 地铁11号线至嘉定西站
+2. 转乘嘉定69路公交至朱桥站
+3. 步行约10分钟即可到达
+
+## 自驾路线
+从市区出发：
+1. 走外环高速(A20)
+2. 转嘉定环线(A5)
+3. 在朱桥出口下高速
+4. 沿指示牌行驶约5分钟
+
+## 停车信息
+- 园区提供免费停车场
+- 停车位约200个
+- 节假日建议早到，避免停车位紧张"""
+            },
+
+            "product_info": {
+                "门票套餐信息.txt": """门票套餐信息
+
+基础门票：
+- 成人票：80元/人
+- 儿童票：50元/人（3-12岁）
+- 老年票：60元/人（65岁以上）
+- 免票：1.2米以下儿童
+
+套餐选择：
+1. 家庭套票（2大1小）：180元
+2. 亲子套票（1大1小）：120元  
+3. 团体票（10人以上）：70元/人
+
+增值服务：
+- 动物喂食包：20元/份
+- 拍照服务：50元/次
+- 导览服务：100元/团"""
+            },
+
+            "user_reviews": {
+                "游客评价汇总.txt": """游客评价汇总
+
+正面评价：
+✅ 动物种类丰富，互动性强
+✅ 环境优美，适合拍照
+✅ 工作人员服务态度好
+✅ 停车方便，交通便利
+✅ 价格合理，性价比高
+
+负面反馈：
+❌ 节假日人流量大
+❌ 餐饮选择相对有限
+❌ 部分设施需要维护
+❌ 雨天游玩体验一般
+
+综合评分：4.2/5.0"""
+            }
+        }
+        # An unrecognised content_type falls back to the "travel_guide" set.
+        return mock_docs.get(content_type, mock_docs["travel_guide"])
+    
+    def _format_xhs_notes(self, notes: List[SimpleNote]) -> str:
+        """Render notes as numbered text blocks separated by a blank line."""
+        if not notes:
+            return "暂无相关笔记内容"
+
+        def render(index, note):
+            # One block per note; the image-count line is only added when images exist.
+            body = note.content or '（无文字内容）'
+            lines = [
+                f"第{index}条笔记：",
+                f"标题：{note.title}",
+                f"作者：{note.author}",
+                f"内容：{body}",
+                f"互动数据：👍{note.likes} 💬{note.comments} 🔄{note.shares}",
+                f"笔记链接：{note.note_url}",
+            ]
+            if note.images:
+                lines.append(f"图片数量：{len(note.images)}张")
+            return "\n".join(lines) + "\n"
+        return "\n".join(render(i, n) for i, n in enumerate(notes, 1))
+    
+    def _format_document_content(self, documents: Dict[str, str]) -> str:
+        """Render each document as a labelled section; sections are joined by a blank line.
+
+        Args:
+            documents: mapping of file name to document text.
+
+        Returns:
+            The combined text, or a fixed placeholder string when ``documents`` is empty.
+        """
+        if not documents:
+            return "暂无文档内容"
+        # Every section ends with its own newline, so joining on "\n" leaves a blank line between.
+        sections = (f"文档：{name}\n内容：\n{text}\n" for name, text in documents.items())
+        return "\n".join(sections)
+    
+    def test_basic_integration(self):
+        """Print an overview of the mock documents and the loaded notes.
+
+        Returns:
+            A dict with the ``search_result`` and ``documents`` that were summarised.
+        """
+        banner = "=" * 60
+        print("\n" + banner)
+        print("🔄 测试基础整合功能")
+        print(banner)
+
+        # Gather the inputs: notes under a fixed keyword plus the travel-guide documents.
+        search_result = self.get_search_result("上海馥桂萌宠园攻略")
+        documents = self.create_mock_documents("travel_guide")
+        notes = search_result.notes
+
+        print("📚 文档处理结果：")
+        print(f"   - 文档数量：{len(documents)}")
+        print(f"   - 总内容长度：{sum(map(len, documents.values()))}")
+
+        print("\n📱 小红书数据：")
+        print(f"   - 笔记数量：{len(notes)}")
+        print(f"   - 总点赞数：{sum(n.likes for n in notes)}")
+        print(f"   - 总评论数：{sum(n.comments for n in notes)}")
+
+        # Show a short sample: the head of the first document and the most-liked note.
+        print("\n📝 内容示例：")
+        print(f"   文档摘要：{next(iter(documents.values()))[:100]}...")
+        if notes:
+            top = max(notes, key=lambda item: item.likes)
+            print(f"   热门笔记：{top.title} (👍{top.likes})")
+
+        return {"search_result": search_result, "documents": documents}
+    
+    def test_different_document_types(self):
+        """Summarise every mock document set and print the figures.
+
+        Returns:
+            A dict keyed by document type; each value has ``document_count``,
+            ``content_length`` (total characters) and ``files`` (the file names).
+        """
+        separator = "=" * 60
+        print("\n" + separator)
+        print("📊 测试不同文档类型的整合效果")
+        print(separator)
+
+        summaries = {}
+        for kind in ("travel_guide", "product_info", "user_reviews"):
+            print(f"\n🔸 处理文档类型：{kind}")
+            docs = self.create_mock_documents(kind)
+            entry = {
+                "document_count": len(docs),
+                "content_length": sum(map(len, docs.values())),
+                "files": list(docs),
+            }
+            summaries[kind] = entry
+            print(f"   ✅ 文档数量：{entry['document_count']}")
+            print(f"   ✅ 内容长度：{entry['content_length']}")
+            print(f"   ✅ 文件列表：{entry['files']}")
+        return summaries
+    
+    def test_content_filtering(self):
+        """Count notes passing simple like/comment/title filters and print basic statistics.
+
+        Returns:
+            A dict with ``total_notes``, ``high_quality_notes`` (likes >= 50),
+            ``interactive_notes`` (comments >= 10) and ``guide_notes`` (title contains
+            '攻略' or '指南').
+        """
+        rule = "=" * 60
+        print("\n" + rule)
+        print("🔍 测试内容过滤和筛选")
+        print(rule)
+        notes = self.get_search_result().notes
+
+        # Filter by like count.
+        liked = [n for n in notes if n.likes >= 50]
+        print(f"📈 高质量笔记（点赞≥50）：{len(liked)} 条")
+        # Filter by comment count.
+        discussed = [n for n in notes if n.comments >= 10]
+        print(f"💬 高互动笔记（评论≥10）：{len(discussed)} 条")
+        # Filter by guide-style keywords in the title.
+        guides = [n for n in notes if any(word in n.title for word in ('攻略', '指南'))]
+        print(f"📋 攻略类笔记：{len(guides)} 条")
+
+        if notes:
+            count = len(notes)
+            mean_likes = sum(n.likes for n in notes) / count
+            mean_comments = sum(n.comments for n in notes) / count
+            print("\n📊 统计信息：")
+            print(f"   平均点赞数：{mean_likes:.1f}")
+            print(f"   平均评论数：{mean_comments:.1f}")
+            # max() returns the first note encountered among ties.
+            top_liked = max(notes, key=lambda n: n.likes)
+            top_commented = max(notes, key=lambda n: n.comments)
+            print("\n🏆 最受欢迎：")
+            print(f"   最多点赞：《{top_liked.title}》- {top_liked.likes} 赞")
+            print(f"   最多评论：《{top_commented.title}》- {top_commented.comments} 评论")
+        return {
+            "total_notes": len(notes),
+            "high_quality_notes": len(liked),
+            "interactive_notes": len(discussed),
+            "guide_notes": len(guides),
+        }
+    
+    async def test_real_ai_integration(self):
+        """Run one real AI integration call over the loaded notes and the travel-guide documents.
+
+        Returns:
+            A dict with the response (pretty-printed when it parses as JSON), token counts,
+            timing, source counts and ``json_parsed``; None when any step raises.
+        """
+        print("\n" + "="*60)
+        print("🤖 测试真实AI整合功能")
+        print("="*60)
+
+        try:
+            # Prepare the inputs
+            search_result = self.get_search_result()
+            documents = self.create_mock_documents("travel_guide")
+            print("🔄 正在准备数据...")
+            # Format the document content
+            document_content = self._format_document_content(documents)
+            # Format the Xiaohongshu note content
+            xhs_content = self._format_xhs_notes(search_result.notes)
+
+            # Build the prompts
+            system_prompt = self.prompt_template.get_system_prompt()
+            user_prompt = self.prompt_template.build_user_prompt(
+                keywords=search_result.keyword or "馥桂萌宠园",
+                document_content=document_content or "暂无文档内容",
+                xhs_notes_content=xhs_content or "暂无笔记内容"
+            )
+
+            print(f"📝 System Prompt长度: {len(system_prompt)} 字符")
+            print(f"📝 User Prompt长度: {len(user_prompt)} 字符")
+
+            # Call the real AI; generate_text's fourth return value (time_cost) is reported as the elapsed time
+            print("🔄 正在调用AI进行内容整合...")
+            response_text, input_tokens, output_tokens, time_cost = await self.ai_agent.generate_text(
+                system_prompt=system_prompt,
+                user_prompt=user_prompt,
+                use_stream=True,
+                stage="content_integration",
+            )
+
+            print("✅ AI整合完成")
+            print(f"📄 生成内容长度：{len(response_text)} 字符")
+            print(f"🎯 输入Token数：{input_tokens}")
+            print(f"🎯 输出Token数：{output_tokens}")
+            print(f"⏱️ 处理时间：{time_cost:.2f} 秒")
+
+            # Try to parse the response as JSON. The flag is bound before the inner try so the
+            # result dict below never reads a name that a failed import/parse left unassigned.
+            json_parsed = False
+            try:
+                # Use the JSON helper from the file_io module
+                from utils.file_io import process_llm_json_text
+                parsed_json = process_llm_json_text(response_text)
+
+                if parsed_json:
+                    print("✅ JSON解析成功")
+                    print(f"📊 解析结果包含 {len(parsed_json)} 个顶级字段")
+
+                    # Show the shape of the parsed result
+                    if isinstance(parsed_json, dict):
+                        print("🏗️ 数据结构：")
+                        for key, value in parsed_json.items():
+                            if isinstance(value, dict):
+                                print(f"   {key}: 包含 {len(value)} 个子字段")
+                            elif isinstance(value, list):
+                                print(f"   {key}: 列表，包含 {len(value)} 个项目")
+                            else:
+                                print(f"   {key}: {type(value).__name__}")
+
+                    formatted_response = json.dumps(parsed_json, ensure_ascii=False, indent=2)
+                    json_parsed = True  # only set once the parsed form is actually in use
+                else:
+                    print("⚠️ JSON解析失败，使用原始响应")
+                    formatted_response = response_text
+            except Exception as parse_error:
+                print(f"⚠️ JSON解析出错: {parse_error}")
+                formatted_response = response_text
+
+            print(f"\n📝 AI响应预览：")
+            preview_text = formatted_response[:500] + "..." if len(formatted_response) > 500 else formatted_response
+            print(preview_text)
+
+            return {
+                "ai_response": formatted_response,
+                "content_length": len(response_text),
+                "input_tokens": input_tokens,
+                "output_tokens": output_tokens,
+                "processing_time": time_cost,
+                "source_notes": len(search_result.notes),
+                "source_docs": len(documents),
+                "json_parsed": json_parsed
+            }
+
+        except Exception as e:
+            print(f"❌ AI整合测试失败：{e}")
+            import traceback
+            traceback.print_exc()
+            return None
+    
+    def test_export_formats(self):
+        """Generate the canned text for each export format and print its length.
+
+        Returns:
+            A dict keyed by format name; each value holds ``content``, ``length`` and ``format``.
+        """
+        line = "=" * 60
+        print("\n" + line)
+        print("📤 测试不同导出格式")
+        print(line)
+
+        search_result = self.get_search_result()
+        documents = self.create_mock_documents("travel_guide")
+
+        # (format key, label used in the progress message), in output order.
+        labelled_formats = (
+            ("summary", "简要摘要格式"),
+            ("blog_post", "博客文章格式"),
+            ("travel_guide", "旅游攻略格式"),
+            ("product_sales", "产品销售格式"),
+            ("attraction_standard", "景点标准格式"),
+        )
+
+        exported = {}
+        for name, label in labelled_formats:
+            print(f"🔸 生成{label}...")
+            # The content is templated locally by _generate_format_content, not by the AI.
+            text = self._generate_format_content(name, search_result, documents)
+            exported[name] = {
+                "content": text,
+                "length": len(text),
+                "format": name,
+            }
+            print(f"   ✅ 长度：{len(text)} 字符")
+        return exported
+    
+    def _generate_format_content(self, format_type: str, search_result: SimpleSearchResult, documents: Dict[str, str]) -> str:
+        """Return the canned text for ``format_type``; ``documents`` is accepted but not used here."""
+        # Every template below embeds this shared lead-in sentence (it carries the note count).
+        base_info = f"基于{len(search_result.notes)}篇小红书游记和官方资料"
+
+        if format_type == "summary":
+            return f"【简要摘要】{base_info}，馥桂萌宠园是嘉定区优质亲子游目的地，特色萌宠互动体验，适合周末家庭出游。"
+
+        elif format_type == "blog_post":
+            return f"""# 周末带娃新选择 | 上海馥桂萌宠园深度体验
+
+今天想和大家分享一个超棒的亲子游目的地——位于嘉定的馥桂萌宠园！{base_info}，这里真的是遛娃神器！
+
+## 为什么推荐这里？
+🦘 可以近距离接触羊驼、袋鼠等萌宠
+🌸 环境优美，特别是桂花季节
+👨‍👩‍👧‍👦 设施完善，非常适合家庭游
+
+详细攻略请看下文..."""
+
+        elif format_type == "travel_guide":
+            return f"""## 上海馥桂萌宠园游玩攻略
+
+### 基本信息
+- 地址：上海市嘉定区朱桥镇
+- 门票：成人80元，儿童50元
+- 开放时间：9:00-17:00
+
+### 交通指南
+地铁11号线嘉定西站转公交，或自驾直达
+
+### 游玩亮点
+{base_info}，园区主要特色包括萌宠互动、户外游乐、休闲拍照等。
+
+### 注意事项
+建议游玩3-4小时，春秋季节体验最佳。"""
+
+        elif format_type == "product_sales":
+            return f"""🔥【限时特惠】上海馥桂萌宠园门票
+
+⭐ 产品亮点：
+• {base_info}，口碑爆棚！
+• 🦘 独特萌宠互动体验
+• 🌸 网红拍照打卡圣地  
+• 👨‍👩‍👧‍👦 亲子游首选目的地
+
+💰 特价套餐：
+原价130元，现价只要99元！
+包含：成人门票1张 + 动物喂食包1份
+
+📞 立即预订：xxx-xxxx-xxxx"""
+
+        else:  # attraction_standard (also reached for any unrecognised format_type)
+            return f"""景点名称：上海馥桂萌宠园
+景点类型：主题公园/动物园
+地理位置：上海市嘉定区朱桥镇
+开放状态：正常营业
+门票价格：80-50元
+适宜人群：亲子家庭
+推荐指数：4.2/5.0
+游玩时长：3-4小时
+最佳季节：春秋两季
+
+{base_info}，具有良好的口碑和游客体验。"""
+    
+    def test_parameter_effects(self):
+        """Print how the like/comment thresholds and per-document length caps change the data.
+
+        Returns:
+            A dict with ``threshold_tests`` and ``length_tests``: how many threshold
+            configurations and length limits were tried.
+        """
+        bar = "=" * 60
+        print("\n" + bar)
+        print("⚙️ 测试不同参数的效果")
+        print(bar)
+
+        notes = self.get_search_result().notes
+
+        # (minimum likes, minimum comments) pairs, from loosest to strictest.
+        thresholds = [(20, 5), (50, 10), (100, 20)]
+        print("🔧 测试不同过滤阈值的效果：")
+        for index, (min_likes, min_comments) in enumerate(thresholds, 1):
+            # The two conditions are counted independently, not combined.
+            by_likes = sum(1 for n in notes if n.likes >= min_likes)
+            by_comments = sum(1 for n in notes if n.comments >= min_comments)
+            print(f"   阈值{index}（点赞≥{min_likes}, 评论≥{min_comments}）:")
+            print(f"     - 符合点赞条件：{by_likes} 条")
+            print(f"     - 符合评论条件：{by_comments} 条")
+
+        # Character caps applied to each document; "..." is appended only when text is cut.
+        length_limits = [100, 200, 500]
+        print(f"\n📏 测试不同内容长度限制的效果：")
+        for cap in length_limits:
+            # The fixture is rebuilt for each cap, so every pass starts from untouched text.
+            docs = self.create_mock_documents("travel_guide")
+            total = 0
+            for text in docs.values():
+                clipped = text[:cap] + "..." if len(text) > cap else text
+                total += len(clipped)
+            print(f"   限制{cap}字符：总长度 {total} 字符")
+
+        return {
+            "threshold_tests": len(thresholds),
+            "length_tests": len(length_limits),
+        }
+
+async def main():
+    """Run the offline integration suite end to end, save the AI result to ai_results.json, print a summary."""
+    print("🚀 启动真实AI离线整合测试...")
+
+    # Check that the data file exists (relative path, so it is looked up in the current directory)
+    data_file = "batch_search_20250717_104407.json"
+    if not os.path.exists(data_file):
+        print(f"❌ 数据文件不存在：{data_file}")
+        return
+
+    # Build the tester; construction loads config, the AI agent, the prompts and the data
+    try:
+        tester = OfflineIntegrationTester(data_file)
+    except Exception as e:
+        print(f"❌ 测试器初始化失败：{e}")
+        import traceback
+        traceback.print_exc()
+        return
+
+    try:
+        # Run the individual tests
+        print(f"\n{'='*60}")
+        print("🧪 开始执行真实AI离线整合测试套件")
+        print(f"{'='*60}")
+
+        # 1. Basic integration test
+        basic_result = tester.test_basic_integration()
+
+        # 2. Document-type test
+        doc_type_results = tester.test_different_document_types()
+
+        # 3. Content-filtering test
+        filter_results = tester.test_content_filtering()
+        # The result file is opened as UTF-8 because ensure_ascii=False writes Chinese text and emoji verbatim.
+        # 4. Real AI integration test (async); a failed run is saved as JSON null
+        ai_results = await tester.test_real_ai_integration()
+        with open("ai_results.json", "w", encoding="utf-8") as f:
+            json.dump(ai_results, f, ensure_ascii=False, indent=2)
+        # 5. Export-format test
+        export_results = tester.test_export_formats()
+
+        # 6. Parameter-effect test
+        param_results = tester.test_parameter_effects()
+
+        # Print the summary
+        print(f"\n{'='*60}")
+        print("📋 测试结果总结")
+        print(f"{'='*60}")
+
+        print(f"✅ 基础整合测试：成功")
+        print(f"✅ 文档类型测试：处理了 {len(doc_type_results)} 种类型")
+        print(f"✅ 内容过滤测试：识别出 {filter_results['high_quality_notes']} 条高质量笔记")
+
+        if ai_results:
+            print(f"✅ 真实AI整合测试：成功")
+            print(f"   - 处理时间：{ai_results['processing_time']:.2f}秒")
+            print(f"   - 输入Token：{ai_results['input_tokens']}")
+            print(f"   - 输出Token：{ai_results['output_tokens']}")
+            print(f"   - JSON解析：{'成功' if ai_results['json_parsed'] else '失败'}")
+        else:
+            print(f"❌ 真实AI整合测试：失败")
+        # NOTE: the rest of the summary output is commented out; its statements are kept verbatim below.
+        # print(f"✅ 导出格式测试：生成了 {len(export_results)} 种格式")
+        # print(f"✅ 参数效果测试：测试了 {param_results['threshold_tests']} 种阈值配置")
+
+        # print(f"\n🎉 所有测试完成！")
+
+        # # Testing suggestions
+        # print(f"\n💡 测试建议：")
+        # print(f"   1. 可以调整AI模型参数（temperature、top_p等）来优化输出质量")
+        # print(f"   2. 可以修改integration prompt模板来改进整合效果")
+        # print(f"   3. 可以测试不同的文档类型组合")
+        # print(f"   4. 可以调整内容过滤的阈值")
+        # print(f"   5. 可以测试不同的导出格式效果")
+
+        # # Overview of the raw data
+        # if tester.raw_data:
+        #     summary = tester.raw_data.get('summary', {})
+        #     print(f"\n📊 原始数据概览：")
+        #     print(f"   - 总关键词数：{summary.get('total_keywords', 0)}")
+        #     print(f"   - 成功搜索数：{summary.get('successful_searches', 0)}")
+        #     print(f"   - 总笔记数：{summary.get('total_notes', 0)}")
+        #     print(f"   - 总互动数：{summary.get('total_interactions', 0)}")
+
+    except Exception as e:
+        print(f"❌ 测试过程中发生错误：{e}")
+        import traceback
+        traceback.print_exc()
+
+if __name__ == "__main__":  # run the async suite only when executed as a script
+    asyncio.run(main()) 
\ No newline at end of file