{
"file_path": "travel-algorithms/travel_algorithms/content_generation/content_generator.py",
"file_size": 12265,
"line_count": 353,
"functions": [
{
"name": "__init__",
"line_start": 26,
"line_end": 48,
"args": [
{
"name": "self"
},
{
"name": "config",
"type_hint": "AlgorithmConfig"
}
],
"return_type": null,
"docstring": "初始化内容生成器\n\nArgs:\n config: 算法配置",
"is_async": false,
"decorators": [],
"code": " def __init__(self, config: AlgorithmConfig):\n \"\"\"\n 初始化内容生成器\n\n Args:\n config: 算法配置\n \"\"\"\n self.config = config\n self.ai_service = AIService(config.ai_model)\n self.output_manager = OutputManager(config.output)\n self.prompt_manager = PromptManager(config.prompts, config.resources)\n \n # 初始化JSON处理器\n self.json_processor = JSONProcessor(\n enable_repair=config.content_generation.enable_json_repair,\n max_repair_attempts=config.content_generation.json_repair_attempts\n )\n \n # 获取任务特定的模型配置和字段配置\n self.task_model_config = config.ai_model.get_task_config(\"content_generation\")\n self.field_config = config.content_generation.result_field_mapping.get(\"content_generation\", {})\n \n logger.info(f\"内容生成器初始化完成,使用模型参数: {self.task_model_config}\")",
"code_hash": "a4aa3983bc905147da0ff79e7a39f46a"
},
{
"name": "_parse_content_result",
"line_start": 151,
"line_end": 181,
"args": [
{
"name": "self"
},
{
"name": "raw_content",
"type_hint": "str"
},
{
"name": "topic",
"type_hint": "Optional[Dict[str, Any]]"
}
],
"return_type": "Dict[str, Any]",
"docstring": "解析内容生成结果\n\nArgs:\n raw_content: AI原始输出\n topic: 原始主题信息\n\nReturns:\n 解析后的内容字典",
"is_async": false,
"decorators": [],
"code": " def _parse_content_result(self, raw_content: str, topic: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:\n \"\"\"\n 解析内容生成结果\n\n Args:\n raw_content: AI原始输出\n topic: 原始主题信息\n\n Returns:\n 解析后的内容字典\n \"\"\"\n try:\n # 使用JSON处理器解析\n parsed_data = self.json_processor.parse_llm_output(\n raw_output=raw_content,\n expected_fields=self.field_config.get(\"expected_fields\", [\"title\", \"content\", \"tag\"]),\n required_fields=self.field_config.get(\"required_fields\", [\"title\", \"content\"])\n )\n\n # 根据实际提示词返回格式进行标准化\n if isinstance(parsed_data, dict):\n content_data = self._normalize_content_data(parsed_data, topic)\n else:\n # 如果不是预期的字典格式,创建默认结构\n content_data = self._create_fallback_content_data(raw_content, topic)\n\n return content_data\n\n except Exception as e:\n logger.warning(f\"JSON解析失败使用回退方案: {e}\")\n return self._create_fallback_content_data(raw_content, topic)",
"code_hash": "adf9e723962b8af5a76599afd15f51ee"
},
{
"name": "_normalize_content_data",
"line_start": 183,
"line_end": 235,
"args": [
{
"name": "self"
},
{
"name": "parsed_data",
"type_hint": "Dict[str, Any]"
},
{
"name": "topic",
"type_hint": "Optional[Dict[str, Any]]"
}
],
"return_type": "Dict[str, Any]",
"docstring": "标准化内容数据格式\n\nArgs:\n parsed_data: 解析后的数据\n topic: 原始主题信息\n\nReturns:\n 标准化后的内容字典",
"is_async": false,
"decorators": [],
"code": " def _normalize_content_data(self, parsed_data: Dict[str, Any], topic: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:\n \"\"\"\n 标准化内容数据格式\n\n Args:\n parsed_data: 解析后的数据\n topic: 原始主题信息\n\n Returns:\n 标准化后的内容字典\n \"\"\"\n # 根据实际提示词的输出格式进行标准化\n normalized_data = {\n # 核心字段根据实际system.txt的输出格式\n \"title\": parsed_data.get(\"title\", \"\"),\n \"content\": parsed_data.get(\"content\", \"\"),\n \"tag\": parsed_data.get(\"tag\", \"\"),\n \n # 计算字段\n \"word_count\": len(parsed_data.get(\"content\", \"\")),\n \"title_length\": len(parsed_data.get(\"title\", \"\")),\n \"tag_count\": len(parsed_data.get(\"tag\", \"\").split(\"#\")) - 1 if parsed_data.get(\"tag\") else 0,\n \n # 元数据\n \"metadata\": {\n \"source_topic\": topic,\n \"generation_config\": self.task_model_config,\n \"field_config\": self.field_config,\n \"generated_at\": self.output_manager.run_id,\n \"format_version\": \"v1\",\n \"original_parsed_data\": parsed_data\n }\n }\n\n # 验证关键字段\n if not normalized_data[\"title\"] or not normalized_data[\"content\"]:\n logger.warning(\"解析结果缺少关键字段,尝试从原始数据提取\")\n \n # 尝试其他可能的字段名\n alternative_mappings = {\n \"title\": [\"标题\", \"主题\", \"title\", \"headline\"],\n \"content\": [\"内容\", \"正文\", \"content\", \"text\", \"body\"],\n \"tag\": [\"标签\", \"tag\", \"tags\", \"labels\"]\n }\n \n for std_field, alternatives in alternative_mappings.items():\n if not normalized_data[std_field]:\n for alt_field in alternatives:\n if alt_field in parsed_data and parsed_data[alt_field]:\n normalized_data[std_field] = parsed_data[alt_field]\n break\n\n return normalized_data",
"code_hash": "5bfb220ef95ac52397bb577a0a8ce558"
},
{
"name": "_create_fallback_content_data",
"line_start": 237,
"line_end": 283,
"args": [
{
"name": "self"
},
{
"name": "raw_content",
"type_hint": "str"
},
{
"name": "topic",
"type_hint": "Optional[Dict[str, Any]]"
}
],
"return_type": "Dict[str, Any]",
"docstring": "创建回退的内容数据当JSON解析失败时\n\nArgs:\n raw_content: 原始内容\n topic: 主题信息\n\nReturns:\n 回退的内容字典",
"is_async": false,
"decorators": [],
"code": " def _create_fallback_content_data(self, raw_content: str, topic: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:\n \"\"\"\n 创建回退的内容数据当JSON解析失败时\n\n Args:\n raw_content: 原始内容\n topic: 主题信息\n\n Returns:\n 回退的内容字典\n \"\"\"\n # 尝试从原始文本中提取结构化信息\n extracted_title = \"\"\n extracted_content = raw_content\n extracted_tag = \"\"\n\n # 简单的文本解析逻辑\n lines = raw_content.split('\\n')\n if lines:\n first_line = lines[0].strip()\n # 如果第一行较短且不以句号结尾,可能是标题\n if len(first_line) < 100 and not first_line.endswith('。'):\n extracted_title = first_line\n extracted_content = '\\n'.join(lines[1:]).strip()\n\n # 寻找标签(通常包含#符号)\n for line in lines:\n if '#' in line:\n extracted_tag = line.strip()\n break\n\n return {\n \"title\": extracted_title,\n \"content\": extracted_content,\n \"tag\": extracted_tag,\n \"word_count\": len(extracted_content),\n \"title_length\": len(extracted_title),\n \"tag_count\": extracted_tag.count('#'),\n \"metadata\": {\n \"source_topic\": topic,\n \"generation_config\": self.task_model_config,\n \"generated_at\": self.output_manager.run_id,\n \"format_version\": \"fallback\",\n \"parsing_method\": \"text_extraction\",\n \"original_raw_content\": raw_content[:500] + \"...\" if len(raw_content) > 500 else raw_content\n }\n }",
"code_hash": "461e0066e4cb7732bacb1e734eff651e"
},
{
"name": "get_generation_stats",
"line_start": 336,
"line_end": 354,
"args": [
{
"name": "self"
}
],
"return_type": "Dict[str, Any]",
"docstring": "获取生成统计信息\n\nReturns:\n 统计信息字典",
"is_async": false,
"decorators": [],
"code": " def get_generation_stats(self) -> Dict[str, Any]:\n \"\"\"\n 获取生成统计信息\n\n Returns:\n 统计信息字典\n \"\"\"\n return {\n \"task_model_config\": self.task_model_config,\n \"field_config\": self.field_config,\n \"output_directory\": str(self.output_manager.run_output_dir),\n \"ai_model_info\": self.ai_service.get_model_info(),\n \"prompt_templates\": self.prompt_manager.get_available_templates().get(\"content_generation\", {}),\n \"json_processor_enabled\": self.json_processor.enable_repair,\n \"content_config\": {\n \"enable_auto_judge\": self.config.content_generation.enable_auto_judge,\n \"judge_threshold\": self.config.content_generation.judge_threshold\n }\n } ",
"code_hash": "eb3347b32eb79d5e7ed3b2d84b3ffc31"
}
],
"classes": [
{
"name": "ContentGenerator",
"line_start": 20,
"line_end": 354,
"bases": [],
"methods": [
{
"name": "__init__",
"line_start": 26,
"line_end": 48,
"args": [
{
"name": "self"
},
{
"name": "config",
"type_hint": "AlgorithmConfig"
}
],
"return_type": null,
"docstring": "初始化内容生成器\n\nArgs:\n config: 算法配置",
"is_async": false,
"decorators": [],
"code": " def __init__(self, config: AlgorithmConfig):\n \"\"\"\n 初始化内容生成器\n\n Args:\n config: 算法配置\n \"\"\"\n self.config = config\n self.ai_service = AIService(config.ai_model)\n self.output_manager = OutputManager(config.output)\n self.prompt_manager = PromptManager(config.prompts, config.resources)\n \n # 初始化JSON处理器\n self.json_processor = JSONProcessor(\n enable_repair=config.content_generation.enable_json_repair,\n max_repair_attempts=config.content_generation.json_repair_attempts\n )\n \n # 获取任务特定的模型配置和字段配置\n self.task_model_config = config.ai_model.get_task_config(\"content_generation\")\n self.field_config = config.content_generation.result_field_mapping.get(\"content_generation\", {})\n \n logger.info(f\"内容生成器初始化完成,使用模型参数: {self.task_model_config}\")",
"code_hash": "a4aa3983bc905147da0ff79e7a39f46a"
},
{
"name": "generate_content",
"line_start": 50,
"line_end": 149,
"args": [
{
"name": "self"
},
{
"name": "style_content",
"type_hint": "str"
},
{
"name": "demand_content",
"type_hint": "str"
},
{
"name": "object_content",
"type_hint": "str"
},
{
"name": "refer_content",
"type_hint": "str"
},
{
"name": "product_content",
"type_hint": "str"
},
{
"name": "topic",
"type_hint": "Optional[Dict[str, Any]]"
}
],
"return_type": "Tuple[str, Dict[str, Any]]",
"docstring": "生成内容\n\nArgs:\n style_content: 风格内容\n demand_content: 需求内容\n object_content: 对象内容(景区信息)\n refer_content: 参考内容\n product_content: 产品内容\n topic: 主题信息(可选)\n **kwargs: 其他参数\n\nReturns:\n Tuple[请求ID, 生成的内容字典]\n\nRaises:\n ContentGenerationError: 生成失败时抛出",
"is_async": true,
"decorators": [],
"code": " async def generate_content(\n self,\n style_content: str,\n demand_content: str,\n object_content: str,\n refer_content: str,\n product_content: str,\n topic: Optional[Dict[str, Any]] = None,\n **kwargs\n ) -> Tuple[str, Dict[str, Any]]:\n \"\"\"\n 生成内容\n\n Args:\n style_content: 风格内容\n demand_content: 需求内容\n object_content: 对象内容(景区信息)\n refer_content: 参考内容\n product_content: 产品内容\n topic: 主题信息(可选)\n **kwargs: 其他参数\n\n Returns:\n Tuple[请求ID, 生成的内容字典]\n\n Raises:\n ContentGenerationError: 生成失败时抛出\n \"\"\"\n try:\n topic_title = topic.get('title', 'Unknown') if topic else 'Direct Content'\n logger.info(f\"开始生成内容,主题: {topic_title}\")\n\n # 1. 构建提示词\n system_prompt = self.prompt_manager.get_prompt(\"content_generation\", \"system\")\n user_prompt_template = self.prompt_manager.get_prompt(\"content_generation\", \"user\")\n \n # 格式化用户提示词(根据实际模板格式)\n user_prompt = self.prompt_manager.format_prompt(\n user_prompt_template,\n style_content=style_content,\n demand_content=demand_content,\n object_content=object_content,\n refer_content=refer_content,\n product_content=product_content,\n **kwargs\n )\n\n # 创建子目录保存内容\n topic_id = topic.get('id', 'direct_content') if topic else 'direct_content'\n subfolder = f\"topic_{topic_id}\"\n\n # 保存提示词(如果配置允许)\n if self.config.output.save_prompts:\n self.output_manager.save_text(system_prompt, \"system_prompt\", \"content_generation\", subfolder)\n self.output_manager.save_text(user_prompt, \"user_prompt\", \"content_generation\", subfolder)\n\n # 2. 调用AI生成\n content, input_tokens, output_tokens, elapsed_time = await self.ai_service.generate_text(\n system_prompt=system_prompt,\n user_prompt=user_prompt,\n stage=\"内容生成\",\n **self.task_model_config\n )\n\n # 保存原始响应(如果配置允许)\n if self.config.output.save_raw_responses:\n self.output_manager.save_text(content, \"raw_response\", \"content_generation\", subfolder)\n\n # 3. 解析和结构化内容\n content_data = self._parse_content_result(content, topic)\n\n # 4. 保存结果\n self.output_manager.save_json(content_data, \"content\", \"\", subfolder)\n \n # 5. 保存元数据\n metadata = {\n \"topic\": topic,\n \"materials\": {\n \"style_content\": style_content[:100] + \"...\" if len(style_content) > 100 else style_content,\n \"demand_content\": demand_content[:100] + \"...\" if len(demand_content) > 100 else demand_content,\n \"object_content\": object_content[:100] + \"...\" if len(object_content) > 100 else object_content,\n \"product_content\": product_content[:100] + \"...\" if len(product_content) > 100 else product_content\n },\n \"field_config\": self.field_config,\n \"model_config\": self.task_model_config,\n \"tokens\": {\n \"input\": input_tokens,\n \"output\": output_tokens\n },\n \"elapsed_time\": elapsed_time\n }\n self.output_manager.save_metadata(metadata, \"content_generation\", subfolder)\n\n
"code_hash": "196ed64edcbabdd0086aa86557a10918"
},
{
"name": "_parse_content_result",
"line_start": 151,
"line_end": 181,
"args": [
{
"name": "self"
},
{
"name": "raw_content",
"type_hint": "str"
},
{
"name": "topic",
"type_hint": "Optional[Dict[str, Any]]"
}
],
"return_type": "Dict[str, Any]",
"docstring": "解析内容生成结果\n\nArgs:\n raw_content: AI原始输出\n topic: 原始主题信息\n\nReturns:\n 解析后的内容字典",
"is_async": false,
"decorators": [],
"code": " def _parse_content_result(self, raw_content: str, topic: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:\n \"\"\"\n 解析内容生成结果\n\n Args:\n raw_content: AI原始输出\n topic: 原始主题信息\n\n Returns:\n 解析后的内容字典\n \"\"\"\n try:\n # 使用JSON处理器解析\n parsed_data = self.json_processor.parse_llm_output(\n raw_output=raw_content,\n expected_fields=self.field_config.get(\"expected_fields\", [\"title\", \"content\", \"tag\"]),\n required_fields=self.field_config.get(\"required_fields\", [\"title\", \"content\"])\n )\n\n # 根据实际提示词返回格式进行标准化\n if isinstance(parsed_data, dict):\n content_data = self._normalize_content_data(parsed_data, topic)\n else:\n # 如果不是预期的字典格式,创建默认结构\n content_data = self._create_fallback_content_data(raw_content, topic)\n\n return content_data\n\n except Exception as e:\n logger.warning(f\"JSON解析失败使用回退方案: {e}\")\n return self._create_fallback_content_data(raw_content, topic)",
"code_hash": "adf9e723962b8af5a76599afd15f51ee"
},
{
"name": "_normalize_content_data",
"line_start": 183,
"line_end": 235,
"args": [
{
"name": "self"
},
{
"name": "parsed_data",
"type_hint": "Dict[str, Any]"
},
{
"name": "topic",
"type_hint": "Optional[Dict[str, Any]]"
}
],
"return_type": "Dict[str, Any]",
"docstring": "标准化内容数据格式\n\nArgs:\n parsed_data: 解析后的数据\n topic: 原始主题信息\n\nReturns:\n 标准化后的内容字典",
"is_async": false,
"decorators": [],
"code": " def _normalize_content_data(self, parsed_data: Dict[str, Any], topic: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:\n \"\"\"\n 标准化内容数据格式\n\n Args:\n parsed_data: 解析后的数据\n topic: 原始主题信息\n\n Returns:\n 标准化后的内容字典\n \"\"\"\n # 根据实际提示词的输出格式进行标准化\n normalized_data = {\n # 核心字段根据实际system.txt的输出格式\n \"title\": parsed_data.get(\"title\", \"\"),\n \"content\": parsed_data.get(\"content\", \"\"),\n \"tag\": parsed_data.get(\"tag\", \"\"),\n \n # 计算字段\n \"word_count\": len(parsed_data.get(\"content\", \"\")),\n \"title_length\": len(parsed_data.get(\"title\", \"\")),\n \"tag_count\": len(parsed_data.get(\"tag\", \"\").split(\"#\")) - 1 if parsed_data.get(\"tag\") else 0,\n \n # 元数据\n \"metadata\": {\n \"source_topic\": topic,\n \"generation_config\": self.task_model_config,\n \"field_config\": self.field_config,\n \"generated_at\": self.output_manager.run_id,\n \"format_version\": \"v1\",\n \"original_parsed_data\": parsed_data\n }\n }\n\n # 验证关键字段\n if not normalized_data[\"title\"] or not normalized_data[\"content\"]:\n logger.warning(\"解析结果缺少关键字段,尝试从原始数据提取\")\n \n # 尝试其他可能的字段名\n alternative_mappings = {\n \"title\": [\"标题\", \"主题\", \"title\", \"headline\"],\n \"content\": [\"内容\", \"正文\", \"content\", \"text\", \"body\"],\n \"tag\": [\"标签\", \"tag\", \"tags\", \"labels\"]\n }\n \n for std_field, alternatives in alternative_mappings.items():\n if not normalized_data[std_field]:\n for alt_field in alternatives:\n if alt_field in parsed_data and parsed_data[alt_field]:\n normalized_data[std_field] = parsed_data[alt_field]\n break\n\n return normalized_data",
"code_hash": "5bfb220ef95ac52397bb577a0a8ce558"
},
{
"name": "_create_fallback_content_data",
"line_start": 237,
"line_end": 283,
"args": [
{
"name": "self"
},
{
"name": "raw_content",
"type_hint": "str"
},
{
"name": "topic",
"type_hint": "Optional[Dict[str, Any]]"
}
],
"return_type": "Dict[str, Any]",
"docstring": "创建回退的内容数据当JSON解析失败时\n\nArgs:\n raw_content: 原始内容\n topic: 主题信息\n\nReturns:\n 回退的内容字典",
"is_async": false,
"decorators": [],
"code": " def _create_fallback_content_data(self, raw_content: str, topic: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:\n \"\"\"\n 创建回退的内容数据当JSON解析失败时\n\n Args:\n raw_content: 原始内容\n topic: 主题信息\n\n Returns:\n 回退的内容字典\n \"\"\"\n # 尝试从原始文本中提取结构化信息\n extracted_title = \"\"\n extracted_content = raw_content\n extracted_tag = \"\"\n\n # 简单的文本解析逻辑\n lines = raw_content.split('\\n')\n if lines:\n first_line = lines[0].strip()\n # 如果第一行较短且不以句号结尾,可能是标题\n if len(first_line) < 100 and not first_line.endswith('。'):\n extracted_title = first_line\n extracted_content = '\\n'.join(lines[1:]).strip()\n\n # 寻找标签(通常包含#符号)\n for line in lines:\n if '#' in line:\n extracted_tag = line.strip()\n break\n\n return {\n \"title\": extracted_title,\n \"content\": extracted_content,\n \"tag\": extracted_tag,\n \"word_count\": len(extracted_content),\n \"title_length\": len(extracted_title),\n \"tag_count\": extracted_tag.count('#'),\n \"metadata\": {\n \"source_topic\": topic,\n \"generation_config\": self.task_model_config,\n \"generated_at\": self.output_manager.run_id,\n \"format_version\": \"fallback\",\n \"parsing_method\": \"text_extraction\",\n \"original_raw_content\": raw_content[:500] + \"...\" if len(raw_content) > 500 else raw_content\n }\n }",
"code_hash": "461e0066e4cb7732bacb1e734eff651e"
},
{
"name": "generate_content_batch",
"line_start": 285,
"line_end": 311,
"args": [
{
"name": "self"
},
{
"name": "content_requests",
"type_hint": "List[Dict[str, Any]]"
}
],
"return_type": "Dict[str, Dict[str, Any]]",
"docstring": "批量生成内容\n\nArgs:\n content_requests: 内容生成请求列表\n\nReturns:\n 批次ID->内容字典的映射",
"is_async": true,
"decorators": [],
"code": " async def generate_content_batch(\n self,\n content_requests: List[Dict[str, Any]]\n ) -> Dict[str, Dict[str, Any]]:\n \"\"\"\n 批量生成内容\n\n Args:\n content_requests: 内容生成请求列表\n\n Returns:\n 批次ID->内容字典的映射\n \"\"\"\n results = {}\n \n for i, request in enumerate(content_requests):\n try:\n logger.info(f\"批量生成内容 {i+1}/{len(content_requests)}\")\n \n request_id, content_data = await self.generate_content(**request)\n results[f\"request_{i+1}\"] = content_data\n \n except Exception as e:\n logger.error(f\"批量生成第 {i+1} 项失败: {e}\")\n results[f\"request_{i+1}\"] = {\"error\": str(e)}\n \n return results",
"code_hash": "aefbf0a4c79966d70c56a35d0ca43ee3"
},
{
"name": "test_generation",
"line_start": 313,
"line_end": 334,
"args": [
{
"name": "self"
}
],
"return_type": "bool",
"docstring": "测试内容生成功能\n\nReturns:\n 测试是否成功",
"is_async": true,
"decorators": [],
"code": " async def test_generation(self) -> bool:\n \"\"\"\n 测试内容生成功能\n\n Returns:\n 测试是否成功\n \"\"\"\n try:\n test_materials = {\n \"style_content\": \"攻略风格文案\",\n \"demand_content\": \"年轻人周末游需求\",\n \"object_content\": \"上海外滩景区信息\",\n \"refer_content\": \"参考文案范例\",\n \"product_content\": \"外滩一日游产品\"\n }\n \n _, content_data = await self.generate_content(**test_materials)\n return len(content_data.get('content', '')) > 50\n \n except Exception as e:\n logger.error(f\"内容生成测试失败: {e}\")\n return False",
"code_hash": "e66518f839586eee90c7ac8425115fea"
},
{
"name": "get_generation_stats",
"line_start": 336,
"line_end": 354,
"args": [
{
"name": "self"
}
],
"return_type": "Dict[str, Any]",
"docstring": "获取生成统计信息\n\nReturns:\n 统计信息字典",
"is_async": false,
"decorators": [],
"code": " def get_generation_stats(self) -> Dict[str, Any]:\n \"\"\"\n 获取生成统计信息\n\n Returns:\n 统计信息字典\n \"\"\"\n return {\n \"task_model_config\": self.task_model_config,\n \"field_config\": self.field_config,\n \"output_directory\": str(self.output_manager.run_output_dir),\n \"ai_model_info\": self.ai_service.get_model_info(),\n \"prompt_templates\": self.prompt_manager.get_available_templates().get(\"content_generation\", {}),\n \"json_processor_enabled\": self.json_processor.enable_repair,\n \"content_config\": {\n \"enable_auto_judge\": self.config.content_generation.enable_auto_judge,\n \"judge_threshold\": self.config.content_generation.judge_threshold\n }\n } ",
"code_hash": "eb3347b32eb79d5e7ed3b2d84b3ffc31"
}
],
"docstring": "内容生成器 - 重构版本\n负责根据主题生成详细的旅游内容支持真实的字段格式",
"decorators": [],
"code": "class ContentGenerator:\n \"\"\"\n 内容生成器 - 重构版本\n 负责根据主题生成详细的旅游内容,支持真实的字段格式\n \"\"\"\n\n def __init__(self, config: AlgorithmConfig):\n \"\"\"\n 初始化内容生成器\n\n Args:\n config: 算法配置\n \"\"\"\n self.config = config\n self.ai_service = AIService(config.ai_model)\n self.output_manager = OutputManager(config.output)\n self.prompt_manager = PromptManager(config.prompts, config.resources)\n \n # 初始化JSON处理器\n self.json_processor = JSONProcessor(\n enable_repair=config.content_generation.enable_json_repair,\n max_repair_attempts=config.content_generation.json_repair_attempts\n )\n \n # 获取任务特定的模型配置和字段配置\n self.task_model_config = config.ai_model.get_task_config(\"content_generation\")\n self.field_config = config.content_generation.result_field_mapping.get(\"content_generation\", {})\n \n logger.info(f\"内容生成器初始化完成,使用模型参数: {self.task_model_config}\")\n\n async def generate_content(\n self,\n style_content: str,\n demand_content: str,\n object_content: str,\n refer_content: str,\n product_content: str,\n topic: Optional[Dict[str, Any]] = None,\n **kwargs\n ) -> Tuple[str, Dict[str, Any]]:\n \"\"\"\n 生成内容\n\n Args:\n style_content: 风格内容\n demand_content: 需求内容\n object_content: 对象内容(景区信息)\n refer_content: 参考内容\n product_content: 产品内容\n topic: 主题信息(可选)\n **kwargs: 其他参数\n\n Returns:\n Tuple[请求ID, 生成的内容字典]\n\n Raises:\n ContentGenerationError: 生成失败时抛出\n \"\"\"\n try:\n topic_title = topic.get('title', 'Unknown') if topic else 'Direct Content'\n logger.info(f\"开始生成内容,主题: {topic_title}\")\n\n # 1. 构建提示词\n system_prompt = self.prompt_manager.get_prompt(\"content_generation\", \"system\")\n user_prompt_template = self.prompt_manager.get_prompt(\"content_generation\", \"user\")\n \n # 格式化用户提示词(根据实际模板格式)\n user_prompt = self.prompt_manager.format_prompt(\n user_prompt_template,\n style_content=style_content,\n demand_content=demand_content,\n object_content=object_content,\n refer_content=refer_content,\n product_content=product_content,\n **kwargs\n )\n\n # 创建子目录保存内容\n topic_id = topic.get('id', 'direct_content') if topic else 'direct_content'\n subfolder = f\"topic_{topic_id}\"\n\n # 保存提示词(如果配置允许)\n if self.config.output.save_prompts:\n self.output_manager.save_text(system_prompt, \"system_prompt\", \"content_generation\", subfolder)\n self.output_manager.save_text(user_prompt, \"user_prompt\", \"content_generation\", subfolder)\n\n # 2. 调用AI生成\n content, input_tokens, output_tokens, elapsed_time = await self.ai_service.generate_text(\n system_prompt=system_prompt,\n user_prompt=user_prompt,\n stage=\"内容生成\",\n **self.task_model_config\n )\n\n # 保存原始响应(如果配置允许)\n if self.config.output.save_raw_responses:\n self.output_manager.save_text(content, \"raw_response\", \"content_generation\", subfolder)\n\n # 3. \n content_data = self._parse_content_result(content, topic)\n
"code_hash": "274f5ef2a9e350ed1c1759427106d4f3"
}
],
"imports": [
{
"type": "import",
"modules": [
"logging"
],
"aliases": []
},
{
"type": "from_import",
"module": "typing",
"names": [
"Dict",
"Any",
"List",
"Optional",
"Tuple"
],
"aliases": [],
"level": 0
},
{
"type": "import",
"modules": [
"json"
],
"aliases": []
},
{
"type": "from_import",
"module": "config",
"names": [
"AlgorithmConfig"
],
"aliases": [],
"level": 2
},
{
"type": "from_import",
"module": "core",
"names": [
"AIService",
"OutputManager",
"PromptManager",
"JSONProcessor"
],
"aliases": [],
"level": 2
},
{
"type": "from_import",
"module": "exceptions",
"names": [
"ContentGenerationError"
],
"aliases": [],
"level": 2
}
],
"constants": [],
"docstring": "Content Generator\n内容生成器 - 重构版本使用动态提示词和JSON处理支持真实的内容格式",
"content_hash": "fc162f397f337e584d5951ca18c75f9a"
}