2025-12-08 14:58:35 +08:00
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
内容生成引擎 V2
|
|
|
|
|
|
- 不访问数据库,接收完整数据
|
|
|
|
|
|
- 使用 PromptRegistry 管理 prompt
|
|
|
|
|
|
- 统一依赖注入
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
import logging
|
2025-12-09 21:16:44 +08:00
|
|
|
|
from typing import Dict, Any, Optional, List
|
2025-12-08 14:58:35 +08:00
|
|
|
|
|
|
|
|
|
|
from .base import BaseAIGCEngine, EngineResult
|
|
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ContentGenerateEngineV2(BaseAIGCEngine):
|
|
|
|
|
|
"""
|
|
|
|
|
|
内容生成引擎 V2
|
|
|
|
|
|
|
|
|
|
|
|
改进:
|
|
|
|
|
|
1. 不访问数据库,所有数据由调用方传入
|
|
|
|
|
|
2. 使用 PromptRegistry 管理 prompt
|
|
|
|
|
|
3. 接收完整对象而非 ID
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
2025-12-09 21:16:44 +08:00
|
|
|
|
engine_id = "content_generate"
|
|
|
|
|
|
engine_name = "内容生成"
|
|
|
|
|
|
version = "2.1.0"
|
|
|
|
|
|
description = "根据选题信息生成小红书风格的营销文案"
|
2025-12-08 14:58:35 +08:00
|
|
|
|
|
|
|
|
|
|
def __init__(self):
|
|
|
|
|
|
super().__init__()
|
|
|
|
|
|
self._prompt_registry = None
|
2025-12-09 21:16:44 +08:00
|
|
|
|
self._reference_manager = None
|
2025-12-08 14:58:35 +08:00
|
|
|
|
|
|
|
|
|
|
def get_param_schema(self) -> Dict[str, Any]:
|
2025-12-09 21:16:44 +08:00
|
|
|
|
"""
|
|
|
|
|
|
定义参数结构
|
|
|
|
|
|
|
|
|
|
|
|
V2.2: 合并 scenic_spot 和 product 为 subject
|
|
|
|
|
|
"""
|
2025-12-08 14:58:35 +08:00
|
|
|
|
return {
|
|
|
|
|
|
# 选题信息
|
|
|
|
|
|
"topic": {
|
|
|
|
|
|
"type": "object",
|
|
|
|
|
|
"required": True,
|
2025-12-09 21:16:44 +08:00
|
|
|
|
"desc": "选题信息 {index, date, title, subject_name, product_name, style, audience, ...}",
|
2025-12-08 14:58:35 +08:00
|
|
|
|
},
|
|
|
|
|
|
|
2025-12-09 21:16:44 +08:00
|
|
|
|
# 主体信息 (景区+产品合并)
|
|
|
|
|
|
"subject": {
|
|
|
|
|
|
"type": "object",
|
|
|
|
|
|
"required": False,
|
|
|
|
|
|
"desc": "主体信息 {id, name, type, description, location, products: [...]}",
|
|
|
|
|
|
},
|
|
|
|
|
|
# 兼容旧字段
|
2025-12-08 14:58:35 +08:00
|
|
|
|
"scenic_spot": {
|
|
|
|
|
|
"type": "object",
|
|
|
|
|
|
"required": False,
|
2025-12-09 21:16:44 +08:00
|
|
|
|
"desc": "[兼容] 景区信息,建议使用 subject",
|
2025-12-08 14:58:35 +08:00
|
|
|
|
},
|
|
|
|
|
|
"product": {
|
|
|
|
|
|
"type": "object",
|
|
|
|
|
|
"required": False,
|
2025-12-09 21:16:44 +08:00
|
|
|
|
"desc": "[兼容] 产品信息,建议使用 subject.products",
|
2025-12-08 14:58:35 +08:00
|
|
|
|
},
|
2025-12-09 21:16:44 +08:00
|
|
|
|
|
2025-12-08 14:58:35 +08:00
|
|
|
|
"style": {
|
|
|
|
|
|
"type": "object",
|
|
|
|
|
|
"required": False,
|
2025-12-09 21:16:44 +08:00
|
|
|
|
"desc": "风格信息对象 {id, name}",
|
2025-12-08 14:58:35 +08:00
|
|
|
|
},
|
|
|
|
|
|
"audience": {
|
|
|
|
|
|
"type": "object",
|
|
|
|
|
|
"required": False,
|
2025-12-09 21:16:44 +08:00
|
|
|
|
"desc": "受众信息对象 {id, name}",
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
|
|
|
|
# 热点信息
|
|
|
|
|
|
"hot_topics": {
|
|
|
|
|
|
"type": "object",
|
|
|
|
|
|
"required": False,
|
|
|
|
|
|
"desc": "热点信息 {events: [], festivals: [], trending: []}",
|
2025-12-08 14:58:35 +08:00
|
|
|
|
},
|
|
|
|
|
|
|
|
|
|
|
|
# 参考内容
|
2025-12-09 21:16:44 +08:00
|
|
|
|
"reference": {
|
|
|
|
|
|
"type": "object",
|
2025-12-08 14:58:35 +08:00
|
|
|
|
"required": False,
|
2025-12-09 21:16:44 +08:00
|
|
|
|
"desc": "参考内容 {mode: 'none'/'reference'/'rewrite', title, content}. reference=参考风格原创内容, rewrite=保留框架换主体",
|
2025-12-08 14:58:35 +08:00
|
|
|
|
},
|
|
|
|
|
|
|
|
|
|
|
|
# 审核选项
|
|
|
|
|
|
"enable_judge": {
|
|
|
|
|
|
"type": "bool",
|
|
|
|
|
|
"required": False,
|
|
|
|
|
|
"default": True,
|
|
|
|
|
|
"desc": "是否启用内容审核",
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
|
|
|
|
# Prompt 版本控制
|
|
|
|
|
|
"prompt_version": {
|
|
|
|
|
|
"type": "str",
|
|
|
|
|
|
"required": False,
|
|
|
|
|
|
"default": "latest",
|
|
|
|
|
|
"desc": "使用的 prompt 版本",
|
|
|
|
|
|
},
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
def estimate_duration(self, params: Dict[str, Any]) -> int:
|
|
|
|
|
|
"""预估执行时间"""
|
|
|
|
|
|
enable_judge = params.get('enable_judge', True)
|
|
|
|
|
|
return 30 if enable_judge else 20
|
|
|
|
|
|
|
|
|
|
|
|
async def execute(self, params: Dict[str, Any], task_id: str = None) -> EngineResult:
|
|
|
|
|
|
"""执行内容生成"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
self.log("开始生成内容 (V2)")
|
|
|
|
|
|
self.set_progress(task_id, 10)
|
|
|
|
|
|
|
|
|
|
|
|
# 提取参数
|
|
|
|
|
|
topic = params.get('topic', {})
|
2025-12-09 21:16:44 +08:00
|
|
|
|
|
|
|
|
|
|
# 主体信息 (支持新旧两种格式)
|
|
|
|
|
|
subject = params.get('subject')
|
2025-12-08 14:58:35 +08:00
|
|
|
|
scenic_spot = params.get('scenic_spot')
|
|
|
|
|
|
product = params.get('product')
|
2025-12-09 21:16:44 +08:00
|
|
|
|
|
|
|
|
|
|
# 兼容处理: 如果没有 subject,从 scenic_spot + product 构建
|
|
|
|
|
|
if not subject and scenic_spot:
|
|
|
|
|
|
subject = {
|
|
|
|
|
|
**scenic_spot,
|
|
|
|
|
|
'type': scenic_spot.get('type', 'scenic_spot'),
|
|
|
|
|
|
'products': [product] if product else []
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-12-08 14:58:35 +08:00
|
|
|
|
style = params.get('style')
|
|
|
|
|
|
audience = params.get('audience')
|
2025-12-09 21:16:44 +08:00
|
|
|
|
hot_topics = params.get('hot_topics')
|
|
|
|
|
|
reference = params.get('reference')
|
2025-12-08 14:58:35 +08:00
|
|
|
|
enable_judge = params.get('enable_judge', True)
|
|
|
|
|
|
prompt_version = params.get('prompt_version', 'latest')
|
|
|
|
|
|
|
|
|
|
|
|
self.set_progress(task_id, 20)
|
|
|
|
|
|
|
|
|
|
|
|
# 获取 PromptRegistry
|
|
|
|
|
|
prompt_registry = self._get_prompt_registry()
|
|
|
|
|
|
|
2025-12-09 21:16:44 +08:00
|
|
|
|
# 从 subject 提取产品信息
|
|
|
|
|
|
current_product = None
|
|
|
|
|
|
if subject and subject.get('products'):
|
|
|
|
|
|
current_product = subject['products'][0]
|
|
|
|
|
|
|
|
|
|
|
|
# 如果没有用户指定的参考内容,加载内置参考文献库
|
|
|
|
|
|
use_builtin_examples = not reference or reference.get('mode') == 'none'
|
|
|
|
|
|
|
2025-12-08 14:58:35 +08:00
|
|
|
|
# 构建 prompt 上下文
|
|
|
|
|
|
context = {
|
|
|
|
|
|
'style_content': self._format_style(style, topic),
|
|
|
|
|
|
'demand_content': self._format_audience(audience, topic),
|
2025-12-09 21:16:44 +08:00
|
|
|
|
'object_content': self._format_subject(subject, topic),
|
|
|
|
|
|
'product_content': self._format_product(current_product),
|
|
|
|
|
|
'hot_topics': hot_topics,
|
|
|
|
|
|
'reference': reference,
|
|
|
|
|
|
# 内置参考文献 (仅在无用户指定参考时使用)
|
|
|
|
|
|
'title_examples': self._get_title_examples(20) if use_builtin_examples else None,
|
|
|
|
|
|
'content_examples': self._get_content_examples(3) if use_builtin_examples else None,
|
2025-12-08 14:58:35 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
# 渲染 prompt
|
|
|
|
|
|
system_prompt, user_prompt = prompt_registry.render(
|
|
|
|
|
|
'content_generate',
|
|
|
|
|
|
context=context,
|
|
|
|
|
|
version=prompt_version
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
self.set_progress(task_id, 30)
|
|
|
|
|
|
|
|
|
|
|
|
# 获取模型参数
|
|
|
|
|
|
prompt_config = prompt_registry.get('content_generate', prompt_version)
|
|
|
|
|
|
model_params = prompt_config.get_model_params()
|
|
|
|
|
|
|
|
|
|
|
|
# 调用 LLM 生成内容
|
|
|
|
|
|
self.log("调用 LLM 生成内容...")
|
|
|
|
|
|
raw_result, input_tokens, output_tokens, time_cost = await self.llm.generate(
|
|
|
|
|
|
system_prompt=system_prompt,
|
|
|
|
|
|
user_prompt=user_prompt,
|
|
|
|
|
|
**model_params
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
self.set_progress(task_id, 60)
|
|
|
|
|
|
|
|
|
|
|
|
# 解析结果
|
|
|
|
|
|
content = self._parse_content(raw_result)
|
|
|
|
|
|
|
|
|
|
|
|
if not content:
|
|
|
|
|
|
return EngineResult(
|
|
|
|
|
|
success=False,
|
|
|
|
|
|
error="内容生成失败,无法解析结果",
|
|
|
|
|
|
error_code="PARSE_ERROR"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# 内容审核
|
|
|
|
|
|
final_content = content
|
|
|
|
|
|
judge_result = None
|
|
|
|
|
|
|
|
|
|
|
|
if enable_judge:
|
|
|
|
|
|
self.set_progress(task_id, 70)
|
|
|
|
|
|
self.log("执行内容审核...")
|
|
|
|
|
|
|
|
|
|
|
|
judge_result = await self._judge_content(
|
|
|
|
|
|
content,
|
|
|
|
|
|
scenic_spot,
|
|
|
|
|
|
product,
|
|
|
|
|
|
prompt_registry,
|
|
|
|
|
|
prompt_version
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
if judge_result and judge_result.get('success'):
|
|
|
|
|
|
final_content = {
|
|
|
|
|
|
'title': judge_result.get('title', content.get('title')),
|
|
|
|
|
|
'content': judge_result.get('content', content.get('content')),
|
|
|
|
|
|
'tag': content.get('tag', ''),
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
self.set_progress(task_id, 100)
|
|
|
|
|
|
|
|
|
|
|
|
return EngineResult(
|
|
|
|
|
|
success=True,
|
|
|
|
|
|
data={
|
|
|
|
|
|
"content": final_content,
|
|
|
|
|
|
"original_content": content,
|
|
|
|
|
|
"topic": topic,
|
|
|
|
|
|
"judged": enable_judge and judge_result is not None,
|
|
|
|
|
|
"judge_analysis": judge_result.get('analysis') if judge_result else None,
|
|
|
|
|
|
},
|
|
|
|
|
|
metadata={
|
|
|
|
|
|
"input_tokens": input_tokens,
|
|
|
|
|
|
"output_tokens": output_tokens,
|
|
|
|
|
|
"time_cost": time_cost,
|
|
|
|
|
|
"prompt_version": prompt_version,
|
|
|
|
|
|
}
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
self.log(f"内容生成异常: {e}", level='error')
|
|
|
|
|
|
return EngineResult(
|
|
|
|
|
|
success=False,
|
|
|
|
|
|
error=str(e),
|
|
|
|
|
|
error_code="EXECUTION_ERROR"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
def _get_prompt_registry(self):
|
|
|
|
|
|
"""获取 PromptRegistry"""
|
|
|
|
|
|
if self._prompt_registry:
|
|
|
|
|
|
return self._prompt_registry
|
|
|
|
|
|
|
|
|
|
|
|
from domain.prompt import PromptRegistry
|
|
|
|
|
|
self._prompt_registry = PromptRegistry('prompts')
|
|
|
|
|
|
return self._prompt_registry
|
|
|
|
|
|
|
2025-12-09 21:16:44 +08:00
|
|
|
|
def _get_reference_manager(self):
|
|
|
|
|
|
"""获取 ReferenceManager"""
|
|
|
|
|
|
if self._reference_manager is None:
|
|
|
|
|
|
from domain.prompt.reference_manager import get_reference_manager
|
|
|
|
|
|
self._reference_manager = get_reference_manager()
|
|
|
|
|
|
return self._reference_manager
|
|
|
|
|
|
|
|
|
|
|
|
def _get_title_examples(self, audience_id: str = None, style_id: str = None, count: int = 20) -> List[str]:
|
|
|
|
|
|
"""获取标题参考格式 (智能匹配 + 随机抽取)"""
|
|
|
|
|
|
manager = self._get_reference_manager()
|
|
|
|
|
|
return manager.get_titles(audience_id=audience_id, style_id=style_id, count=count)
|
|
|
|
|
|
|
|
|
|
|
|
def _get_content_examples(self, audience_id: str = None, style_id: str = None, count: int = 3) -> List[str]:
|
|
|
|
|
|
"""获取正文范文参考 (智能匹配 + 随机抽取)"""
|
|
|
|
|
|
manager = self._get_reference_manager()
|
|
|
|
|
|
return manager.get_contents(audience_id=audience_id, style_id=style_id, count=count)
|
|
|
|
|
|
|
2025-12-08 14:58:35 +08:00
|
|
|
|
def _format_style(self, style: Optional[Dict], topic: Dict) -> str:
|
|
|
|
|
|
"""格式化风格信息"""
|
|
|
|
|
|
if style:
|
|
|
|
|
|
name = style.get('name', style.get('styleName', ''))
|
|
|
|
|
|
desc = style.get('description', '')
|
|
|
|
|
|
return f"{name}\n{desc}" if desc else name
|
|
|
|
|
|
|
|
|
|
|
|
# 从 topic 中获取
|
|
|
|
|
|
return topic.get('style', '')
|
|
|
|
|
|
|
|
|
|
|
|
def _format_audience(self, audience: Optional[Dict], topic: Dict) -> str:
|
|
|
|
|
|
"""格式化受众信息"""
|
|
|
|
|
|
if audience:
|
|
|
|
|
|
name = audience.get('name', audience.get('audienceName', ''))
|
|
|
|
|
|
desc = audience.get('description', '')
|
|
|
|
|
|
return f"{name}\n{desc}" if desc else name
|
|
|
|
|
|
|
|
|
|
|
|
return topic.get('targetAudience', '')
|
|
|
|
|
|
|
2025-12-09 21:16:44 +08:00
|
|
|
|
def _format_subject(self, subject: Optional[Dict], topic: Dict) -> str:
|
|
|
|
|
|
"""格式化主体信息 (景区/酒店等)"""
|
|
|
|
|
|
if subject:
|
|
|
|
|
|
name = subject.get('name', '')
|
|
|
|
|
|
desc = subject.get('description', '')
|
|
|
|
|
|
location = subject.get('location', '')
|
|
|
|
|
|
highlights = subject.get('highlights', [])
|
2025-12-08 14:58:35 +08:00
|
|
|
|
|
2025-12-09 21:16:44 +08:00
|
|
|
|
parts = [f"名称: {name}"]
|
2025-12-08 14:58:35 +08:00
|
|
|
|
if location:
|
|
|
|
|
|
parts.append(f"位置: {location}")
|
2025-12-09 21:16:44 +08:00
|
|
|
|
if highlights:
|
|
|
|
|
|
parts.append(f"亮点: {', '.join(highlights)}")
|
2025-12-08 14:58:35 +08:00
|
|
|
|
if desc:
|
2025-12-09 21:16:44 +08:00
|
|
|
|
parts.append(f"描述: {desc}")
|
2025-12-08 14:58:35 +08:00
|
|
|
|
|
|
|
|
|
|
return "\n".join(parts)
|
|
|
|
|
|
|
2025-12-09 21:16:44 +08:00
|
|
|
|
# 兼容旧字段
|
|
|
|
|
|
return topic.get('object', topic.get('subject_name', ''))
|
|
|
|
|
|
|
|
|
|
|
|
def _format_scenic_spot(self, scenic_spot: Optional[Dict], topic: Dict) -> str:
|
|
|
|
|
|
"""[兼容] 格式化景区信息"""
|
|
|
|
|
|
return self._format_subject(scenic_spot, topic)
|
2025-12-08 14:58:35 +08:00
|
|
|
|
|
|
|
|
|
|
def _format_product(self, product: Optional[Dict]) -> str:
|
|
|
|
|
|
"""格式化产品信息"""
|
|
|
|
|
|
if not product:
|
|
|
|
|
|
return ""
|
|
|
|
|
|
|
|
|
|
|
|
name = product.get('name', product.get('productName', ''))
|
|
|
|
|
|
price = product.get('price', '')
|
|
|
|
|
|
desc = product.get('description', product.get('detailedDescription', ''))
|
|
|
|
|
|
|
|
|
|
|
|
parts = [name]
|
|
|
|
|
|
if price:
|
|
|
|
|
|
parts.append(f"价格: {price}")
|
|
|
|
|
|
if desc:
|
|
|
|
|
|
parts.append(desc)
|
|
|
|
|
|
|
|
|
|
|
|
return "\n".join(parts)
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_content(self, raw_result: str) -> Optional[Dict[str, Any]]:
|
|
|
|
|
|
"""解析 LLM 返回的内容"""
|
|
|
|
|
|
import json
|
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
|
|
# 尝试提取 JSON
|
|
|
|
|
|
json_match = re.search(r'\{[\s\S]*\}', raw_result)
|
|
|
|
|
|
if json_match:
|
|
|
|
|
|
try:
|
|
|
|
|
|
return json.loads(json_match.group())
|
|
|
|
|
|
except json.JSONDecodeError:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
# 尝试 json_repair
|
|
|
|
|
|
try:
|
|
|
|
|
|
import json_repair
|
|
|
|
|
|
return json_repair.loads(raw_result)
|
|
|
|
|
|
except:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
self.log("无法解析内容结果", level='error')
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
async def _judge_content(self, content: Dict,
|
|
|
|
|
|
scenic_spot: Optional[Dict],
|
|
|
|
|
|
product: Optional[Dict],
|
|
|
|
|
|
prompt_registry,
|
|
|
|
|
|
prompt_version: str) -> Optional[Dict]:
|
|
|
|
|
|
"""执行内容审核"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
# 构建产品资料
|
|
|
|
|
|
product_info_parts = []
|
|
|
|
|
|
if scenic_spot:
|
|
|
|
|
|
product_info_parts.append(f"景区: {scenic_spot.get('name', '')}")
|
|
|
|
|
|
if scenic_spot.get('description'):
|
|
|
|
|
|
product_info_parts.append(scenic_spot['description'])
|
|
|
|
|
|
if product:
|
|
|
|
|
|
product_info_parts.append(f"产品: {product.get('name', '')}")
|
|
|
|
|
|
if product.get('price'):
|
|
|
|
|
|
product_info_parts.append(f"价格: {product['price']}")
|
|
|
|
|
|
if product.get('description'):
|
|
|
|
|
|
product_info_parts.append(product['description'])
|
|
|
|
|
|
|
|
|
|
|
|
product_info = "\n".join(product_info_parts)
|
|
|
|
|
|
|
|
|
|
|
|
# 渲染审核 prompt
|
|
|
|
|
|
context = {
|
|
|
|
|
|
'product_info': product_info,
|
|
|
|
|
|
'title_to_judge': content.get('title', ''),
|
|
|
|
|
|
'content_to_judge': content.get('content', ''),
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
system_prompt, user_prompt = prompt_registry.render(
|
|
|
|
|
|
'content_judge',
|
|
|
|
|
|
context=context,
|
|
|
|
|
|
version=prompt_version
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# 获取模型参数
|
|
|
|
|
|
judge_config = prompt_registry.get('content_judge', prompt_version)
|
|
|
|
|
|
model_params = judge_config.get_model_params()
|
|
|
|
|
|
|
|
|
|
|
|
# 调用 LLM
|
|
|
|
|
|
raw_result, _, _, _ = await self.llm.generate(
|
|
|
|
|
|
system_prompt=system_prompt,
|
|
|
|
|
|
user_prompt=user_prompt,
|
|
|
|
|
|
**model_params
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# 解析结果
|
|
|
|
|
|
result = self._parse_content(raw_result)
|
|
|
|
|
|
if result:
|
|
|
|
|
|
result['success'] = True
|
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
self.log(f"内容审核失败: {e}", level='warning')
|
|
|
|
|
|
return None
|