TravelContentCreator/domain/content/content_engine.py

249 lines
7.3 KiB
Python
Raw Normal View History

2025-12-08 14:58:35 +08:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
内容生成引擎
负责根据选题生成营销文案
"""
import logging
from typing import Dict, Any, Optional, Tuple
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class ContentEngine:
    """Content generation engine.

    Responsibilities:
    - build the prompts used for content generation
    - call the LLM to generate content
    - parse and normalise the generated content
    """

    def __init__(self, llm_client=None, prompt_builder=None, db_accessor=None):
        """Initialise the content engine.

        Args:
            llm_client: LLM client; must expose an awaitable
                ``generate(prompt=..., system_prompt=...)``.
            prompt_builder: Optional prompt builder exposing
                ``get_system_prompt`` / ``get_user_prompt``. When omitted,
                built-in default prompts are used.
            db_accessor: Optional database accessor exposing ``style``,
                ``audience``, ``scenic_spot`` and ``product`` attributes, each
                with an awaitable ``find_by_id``. When omitted, topics are not
                enriched.
        """
        self._llm = llm_client
        self._prompt = prompt_builder
        self._db = db_accessor
        self.logger = logging.getLogger(f"{__name__}.ContentEngine")

    async def generate(
        self,
        topic: Dict[str, Any],
        style_id: Optional[int] = None,
        audience_id: Optional[int] = None,
        scenic_spot_id: Optional[int] = None,
        product_id: Optional[int] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """Generate content for a topic.

        Args:
            topic: Topic information.
            style_id: Style ID.
            audience_id: Audience ID.
            scenic_spot_id: Scenic spot ID.
            product_id: Product ID.
            **kwargs: Accepted for call-site compatibility; not used here.

        Returns:
            The parsed content dict (see ``_parse_content``).

        Raises:
            Exception: Any error raised while enhancing the topic, building
                prompts, calling the LLM or parsing is logged and re-raised
                unchanged (including ``AttributeError`` when no LLM client was
                supplied).
        """
        try:
            self.logger.info(f"开始生成内容: topic={topic.get('title', 'unknown')}")

            # 1. Enrich the topic with database records.
            enhanced_topic = await self._enhance_topic(
                topic, style_id, audience_id, scenic_spot_id, product_id
            )

            # 2. Build the prompts.
            system_prompt, user_prompt = self._build_prompts(enhanced_topic)

            # 3. Call the LLM.
            response = await self._llm.generate(
                prompt=user_prompt,
                system_prompt=system_prompt
            )

            # 4. Parse the result.
            content = self._parse_content(response, enhanced_topic)
            self.logger.info("内容生成完成")
            return content
        except Exception as e:
            self.logger.error("内容生成失败: %s", e)
            raise

    async def generate_with_prompt(
        self,
        topic: Dict[str, Any],
        system_prompt: str,
        user_prompt: str
    ) -> Dict[str, Any]:
        """Generate content using pre-built prompts.

        Args:
            topic: Topic information (used only when parsing the response).
            system_prompt: System prompt.
            user_prompt: User prompt.

        Returns:
            The parsed content dict (see ``_parse_content``).

        Raises:
            Exception: Any error from the LLM call or parsing is logged and
                re-raised unchanged.
        """
        try:
            response = await self._llm.generate(
                prompt=user_prompt,
                system_prompt=system_prompt
            )
            return self._parse_content(response, topic)
        except Exception as e:
            self.logger.error("内容生成失败: %s", e)
            raise

    async def _enhance_topic(
        self,
        topic: Dict[str, Any],
        style_id: Optional[int],
        audience_id: Optional[int],
        scenic_spot_id: Optional[int],
        product_id: Optional[int]
    ) -> Dict[str, Any]:
        """Return a shallow copy of ``topic`` enriched with database records.

        Each explicitly passed ID takes precedence; a falsy ID falls back to
        the ``<name>_id`` entry of ``topic``. A record is stored under
        ``style`` / ``audience`` / ``scenic_spot`` / ``product`` only when the
        lookup returns a truthy value. Without a database accessor the copy is
        returned unchanged.
        """
        enhanced = topic.copy()
        if not self._db:
            return enhanced

        # (key in the enhanced topic and accessor attribute, resolved ID);
        # the order matches the order in which lookups are issued.
        lookups = (
            ('style', style_id or topic.get('style_id')),
            ('audience', audience_id or topic.get('audience_id')),
            ('scenic_spot', scenic_spot_id or topic.get('scenic_spot_id')),
            ('product', product_id or topic.get('product_id')),
        )
        for key, entity_id in lookups:
            if not entity_id:
                continue
            record = await getattr(self._db, key).find_by_id(entity_id)
            if record:
                enhanced[key] = record
        return enhanced

    @staticmethod
    def _entity_name(value: Any) -> str:
        """Return a display name for a topic attribute, or ``''`` if none.

        Accepts a dict with a ``name`` key, an object with a ``name``
        attribute, a plain string (used as the name itself), or ``None``.
        """
        if value is None:
            return ''
        if isinstance(value, str):
            return value
        if isinstance(value, dict):
            name = value.get('name', '')
        else:
            name = getattr(value, 'name', '')
        return '' if name is None else str(name)

    def _build_prompts(self, topic: Dict[str, Any]) -> Tuple[str, str]:
        """Build the ``(system_prompt, user_prompt)`` pair for a topic.

        When a prompt builder was supplied it is asked for the
        ``"content_generate"`` prompts, with the topic entries passed as
        keyword arguments. Otherwise built-in default prompts are returned.
        """
        if self._prompt:
            system_prompt = self._prompt.get_system_prompt(
                "content_generate",
                **topic
            )
            user_prompt = self._prompt.get_user_prompt(
                "content_generate",
                **topic
            )
            return system_prompt, user_prompt

        # Default prompts. They are assembled line by line so the runtime text
        # does not depend on how this source file is indented.
        system_prompt = "\n".join([
            "你是一个专业的小红书内容创作者。",
            "请根据选题信息创作吸引人的营销文案",
            "文案应该",
            "1. 符合小红书的风格",
            "2. 包含吸引人的标题",
            "3. 有清晰的结构",
            "4. 适当使用 emoji",
            "5. 包含相关话题标签",
            "输出格式为 JSON包含title, content, tags",
        ])

        topic_title = topic.get('title', '')
        topic_desc = topic.get('description', '')
        # These entries may be dicts, DB records, plain strings or None, so
        # the name is resolved defensively instead of assuming a dict.
        spot_name = self._entity_name(topic.get('scenic_spot'))
        product_name = self._entity_name(topic.get('product'))
        style_name = self._entity_name(topic.get('style'))
        audience_name = self._entity_name(topic.get('audience'))

        user_prompt = "\n".join([
            "请根据以下选题创作小红书文案:",
            f"选题标题{topic_title}",
            f"选题描述{topic_desc}",
            f"景区{spot_name}",
            f"产品{product_name}",
            f"风格{style_name}",
            f"目标受众{audience_name}",
            "请以 JSON 格式输出包含",
            "- title: 文案标题吸引眼球",
            "- content: 正文内容500-800",
            "- tags: 话题标签数组",
        ])
        return system_prompt, user_prompt

    def _parse_content(self, response: str, topic: Dict[str, Any]) -> Dict[str, Any]:
        """Parse the LLM response into a content dict.

        Parsing is attempted in this order:
        1. the whole response as JSON;
        2. the span from the first ``{`` to the last ``}``;
        3. the body of a fenced ```` ```json ```` block.

        Only a non-empty JSON object is accepted. If nothing parses, the raw
        response is returned as ``content`` with the topic title and no tags.

        Args:
            response: Raw LLM output. ``None`` is treated as an empty string.
            topic: Topic information; ``index`` and ``title`` are copied into
                the result as ``topic_index`` and ``topic_title``.

        Returns:
            The content dict, always containing ``topic_index`` and
            ``topic_title``.
        """
        import json
        import re

        if response is None:
            text = ''
        elif isinstance(response, str):
            text = response
        else:
            text = str(response)

        def load_object(candidate):
            """Return ``candidate`` decoded as a non-empty JSON object, else None."""
            try:
                parsed = json.loads(candidate)
            except (TypeError, ValueError, RecursionError):
                # json.JSONDecodeError is a ValueError subclass.
                return None
            # Lists, numbers and strings are valid JSON, but the caller needs
            # a mapping it can add keys to.
            if isinstance(parsed, dict) and parsed:
                return parsed
            return None

        content = load_object(text)

        if content is None:
            # (pattern, index of the group holding the JSON text)
            extractors = (
                (r'\{[\s\S]*\}', 0),
                (r'```json\s*([\s\S]*?)\s*```', 1),
            )
            for pattern, group in extractors:
                match = re.search(pattern, text)
                if match is None:
                    continue
                content = load_object(match.group(group))
                if content is not None:
                    break

        # Nothing parseable: fall back to the raw response.
        if content is None:
            content = {
                'title': topic.get('title', ''),
                'content': text,
                'tags': []
            }

        # Attach topic information.
        content['topic_index'] = topic.get('index', 0)
        content['topic_title'] = topic.get('title', '')
        return content