TravelContentCreator/domain/content/content_engine.py

249 lines
7.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
内容生成引擎
负责根据选题生成营销文案
"""
import logging
from typing import Dict, Any, Optional, Tuple
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
class ContentEngine:
    """
    Content generation engine.

    Responsibilities:
    - build the prompts used for content generation
    - call the LLM to generate content
    - parse the LLM response into a content dict
    """

    # Fallback system prompt, used only when no prompt builder is configured.
    _DEFAULT_SYSTEM_PROMPT = (
        "你是一个专业的小红书内容创作者。\n"
        "请根据选题信息创作吸引人的营销文案。\n"
        "文案应该:\n"
        "1. 符合小红书的风格\n"
        "2. 包含吸引人的标题\n"
        "3. 有清晰的结构\n"
        "4. 适当使用 emoji\n"
        "5. 包含相关话题标签\n"
        "输出格式为 JSON包含title, content, tags"
    )

    # (entity key, topic key holding its ID). The entity key is both the
    # attribute looked up on the DB accessor and the key written into the
    # enhanced topic. Order matches the lookup order.
    _ENTITY_KEYS = (
        ('style', 'style_id'),
        ('audience', 'audience_id'),
        ('scenic_spot', 'scenic_spot_id'),
        ('product', 'product_id'),
    )

    def __init__(self, llm_client=None, prompt_builder=None, db_accessor=None):
        """
        Initialise the content engine.

        Args:
            llm_client: LLM client; its ``generate(prompt=..., system_prompt=...)``
                coroutine is awaited to produce the raw response.
            prompt_builder: optional prompt builder; when absent, built-in
                default prompts are used.
            db_accessor: optional database accessor; when absent, topics are
                not enriched with style/audience/scenic spot/product records.
        """
        self._llm = llm_client
        self._prompt = prompt_builder
        self._db = db_accessor
        self.logger = logging.getLogger(f"{__name__}.ContentEngine")

    async def generate(
        self,
        topic: Dict[str, Any],
        style_id: Optional[int] = None,
        audience_id: Optional[int] = None,
        scenic_spot_id: Optional[int] = None,
        product_id: Optional[int] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Generate content for a topic.

        Args:
            topic: topic information.
            style_id: style ID; falls back to ``topic['style_id']``.
            audience_id: audience ID; falls back to ``topic['audience_id']``.
            scenic_spot_id: scenic spot ID; falls back to
                ``topic['scenic_spot_id']``.
            product_id: product ID; falls back to ``topic['product_id']``.
            **kwargs: accepted for call-site compatibility; not used here.

        Returns:
            The parsed content dict, including ``topic_index`` and
            ``topic_title``.

        Raises:
            Exception: any error from enrichment, prompt building, the LLM
                call or parsing is logged with its traceback and re-raised.
        """
        try:
            self.logger.info(
                "开始生成内容: topic=%s", topic.get('title', 'unknown')
            )

            # 1. Enrich the topic with records from the database.
            enhanced_topic = await self._enhance_topic(
                topic, style_id, audience_id, scenic_spot_id, product_id
            )

            # 2. Build the prompts.
            system_prompt, user_prompt = self._build_prompts(enhanced_topic)

            # 3. Call the LLM.
            response = await self._llm.generate(
                prompt=user_prompt,
                system_prompt=system_prompt
            )

            # 4. Parse the result.
            content = self._parse_content(response, enhanced_topic)
            self.logger.info("内容生成完成")
            return content
        except Exception as e:
            # logger.exception keeps the ERROR level and adds the traceback.
            self.logger.exception("内容生成失败: %s", e)
            raise

    async def generate_with_prompt(
        self,
        topic: Dict[str, Any],
        system_prompt: str,
        user_prompt: str
    ) -> Dict[str, Any]:
        """
        Generate content using prompts that were built by the caller.

        Args:
            topic: topic information (used for the fallback title and the
                ``topic_index`` / ``topic_title`` fields).
            system_prompt: system prompt passed to the LLM.
            user_prompt: user prompt passed to the LLM.

        Returns:
            The parsed content dict.

        Raises:
            Exception: any error from the LLM call or parsing is logged with
                its traceback and re-raised.
        """
        try:
            response = await self._llm.generate(
                prompt=user_prompt,
                system_prompt=system_prompt
            )
            return self._parse_content(response, topic)
        except Exception as e:
            self.logger.exception("内容生成失败: %s", e)
            raise

    async def _enhance_topic(
        self,
        topic: Dict[str, Any],
        style_id: Optional[int],
        audience_id: Optional[int],
        scenic_spot_id: Optional[int],
        product_id: Optional[int]
    ) -> Dict[str, Any]:
        """
        Return a shallow copy of ``topic`` enriched with database records.

        For each of style, audience, scenic spot and product, the explicit ID
        argument is used if truthy, otherwise the matching ``*_id`` key of the
        topic. When an ID is available and the lookup returns a truthy record,
        the record is stored under ``style`` / ``audience`` / ``scenic_spot``
        / ``product``. Without a DB accessor the copy is returned unchanged.
        """
        enhanced = topic.copy()
        if not self._db:
            return enhanced

        explicit_ids = {
            'style': style_id,
            'audience': audience_id,
            'scenic_spot': scenic_spot_id,
            'product': product_id,
        }
        for key, id_key in self._ENTITY_KEYS:
            # Explicit argument wins; otherwise take the ID from the topic.
            entity_id = explicit_ids[key] or topic.get(id_key)
            if not entity_id:
                continue
            entity = await getattr(self._db, key).find_by_id(entity_id)
            if entity:
                enhanced[key] = entity
        return enhanced

    @staticmethod
    def _entity_name(value: Any) -> Any:
        """
        Return the display name of a topic entity for the default prompt.

        Dicts yield ``value.get('name', '')``; ``None`` yields ``''``; a plain
        string is used as the name itself; any other object yields its
        ``name`` attribute, or ``''`` if it has none.
        """
        if value is None:
            return ''
        if isinstance(value, dict):
            return value.get('name', '')
        if isinstance(value, str):
            return value
        return getattr(value, 'name', '')

    def _build_prompts(self, topic: Dict[str, Any]) -> Tuple[str, str]:
        """
        Build the ``(system_prompt, user_prompt)`` pair for a topic.

        With a prompt builder configured, both prompts come from its
        ``content_generate`` templates, with the topic's items passed as
        keyword arguments. Otherwise the built-in default prompts are used.
        """
        if self._prompt:
            system_prompt = self._prompt.get_system_prompt(
                "content_generate",
                **topic
            )
            user_prompt = self._prompt.get_user_prompt(
                "content_generate",
                **topic
            )
            return system_prompt, user_prompt

        # Default prompts.
        topic_title = topic.get('title', '')
        topic_desc = topic.get('description', '')
        # The entity may be missing, None, a dict or a record object, so the
        # name is resolved defensively instead of chaining .get() calls.
        spot_name = self._entity_name(topic.get('scenic_spot'))
        product_name = self._entity_name(topic.get('product'))
        style_name = self._entity_name(topic.get('style'))
        audience_name = self._entity_name(topic.get('audience'))

        user_prompt = (
            "请根据以下选题创作小红书文案:\n"
            f"选题标题:{topic_title}\n"
            f"选题描述:{topic_desc}\n"
            f"景区:{spot_name}\n"
            f"产品:{product_name}\n"
            f"风格:{style_name}\n"
            f"目标受众:{audience_name}\n"
            "请以 JSON 格式输出,包含:\n"
            "- title: 文案标题(吸引眼球)\n"
            "- content: 正文内容500-800字\n"
            "- tags: 话题标签数组"
        )
        return self._DEFAULT_SYSTEM_PROMPT, user_prompt

    @staticmethod
    def _coerce_text(response: Any) -> str:
        """Return the LLM response as text (``None`` becomes ``''``)."""
        if isinstance(response, str):
            return response
        if response is None:
            return ''
        if isinstance(response, (bytes, bytearray)):
            return bytes(response).decode('utf-8', errors='replace')
        return str(response)

    @staticmethod
    def _extract_json_object(text: str) -> Optional[Dict[str, Any]]:
        """
        Return the first non-empty JSON object found in ``text``, else None.

        Candidates are tried in order: the whole text, the span from the first
        ``{`` to the last ``}``, then the body of a ```` ```json ```` fence.
        A candidate that is not valid JSON, or that decodes to anything other
        than a non-empty dict, is skipped.
        """
        import json
        import re

        def load_object(candidate: str) -> Optional[Dict[str, Any]]:
            try:
                parsed = json.loads(candidate)
            except ValueError:  # json.JSONDecodeError is a ValueError
                return None
            if isinstance(parsed, dict) and parsed:
                return parsed
            return None

        found = load_object(text)
        if found is not None:
            return found

        # (pattern, capture group that holds the JSON text)
        patterns = (
            (r'\{[\s\S]*\}', 0),
            (r'```json\s*([\s\S]*?)\s*```', 1),
        )
        for pattern, group in patterns:
            match = re.search(pattern, text)
            if not match:
                continue
            found = load_object(match.group(group))
            if found is not None:
                return found
        return None

    def _parse_content(self, response: str, topic: Dict[str, Any]) -> Dict[str, Any]:
        """
        Parse the LLM response into a content dict.

        Args:
            response: raw LLM response, expected to contain a JSON object.
            topic: topic the response was generated for.

        Returns:
            The JSON object from the response if one can be extracted.
            Otherwise a fallback dict with the topic title, the raw response
            text as ``content`` and empty ``tags``. In both cases
            ``topic_index`` (default 0) and ``topic_title`` (default ``''``)
            are set from the topic.
        """
        text = self._coerce_text(response)
        content = self._extract_json_object(text)

        # Fall back to the raw response when no JSON object could be parsed.
        if not content:
            content = {
                'title': topic.get('title', ''),
                'content': text,
                'tags': []
            }

        # Attach the topic information.
        content['topic_index'] = topic.get('index', 0)
        content['topic_title'] = topic.get('title', '')
        return content