diff --git a/api/__pycache__/main.cpython-312.pyc b/api/__pycache__/main.cpython-312.pyc index 2ef7585..f29ad4c 100644 Binary files a/api/__pycache__/main.cpython-312.pyc and b/api/__pycache__/main.cpython-312.pyc differ diff --git a/api/main.py b/api/main.py index 40209ed..07b4682 100644 --- a/api/main.py +++ b/api/main.py @@ -59,9 +59,9 @@ app.add_middleware( from api.routers import tweet, poster, prompt # 包含路由 -app.include_router(tweet.router, prefix="/api/tweet", tags=["tweet"]) -app.include_router(poster.router, prefix="/api/poster", tags=["poster"]) -app.include_router(prompt.router, prefix="/api/prompt", tags=["prompt"]) +app.include_router(tweet.router, prefix="/api/v1/tweet", tags=["tweet"]) +app.include_router(poster.router, prefix="/api/v1/poster", tags=["poster"]) +app.include_router(prompt.router, prefix="/api/v1/prompt", tags=["prompt"]) @app.get("/") async def root(): diff --git a/api/models/__pycache__/tweet.cpython-312.pyc b/api/models/__pycache__/tweet.cpython-312.pyc index 1bdc7f1..55741ec 100644 Binary files a/api/models/__pycache__/tweet.cpython-312.pyc and b/api/models/__pycache__/tweet.cpython-312.pyc differ diff --git a/api/models/tweet.py b/api/models/tweet.py index 874dedc..63d02a0 100644 --- a/api/models/tweet.py +++ b/api/models/tweet.py @@ -11,18 +11,22 @@ from pydantic import BaseModel, Field class TopicRequest(BaseModel): """选题生成请求模型""" - date: str = Field(..., description="选题日期,格式为YYYY-MM-DD") + dates: Optional[str] = Field(None, description="日期字符串,可能为单个日期、多个日期用逗号分隔或范围如'2023-01-01 to 2023-01-31'") num_topics: int = Field(5, description="要生成的选题数量", ge=1, le=10) - style: Optional[str] = Field(None, description="内容风格,如'旅游攻略'、'亲子游'等") - target_audience: Optional[str] = Field(None, description="目标受众,如'年轻人'、'家庭'等") + styles: Optional[List[str]] = Field(None, description="风格列表") + audiences: Optional[List[str]] = Field(None, description="受众列表") + scenic_spots: Optional[List[str]] = Field(None, description="景区列表") + products: Optional[List[str]] = Field(None, description="产品列表") class Config: schema_extra = { "example": { - "date": "2023-07-15", - "num_topics": 3, - "style": "旅游攻略", - "target_audience": "年轻人" + "dates": "2023-07-01 to 2023-07-31", + "num_topics": 5, + "styles": ["旅游攻略", "亲子游"], + "audiences": ["年轻人", "家庭"], + "scenic_spots": ["故宫", "长城"], + "products": ["门票", "导游服务"] } } @@ -141,19 +145,23 @@ class JudgeResponse(BaseModel): class PipelineRequest(BaseModel): """完整流程请求模型""" - date: str = Field(..., description="选题日期,格式为YYYY-MM-DD") + dates: Optional[str] = Field(None, description="日期字符串,可能为单个日期、多个日期用逗号分隔或范围如'2023-01-01 to 2023-01-31'") num_topics: int = Field(5, description="要生成的选题数量", ge=1, le=10) - style: Optional[str] = Field(None, description="内容风格,如'旅游攻略'、'亲子游'等") - target_audience: Optional[str] = Field(None, description="目标受众,如'年轻人'、'家庭'等") + styles: Optional[List[str]] = Field(None, description="风格列表") + audiences: Optional[List[str]] = Field(None, description="受众列表") + scenic_spots: Optional[List[str]] = Field(None, description="景区列表") + products: Optional[List[str]] = Field(None, description="产品列表") skip_judge: bool = Field(False, description="是否跳过内容审核步骤") class Config: schema_extra = { "example": { - "date": "2023-07-15", + "dates": "2023-07-01 to 2023-07-31", "num_topics": 3, - "style": "旅游攻略", - "target_audience": "年轻人", + "styles": ["旅游攻略", "亲子游"], + "audiences": ["年轻人", "家庭"], + "scenic_spots": ["故宫", "长城"], + "products": ["门票", "导游服务"], "skip_judge": False } } diff --git a/api/routers/__pycache__/tweet.cpython-312.pyc b/api/routers/__pycache__/tweet.cpython-312.pyc index 2dfc3ca..8275c03 100644 Binary files a/api/routers/__pycache__/tweet.cpython-312.pyc and b/api/routers/__pycache__/tweet.cpython-312.pyc differ diff --git a/api/routers/tweet.py b/api/routers/tweet.py index 2fe8913..7eb282d 100644 --- a/api/routers/tweet.py +++ b/api/routers/tweet.py @@ -60,17 +60,21 @@ async def generate_topics( """ 生成选题 - - **date**: 选题日期,格式为YYYY-MM-DD + - **dates**: 日期字符串,可能为单个日期、多个日期用逗号分隔或范围 - **num_topics**: 要生成的选题数量 - - **style**: 内容风格,如'旅游攻略'、'亲子游'等 - - **target_audience**: 目标受众,如'年轻人'、'家庭'等 + - **styles**: 风格列表 + - **audiences**: 受众列表 + - **scenic_spots**: 景区列表 + - **products**: 产品列表 """ try: request_id, topics = await tweet_service.generate_topics( - date=request.date, + dates=request.dates, num_topics=request.num_topics, - style=request.style, - target_audience=request.target_audience + styles=request.styles, + audiences=request.audiences, + scenic_spots=request.scenic_spots, + products=request.products ) return TopicResponse( @@ -172,18 +176,22 @@ async def run_pipeline( """ 运行完整流水线,包括生成选题、生成内容和审核内容 - - **date**: 选题日期,格式为YYYY-MM-DD + - **dates**: 日期字符串,可能为单个日期、多个日期用逗号分隔或范围 - **num_topics**: 要生成的选题数量 - - **style**: 内容风格,如'旅游攻略'、'亲子游'等 - - **target_audience**: 目标受众,如'年轻人'、'家庭'等 + - **styles**: 风格列表 + - **audiences**: 受众列表 + - **scenic_spots**: 景区列表 + - **products**: 产品列表 - **skip_judge**: 是否跳过内容审核步骤 """ try: request_id, topics, contents, judged_contents = await tweet_service.run_pipeline( - date=request.date, + dates=request.dates, num_topics=request.num_topics, - style=request.style, - target_audience=request.target_audience, + styles=request.styles, + audiences=request.audiences, + scenic_spots=request.scenic_spots, + products=request.products, skip_judge=request.skip_judge ) diff --git a/api/services/__pycache__/prompt_builder.cpython-312.pyc b/api/services/__pycache__/prompt_builder.cpython-312.pyc index 901ba5e..750111f 100644 Binary files a/api/services/__pycache__/prompt_builder.cpython-312.pyc and b/api/services/__pycache__/prompt_builder.cpython-312.pyc differ diff --git a/api/services/__pycache__/prompt_service.cpython-312.pyc b/api/services/__pycache__/prompt_service.cpython-312.pyc index a997d0c..bed4c8e 100644 Binary files a/api/services/__pycache__/prompt_service.cpython-312.pyc and b/api/services/__pycache__/prompt_service.cpython-312.pyc differ diff --git a/api/services/__pycache__/tweet.cpython-312.pyc b/api/services/__pycache__/tweet.cpython-312.pyc index 4858d9b..f55f292 100644 Binary files a/api/services/__pycache__/tweet.cpython-312.pyc and b/api/services/__pycache__/tweet.cpython-312.pyc differ diff --git a/api/services/prompt_builder.py b/api/services/prompt_builder.py index db27d5f..a1c7c48 100644 --- a/api/services/prompt_builder.py +++ b/api/services/prompt_builder.py @@ -7,7 +7,7 @@ """ import logging -from typing import Dict, Any, Optional, Tuple +from typing import Dict, Any, Optional, Tuple, List from pathlib import Path from core.config import ConfigManager, GenerateContentConfig, GenerateTopicConfig, PosterConfig @@ -150,13 +150,17 @@ class PromptBuilderService: return system_prompt, user_prompt - def build_topic_prompt(self, num_topics: int, month: str) -> Tuple[str, str]: + def build_topic_prompt(self, products: Optional[List[str]] = None, scenic_spots: Optional[List[str]] = None, styles: Optional[List[str]] = None, audiences: Optional[List[str]] = None, dates: Optional[str] = None, num_topics: int = 5) -> Tuple[str, str]: """ 构建选题生成提示词 Args: + products: 产品列表 + scenic_spots: 景区列表 + styles: 风格列表 + audiences: 受众列表 + dates: 日期字符串,可能为单个日期、多个日期用逗号分隔或范围如'2023-01-01 to 2023-01-31' num_topics: 要生成的选题数量 - month: 月份 Returns: 系统提示词和用户提示词的元组 @@ -173,20 +177,51 @@ class PromptBuilderService: # 创建提示词模板 template = PromptTemplate(system_prompt_path, user_prompt_path) - # 获取风格列表 - styles = self.prompt_service.get_all_styles() - style_content = "Style文件列表:\n" + "\n".join([f"- {style['name']}" for style in styles]) + # 处理日期 + if dates: + if ' to ' in dates: + start_date, end_date = dates.split(' to ') + month = f"从 {start_date} 到 {end_date}" + elif ',' in dates: + month = ', '.join(dates.split(',')) + else: + month = dates + else: + month = '' - # 获取目标受众列表 - audiences = self.prompt_service.get_all_audiences() - demand_content = "Demand文件列表:\n" + "\n".join([f"- {audience['name']}" for audience in audiences]) + # 获取风格内容 + style_content = '' + if styles: + style_content = '\n'.join([f"{style}: {self.prompt_service.get_style_content(style)}" for style in styles]) + else: + all_styles = self.prompt_service.get_all_styles() + style_content = "Style文件列表:\n" + "\n".join([f"- {style['name']}" for style in all_styles]) + + # 获取受众内容 + demand_content = '' + if audiences: + demand_content = '\n'.join([f"{audience}: {self.prompt_service.get_audience_content(audience)}" for audience in audiences]) + else: + all_audiences = self.prompt_service.get_all_audiences() + demand_content = "Demand文件列表:\n" + "\n".join([f"- {audience['name']}" for audience in all_audiences]) # 获取参考内容 refer_content = self.prompt_service.get_refer_content("topic") - # 获取景区信息列表 - spots = self.prompt_service.get_all_scenic_spots() - object_content = "Object信息:\n" + "\n".join([f"- {spot['name']}" for spot in spots]) + # 获取景区内容 + object_content = '' + if scenic_spots: + object_content = '\n'.join([f"{spot}: {self.prompt_service.get_scenic_spot_info(spot)}" for spot in scenic_spots]) + else: + all_spots = self.prompt_service.get_all_scenic_spots() + object_content = "Object信息:\n" + "\n".join([f"- {spot['name']}" for spot in all_spots]) + + # 获取产品内容 + product_content = '' + if products: + product_content = '\n'.join([f"{product}: {self.prompt_service.get_product_info(product)}" for product in products]) + else: + product_content = '' # 假设没有默认产品列表 # 构建系统提示词 system_prompt = template.get_system_prompt() @@ -194,10 +229,11 @@ class PromptBuilderService: # 构建创作资料 creative_materials = ( f"你拥有的创作资料如下:\n" - f"{style_content}\n\n" - f"{demand_content}\n\n" - f"{refer_content}\n\n" - f"{object_content}" + f"风格信息:\n{style_content}\n\n" + f"受众信息:\n{demand_content}\n\n" + f"参考内容:\n{refer_content}\n\n" + f"景区信息:\n{object_content}\n\n" + f"产品信息:\n{product_content}" ) # 构建用户提示词 diff --git a/api/services/prompt_service.py b/api/services/prompt_service.py index d514420..12d0411 100644 --- a/api/services/prompt_service.py +++ b/api/services/prompt_service.py @@ -342,13 +342,36 @@ class PromptService: full_path = self._get_full_path(path_str) if full_path.exists() and full_path.is_file(): - with open(full_path, 'r', encoding='utf-8') as f: - lines = f.readlines() - if lines: + if full_path.suffix.lower() == '.json': + # 处理JSON文件 + with open(full_path, 'r', encoding='utf-8') as f: + data = json.load(f) + if isinstance(data, dict) and 'examples' in data: + examples = data['examples'] + if isinstance(examples, list): + sample_size = max(1, int(len(examples) * ref_item.sampling_rate)) + sampled_examples = random.sample(examples, sample_size) + sampled_content = json.dumps({'examples': sampled_examples}, ensure_ascii=False, indent=4) + elif isinstance(data, list): + sample_size = max(1, int(len(data) * ref_item.sampling_rate)) + sampled_examples = random.sample(data, sample_size) + sampled_content = json.dumps(sampled_examples, ensure_ascii=False, indent=4) + else: + # 如果不是预期结构,按原方式处理 + with open(full_path, 'r', encoding='utf-8') as f: + lines = f.readlines() + sample_size = max(1, int(len(lines) * ref_item.sampling_rate)) + sampled_lines = random.sample(lines, sample_size) + sampled_content = ''.join(sampled_lines) + else: + # 非JSON文件,按原方式处理 + with open(full_path, 'r', encoding='utf-8') as f: + lines = f.readlines() sample_size = max(1, int(len(lines) * ref_item.sampling_rate)) sampled_lines = random.sample(lines, sample_size) sampled_content = ''.join(sampled_lines) - refer_content += f"--- {full_path.name} (sampled {ref_item.sampling_rate * 100}%) ---\n{sampled_content}\n\n" + + refer_content += f"--- {full_path.name} (sampled {ref_item.sampling_rate * 100}%) ---\n{sampled_content}\n\n" except Exception as e: logger.error(f"读取或采样参考文件失败 {ref_item.path}: {e}") except Exception as e: diff --git a/api/services/tweet.py b/api/services/tweet.py index 50d1a0b..12bf79e 100644 --- a/api/services/tweet.py +++ b/api/services/tweet.py @@ -48,32 +48,41 @@ class TweetService: self.prompt_service = PromptService(config_manager) self.prompt_builder = PromptBuilderService(config_manager, self.prompt_service) - async def generate_topics(self, date: str, num_topics: int = 5, - style: Optional[str] = None, - target_audience: Optional[str] = None) -> Tuple[str, List[Dict[str, Any]]]: + async def generate_topics(self, dates: Optional[str] = None, num_topics: int = 5, + styles: Optional[List[str]] = None, + audiences: Optional[List[str]] = None, + scenic_spots: Optional[List[str]] = None, + products: Optional[List[str]] = None) -> Tuple[str, List[Dict[str, Any]]]: """ 生成选题 Args: - date: 选题日期,格式为YYYY-MM-DD + dates: 日期字符串,可能为单个日期、多个日期用逗号分隔或范围 num_topics: 要生成的选题数量 - style: 内容风格 - target_audience: 目标受众 + styles: 风格列表 + audiences: 受众列表 + scenic_spots: 景区列表 + products: 产品列表 Returns: 请求ID和生成的选题列表 """ - logger.info(f"开始生成选题,日期: {date}, 数量: {num_topics}") + logger.info(f"开始生成选题,日期: {dates}, 数量: {num_topics}") # 获取并更新配置 topic_config = self.config_manager.get_config('topic_gen', GenerateTopicConfig) - topic_config.topic.date = date + if dates: + topic_config.topic.date = dates topic_config.topic.num = num_topics # 使用PromptBuilderService构建提示词 system_prompt, user_prompt = self.prompt_builder.build_topic_prompt( - num_topics=num_topics, - month=date + products=products, + scenic_spots=scenic_spots, + styles=styles, + audiences=audiences, + dates=dates, + num_topics=num_topics ) # 使用预构建的提示词生成选题 @@ -164,30 +173,34 @@ class TweetService: logger.info(f"内容审核完成,请求ID: {request_id}, 选题索引: {topic_index}, 审核结果: {judge_success}") return request_id, topic_index, judged_data, judge_success - async def run_pipeline(self, date: str, num_topics: int = 5, - style: Optional[str] = None, - target_audience: Optional[str] = None, + async def run_pipeline(self, dates: Optional[str] = None, num_topics: int = 5, + styles: Optional[List[str]] = None, + audiences: Optional[List[str]] = None, + scenic_spots: Optional[List[str]] = None, + products: Optional[List[str]] = None, skip_judge: bool = False) -> Tuple[str, List[Dict[str, Any]], Dict[str, Dict[str, Any]], Dict[str, Dict[str, Any]]]: """ 运行完整流水线 Args: - date: 选题日期,格式为YYYY-MM-DD + dates: 日期字符串,可能为单个日期、多个日期用逗号分隔或范围 num_topics: 要生成的选题数量 - style: 内容风格 - target_audience: 目标受众 + styles: 风格列表 + audiences: 受众列表 + scenic_spots: 景区列表 + products: 产品列表 skip_judge: 是否跳过内容审核步骤 Returns: 请求ID、生成的选题列表、生成的内容和审核后的内容 """ - logger.info(f"开始运行完整流水线,日期: {date}, 数量: {num_topics}") + logger.info(f"开始运行完整流水线,日期: {dates}, 数量: {num_topics}") # 生成请求ID request_id = f"pipeline_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{str(uuid.uuid4())[:8]}" # 步骤1: 生成选题 - _, topics = await self.generate_topics(date, num_topics, style, target_audience) + _, topics = await self.generate_topics(dates, num_topics, styles, audiences, scenic_spots, products) if not topics: logger.error("未能生成任何选题,流程终止") return request_id, [], {}, {} diff --git a/core/config/__pycache__/manager.cpython-312.pyc b/core/config/__pycache__/manager.cpython-312.pyc index d913be1..e3edd69 100644 Binary files a/core/config/__pycache__/manager.cpython-312.pyc and b/core/config/__pycache__/manager.cpython-312.pyc differ diff --git a/core/config/__pycache__/models.cpython-312.pyc b/core/config/__pycache__/models.cpython-312.pyc index 76b0456..c0851bf 100644 Binary files a/core/config/__pycache__/models.cpython-312.pyc and b/core/config/__pycache__/models.cpython-312.pyc differ diff --git a/core/config/models.py b/core/config/models.py index e9655b7..030aeb1 100644 --- a/core/config/models.py +++ b/core/config/models.py @@ -130,7 +130,7 @@ class GenerateTopicConfig(BaseConfig): class GenerateContentConfig(BaseConfig): """内容生成配置""" - content_system_prompt: str = "resource/prompt/generateContent/contentSystem.txt" + content_system_prompt: str = "resource/prompt/generateContent/system.txt" content_user_prompt: str = "resource/prompt/generateContent/user.txt" judger_system_prompt: str = "resource/prompt/judgeContent/system.txt" judger_user_prompt: str = "resource/prompt/judgeContent/user.txt"