2025-04-17 14:40:59 +08:00
|
|
|
|
#!/usr/bin/env python
|
|
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
|
|
|
|
import os
|
|
|
|
|
|
import time
|
|
|
|
|
|
import random
|
|
|
|
|
|
import argparse
|
|
|
|
|
|
import json
|
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
import sys
|
|
|
|
|
|
sys.path.append('/root/autodl-tmp')
|
|
|
|
|
|
# 从本地模块导入
|
|
|
|
|
|
from TravelContentCreator.core.ai_agent import AI_Agent
|
|
|
|
|
|
from TravelContentCreator.core.topic_parser import TopicParser
|
|
|
|
|
|
from TravelContentCreator.utils.resource_loader import ResourceLoader
|
|
|
|
|
|
|
2025-04-17 15:30:24 +08:00
|
|
|
|
class tweetTopic:
    """Plain record holding the fields of one topic.

    Every constructor argument is stored unchanged on the instance under an
    attribute of the same name.
    """

    def __init__(self, index, date, logic, object, product, product_logic, style, style_logic, target_audience, target_audience_logic):
        self.index, self.date, self.logic = index, date, logic
        # ``object`` shadows the builtin, but it is part of the public
        # signature and is therefore kept as-is.
        self.object = object
        self.product, self.product_logic = product, product_logic
        self.style, self.style_logic = style, style_logic
        self.target_audience, self.target_audience_logic = (
            target_audience,
            target_audience_logic,
        )
|
|
|
|
|
|
|
|
|
|
|
|
class tweetTopicRecord:
    """Result of one topic-generation run together with the prompts used.

    Stores the topic list, both prompts, the output directory and the run id,
    and can persist them to disk.
    """

    def __init__(self, topics_list, system_prompt, user_prompt, output_dir, run_id):
        self.topics_list = topics_list
        self.system_prompt, self.user_prompt = system_prompt, user_prompt
        self.output_dir, self.run_id = output_dir, run_id

    def save_topics(self, path):
        """Write ``topics_list`` to ``path`` as indented UTF-8 JSON.

        Returns True on success. Any exception is reported with ``print`` and
        turned into a False return value instead of propagating.
        """
        try:
            with open(path, "w", encoding="utf-8") as handle:
                json.dump(self.topics_list, handle, ensure_ascii=False, indent=4)
        except Exception as err:
            print(f"保存选题失败: {err}")
            return False
        else:
            return True

    def save_prompt(self, path):
        """Write both prompts, the output directory and the run id to ``path``.

        Each value goes on its own line, in that order. Returns True on
        success; any exception is printed and reported as False.
        """
        try:
            with open(path, "w", encoding="utf-8") as handle:
                # Written one by one (not joined first) so that a failure on a
                # later field leaves the earlier fields already written.
                for attr in ("system_prompt", "user_prompt", "output_dir", "run_id"):
                    handle.write(getattr(self, attr) + "\n")
        except Exception as err:
            print(f"保存提示词失败: {err}")
            return False
        else:
            return True
|
|
|
|
|
|
|
|
|
|
|
|
class tweetContent:
    """One generated article: the raw model output plus its parsed parts.

    On construction the raw ``result`` is split into ``title`` and
    ``content`` and a ``{"title": ..., "content": ...}`` dict is prepared in
    ``json_file``. The remaining constructor arguments are stored unchanged.
    """

    def __init__(self, result, prompt, output_dir, run_id, article_index, variant_index):
        self.result = result
        self.prompt = prompt
        self.output_dir = output_dir
        self.run_id = run_id
        self.article_index = article_index
        self.variant_index = variant_index

        self.title, self.content = self.split_content(result)
        self.json_file = self.gen_result_json()

    @staticmethod
    def _extract_tag(text, tag):
        """Return the text between the first ``<tag>`` and the next ``</tag>``.

        If the closing tag is missing (e.g. truncated output) everything
        after the opening tag is returned. Raises ValueError when the opening
        tag is absent.
        """
        open_tag, close_tag = f"<{tag}>", f"</{tag}>"
        start = text.find(open_tag)
        if start == -1:
            raise ValueError(f"missing {open_tag} in model output")
        start += len(open_tag)
        end = text.find(close_tag, start)
        return text[start:] if end == -1 else text[start:end]

    def split_content(self, result):
        """Split raw model output into ``(title, content)``.

        Everything up to and including the first ``</think>`` marker is
        discarded; output without such a marker is parsed as-is. The title
        and content are the texts enclosed in ``<title>`` and ``<content>``
        tags.

        Raises:
            ValueError: if the ``<title>`` or ``<content>`` opening tag is
                missing.
        """
        # Drop the reasoning section, if the model produced one.
        _, marker, answer = result.partition("</think>")
        if not marker:
            answer = result

        # Match the full opening/closing tags. Splitting on the bare suffix
        # "title>" also matches "</title>" and leaves a trailing "</" behind.
        title = tweetContent._extract_tag(answer, "title")
        content = tweetContent._extract_tag(answer, "content")
        return title, content

    def gen_result_json(self):
        """Return the parsed article as a ``{"title", "content"}`` dict."""
        json_file = {
            "title": self.title,
            "content": self.content
        }
        return json_file

    def save_content(self, json_path):
        """Write ``json_file`` to ``json_path`` as UTF-8 JSON and return the path."""
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(self.json_file, f, ensure_ascii=False, indent=4)
        return json_path

    def save_prompt(self, path):
        """Write the stored prompt followed by a newline to ``path`` and return the path."""
        with open(path, "w", encoding="utf-8") as f:
            f.write(self.prompt + "\n")
        return path

    def get_content(self):
        """Return the parsed article body."""
        return self.content

    def get_title(self):
        """Return the parsed article title."""
        return self.title

    def get_json_file(self):
        """Return the ``{"title", "content"}`` dict built at construction."""
        return self.json_file
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-04-17 14:40:59 +08:00
|
|
|
|
def generate_topics(ai_agent, system_prompt, user_prompt, output_dir, temperature=0.2, top_p=0.5, presence_penalty=1.5):
    """Generate the topic list and wrap it in a ``tweetTopicRecord``.

    Calls ``ai_agent.work`` with the given prompts and sampling parameters,
    parses its first return value with ``TopicParser.parse_topics`` and
    derives a run id from the current local time.

    Returns:
        ``(run_id, tweet_topic_record)``.
    """
    print("开始生成选题...")

    started_at = time.time()

    # ``work`` returns seven values; only the raw text and the two prompts it
    # hands back are used here.
    raw_output, system_prompt, user_prompt, _folder, _name, _tokens, _cost = ai_agent.work(
        system_prompt, user_prompt, "", temperature, top_p, presence_penalty
    )

    elapsed = time.time() - started_at
    print(f"选题生成完成,耗时:{elapsed}秒")

    # The timestamp doubles as the unique id of this run.
    run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    parsed_topics = TopicParser.parse_topics(raw_output)
    record = tweetTopicRecord(parsed_topics, system_prompt, user_prompt, output_dir, run_id)
    return run_id, record
|
2025-04-17 14:40:59 +08:00
|
|
|
|
|
|
|
|
|
|
|
2025-04-17 16:14:41 +08:00
|
|
|
|
def generate_single_content(ai_agent, system_prompt, item, prompts_dir, resource_dir, output_dir, run_id,
                            article_index, variant_index, temperature=0.3, top_p=0.4, presence_penalty=1.5):
    """Generate one article variant and save it under ``output_dir``.

    The user prompt is built with ``ResourceLoader.build_user_prompt``, sent
    to ``ai_agent.work``, and the answer is wrapped in a ``tweetContent``.
    The article and its prompt are written to
    ``<output_dir>/<article_index>_<variant_index>/``.

    Returns:
        ``(tweet_content, result)`` on success, ``(None, None)`` if any step
        raises (the error is printed).
    """
    try:
        user_prompt = ResourceLoader.build_user_prompt(item, prompts_dir, resource_dir)
        print(f"完成提示词构建,长度为 {len(user_prompt)} 字符")

        # Random pause so consecutive requests are not sent back to back.
        pause = random.random() * 0.5 + 0.1
        time.sleep(pause)

        result, _sys, _usr, _folder, _name, tokens, time_cost = ai_agent.work(
            system_prompt, user_prompt, "", temperature, top_p, presence_penalty
        )
        print(f"生成完成,tokens: {tokens}, 耗时: {time_cost}s")

        # Parse first: a malformed answer aborts before any directory is created.
        tweet_content = tweetContent(result, user_prompt, output_dir, run_id, article_index, variant_index)

        variant_dir = os.path.join(output_dir, f"{article_index}_{variant_index}")
        os.makedirs(variant_dir, exist_ok=True)
        article_path = os.path.join(variant_dir, "article.json")
        tweet_content.save_content(article_path)
        prompt_path = os.path.join(variant_dir, "tweet_prompt.txt")
        tweet_content.save_prompt(prompt_path)
    except Exception as err:
        print(f"生成单篇文章时出错: {err}")
        return None, None
    else:
        return tweet_content, result
|
|
|
|
|
|
|
2025-04-17 14:40:59 +08:00
|
|
|
|
def generate_content(ai_agent, system_prompt, topics, output_dir, run_id, prompts_dir, resource_dir,
                     variants=2, temperature=0.3, start_index=0, end_index=None):
    """Generate ``variants`` articles for every topic in the selected range.

    Topics ``topics[start_index:end_index]`` are processed in order;
    ``end_index`` is clamped to ``len(topics)`` when it is None or too large.
    Article and variant numbers passed on to ``generate_single_content`` are
    1-based.

    Returns:
        A list of the successfully generated ``tweetContent`` objects, or
        None when ``topics`` is empty.
    """
    if not topics:
        print("没有选题,无法生成内容")
        return

    available = len(topics)
    if not (end_index is not None and end_index <= available):
        end_index = available

    selected = topics[start_index:end_index]
    batch_size = len(selected)
    print(f"准备处理{batch_size}个选题...")

    finished = []
    for article_no, item in enumerate(selected, start=1):
        print(f"处理第 {article_no}/{batch_size} 篇文章")

        for variant_no in range(1, variants + 1):
            print(f"正在生成变体 {variant_no}/{variants}")

            tweet_content, _raw = generate_single_content(
                ai_agent, system_prompt, item, prompts_dir, resource_dir,
                output_dir, run_id, article_no, variant_no, temperature
            )

            # Failed variants come back as None and are simply skipped.
            if tweet_content:
                finished.append(tweet_content)

    print(f"完成{len(finished)}篇文章生成")
    return finished
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _resource_entries_from_dir(root_dir):
    """Describe a ``<root_dir>/<type>/<file>`` tree as a list of resource dicts.

    Each sub-directory becomes one ``{"type", "num", "file_path"}`` entry
    listing the regular files it contains (names sorted).
    """
    entries = []
    for type_name in sorted(os.listdir(root_dir)):
        type_dir = os.path.join(root_dir, type_name)
        if not os.path.isdir(type_dir):
            continue
        candidates = (os.path.join(type_dir, name) for name in sorted(os.listdir(type_dir)))
        files = [path for path in candidates if os.path.isfile(path)]
        entries.append({"type": type_name, "num": len(files), "file_path": files})
    return entries


def prepare_topic_generation(
    select_date, select_num,
    system_prompt_path, user_prompt_path,
    base_url="vllm", model_name="qwenQWQ", api_key="EMPTY",
    gen_prompts_path="/root/autodl-tmp/TravelContentCreator/genPrompts",
    resource_dir="/root/autodl-tmp/TravelContentCreator/resource",
    output_dir="/root/autodl-tmp/TravelContentCreator/result"
):
    """Create the agent and assemble the prompts for topic generation.

    The user prompt is built, in order, from:
      1. a fixed header line,
      2. each sub-directory of ``gen_prompts_path`` with its file listing,
      3. the content of every resource file,
      4. the holiday schedule file next to ``user_prompt_path``, if present,
      5. the user prompt template read from ``user_prompt_path``,
      6. the requested topic count and date.

    Args:
        resource_dir: either a list of dicts with ``"type"`` and
            ``"file_path"`` (list of file paths) keys, or a path to a
            directory laid out as ``<resource_dir>/<type>/<file>``. The
            original code only worked with the list form, so the string
            default always failed.

    Returns:
        ``(ai_agent, system_prompt, user_prompt, output_dir)``.
    """
    ai_agent = AI_Agent(base_url, model_name, api_key)

    with open(system_prompt_path, "r", encoding="utf-8") as f:
        system_prompt = f.read()

    # Collect the pieces and join once at the end.
    parts = ["你拥有的创作资料如下:\n"]

    # List the files of every sub-directory of the genPrompts directory.
    for gen_prompt_folder in os.listdir(gen_prompts_path):
        folder_path = os.path.join(gen_prompts_path, gen_prompt_folder)
        if os.path.isdir(folder_path):
            gen_prompts = os.listdir(folder_path)
            parts.append(f"{gen_prompt_folder}\n{gen_prompts}\n")

    # Accept a plain directory path as well as the explicit list of dicts.
    if isinstance(resource_dir, (str, os.PathLike)):
        resource_entries = _resource_entries_from_dir(resource_dir)
    else:
        resource_entries = resource_dir

    for entry in resource_entries:
        source_type = entry["type"]
        for source_path in entry["file_path"]:
            with open(source_path, "r", encoding="utf-8") as f:
                parts.append(f"{source_type}信息:\n{os.path.basename(source_path)}\n{f.read()}\n\n")

    # Optional holiday schedule stored next to the user prompt template.
    dateline_path = os.path.join(os.path.dirname(user_prompt_path), "2025各月节日宣传节点时间表.md")
    if os.path.exists(dateline_path):
        with open(dateline_path, "r", encoding="utf-8") as f:
            dateline = f.read()
        parts.append(f"\n{dateline}")

    with open(user_prompt_path, "r", encoding="utf-8") as f:
        parts.append(f.read())

    parts.append(f"\n选题数量:{select_num}\n选题日期:{select_date}\n")
    user_prompt = "".join(parts)

    return ai_agent, system_prompt, user_prompt, output_dir
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Run the full pipeline: generate topics, then articles for each topic.

    All settings come from the hard-coded ``config_file`` dict below. Topics
    and prompts are saved under ``<output_dir>/<run_id>/``, and the generated
    articles are written into the same directory.
    """
    config_file = {
        "date": "4月17日",
        "num": 5,
        "model": "qwenQWQ",
        "api_url": "vllm",
        "api_key": "EMPTY",
        "topic_system_prompt": "/root/autodl-tmp/TravelContentCreator/SelectPrompt/systemPrompt.txt",
        "topic_user_prompt": "/root/autodl-tmp/TravelContentCreator/SelectPrompt/userPrompt.txt",
        "content_system_prompt": "/root/autodl-tmp/TravelContentCreator/genPrompts/systemPrompt.txt",
        "resource_dir": [
            {
                "type": "Object",
                "num": 4,
                "file_path": [
                    "/root/autodl-tmp/TravelContentCreator/resource/Object/景点信息-尚书第.txt",
                    "/root/autodl-tmp/TravelContentCreator/resource/Object/景点信息-明清园.txt",
                    "/root/autodl-tmp/TravelContentCreator/resource/Object/景点信息-泰宁古城.txt",
                    "/root/autodl-tmp/TravelContentCreator/resource/Object/景点信息-甘露寺.txt",
                ],
            },
            {
                "type": "Product",
                "num": 0,
                "file_path": [],
            },
        ],
        "prompts_dir": "/root/autodl-tmp/TravelContentCreator/genPrompts",
        "output_dir": "/root/autodl-tmp/TravelContentCreator/result",
        "variants": 2,
        "topic_temperature": 0.2,
        "content_temperature": 0.3,
    }

    # 1. Generate the topics.
    ai_agent, system_prompt, user_prompt, output_dir = prepare_topic_generation(
        config_file["date"], config_file["num"],
        config_file["topic_system_prompt"], config_file["topic_user_prompt"],
        config_file["api_url"], config_file["model"], config_file["api_key"],
        config_file["prompts_dir"], config_file["resource_dir"], config_file["output_dir"]
    )

    run_id, tweet_topic_record = generate_topics(
        ai_agent, system_prompt, user_prompt, config_file["output_dir"],
        config_file["topic_temperature"], 0.5, 1.5
    )

    # Validate before run_id is used to build paths and touch the filesystem.
    if not run_id or not tweet_topic_record:
        print("选题生成失败,退出程序")
        return

    output_dir = os.path.join(config_file["output_dir"], run_id)
    os.makedirs(output_dir, exist_ok=True)
    tweet_topic_record.save_topics(os.path.join(output_dir, "tweet_topic.json"))
    tweet_topic_record.save_prompt(os.path.join(output_dir, "tweet_prompt.txt"))

    # 2. Generate the articles.
    print("\n开始根据选题生成内容...")

    content_system_prompt = ResourceLoader.load_system_prompt(config_file["content_system_prompt"])
    if not content_system_prompt:
        # Fall back to the system prompt used for topic generation.
        print("内容生成系统提示词为空,使用选题生成的系统提示词")
        content_system_prompt = system_prompt

    # The same agent instance is reused for content generation.
    generate_content(
        ai_agent, content_system_prompt, tweet_topic_record.topics_list, output_dir, run_id,
        config_file["prompts_dir"], config_file["resource_dir"],
        config_file["variants"], config_file["content_temperature"]
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|