TravelContentCreator/utils/tweet_generator.py

246 lines
9.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import time
import random
import argparse
import json
from datetime import datetime
import sys
sys.path.append('/root/autodl-tmp')
# 从本地模块导入
from TravelContentCreator.core.ai_agent import AI_Agent
from TravelContentCreator.core.topic_parser import TopicParser
from TravelContentCreator.utils.resource_loader import ResourceLoader
def generate_topics(ai_agent, system_prompt, user_prompt, output_dir, temperature=0.2, top_p=0.5, presence_penalty=1.5):
    """Ask the AI agent for a topic list, dump the raw reply, then parse and save it.

    The raw reply (together with the prompts, token count and the time cost
    reported by ``ai_agent.work``) is written to ``<output_dir>/<run_id>.txt``,
    where ``run_id`` is the current local timestamp. The reply is then passed
    to ``TopicParser.parse_topics`` and ``TopicParser.save_topics``.

    Returns:
        ``(json_path, run_id, result_list)`` when ``TopicParser.save_topics``
        reports success; ``None`` otherwise.
    """
    print("开始生成选题...")

    # Wall-clock timing around the model call only.
    started_at = time.time()
    # The agent hands back the prompts it used; they replace the arguments
    # so the dump below records exactly what the agent reports.
    result, system_prompt, user_prompt, _, _, tokens, time_cost = ai_agent.work(
        system_prompt, user_prompt, "", temperature, top_p, presence_penalty
    )
    finished_at = time.time()
    print(f"选题生成完成,耗时:{finished_at - started_at}")

    # Timestamp-based identifier shared by the raw dump and the parsed topics.
    run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    # Persist the unparsed reply before any parsing is attempted.
    os.makedirs(output_dir, exist_ok=True)
    raw_dump_path = os.path.join(output_dir, f"{run_id}.txt")
    with open(raw_dump_path, "w", encoding="utf-8") as raw_file:
        raw_file.writelines([
            f"result: {result}\n",
            f"system_prompt: {system_prompt}\n",
            f"user_prompt: {user_prompt}\n",
            f"tokens: {tokens}\n",
            f"time: {time_cost}s\n",
        ])

    # Parse the reply into topics and save them.
    result_list = TopicParser.parse_topics(result)
    success, json_path = TopicParser.save_topics(result_list, output_dir, run_id, result)
    if success:
        print(f"成功解析并保存{len(result_list)}个选题到{json_path}")
        return json_path, run_id, result_list

    print("选题解析失败请检查AI输出格式")
    return None
def generate_content(ai_agent, system_prompt, topics, output_dir, run_id, prompts_dir, resource_dir,
                     variants=2, temperature=0.3, start_index=0, end_index=None,
                     top_p=0.5, presence_penalty=1.5):
    """Generate article variants for a slice of the given topics.

    For every topic in ``topics[start_index:end_index]`` a user prompt is
    built with ``ResourceLoader.build_user_prompt`` and ``ai_agent.work`` is
    called ``variants`` times. Each reply is saved through
    ``ResourceLoader.save_article``; only the first variant of each topic is
    passed to ``ResourceLoader.update_summary``.

    Args:
        ai_agent: Object exposing ``work(system_prompt, user_prompt, "",
            temperature, top_p, presence_penalty)``.
        system_prompt: System prompt used for every request.
        topics: Sequence of topic items; an empty/``None`` value aborts early.
        output_dir: Passed through to the ``ResourceLoader`` save helpers.
        run_id: Identifier of the current run, passed to the save helpers.
        prompts_dir: Passed to ``ResourceLoader.build_user_prompt``.
        resource_dir: Passed to ``ResourceLoader.build_user_prompt``.
        variants: Number of replies requested per topic.
        temperature: Sampling temperature forwarded to the agent.
        start_index: First topic index to process.
        end_index: One past the last topic index; ``None`` or a value beyond
            ``len(topics)`` means "to the end".
        top_p: Forwarded to the agent (previously fixed at 0.5).
        presence_penalty: Forwarded to the agent (previously fixed at 1.5).

    Returns:
        A list with every generated reply, or ``None`` when ``topics`` is
        empty.
    """
    if not topics:
        print("没有选题,无法生成内容")
        return

    # Clamp the upper bound so an oversized end_index means "all remaining".
    if end_index is None or end_index > len(topics):
        end_index = len(topics)
    topics_to_process = topics[start_index:end_index]
    total = len(topics_to_process)
    print(f"准备处理{total}个选题...")

    summary_file = ResourceLoader.create_summary_file(output_dir, run_id, total)

    processed_results = []
    # Article numbers are 1-based and relative to the processed slice.
    for article_no, item in enumerate(topics_to_process, start=1):
        print(f"处理第 {article_no}/{total} 篇文章")

        user_prompt = ResourceLoader.build_user_prompt(item, prompts_dir, resource_dir)
        print(f"完成提示词构建,长度为 {len(user_prompt)} 字符")
        print(user_prompt)
        # NOTE(review): looks like a debugging aid - the file lives in the
        # current working directory and is overwritten for every topic.
        # Kept to preserve existing behaviour; consider removing.
        with open("test.txt", "w", encoding="utf-8") as f:
            f.write(user_prompt)

        try:
            for variant_no in range(1, variants + 1):
                print(f" 正在生成变体 {variant_no}/{variants}")
                # Small random pause so requests are not fired back-to-back.
                time.sleep(random.uniform(0.1, 0.5))

                result, _, _, _, _, tokens, time_cost = ai_agent.work(
                    system_prompt, user_prompt, "", temperature, top_p, presence_penalty
                )
                print(f" 生成完成tokens: {tokens}, 耗时: {time_cost}s")
                processed_results.append(result)

                filepath = ResourceLoader.save_article(
                    result, user_prompt, output_dir, run_id, article_no, variant_no
                )
                if filepath:
                    print(f" 结果已保存到: {filepath}")

                # Only the first variant of each topic goes into the summary.
                if variant_no == 1:
                    ResourceLoader.update_summary(summary_file, article_no, user_prompt, result)
        except Exception as e:
            # Best-effort: a failure abandons the remaining variants of this
            # topic but the loop continues with the next topic.
            print(f"处理选题时出错: {e}")

    print(f"完成{len(processed_results)}篇文章生成,汇总文件:{summary_file}")
    return processed_results
def _resolve_resource_entries(resource_dir):
    """Return resource entries as an iterable of ``{"type", "file_path"}`` dicts.

    A list (or other iterable) of dicts is returned unchanged. A string /
    path-like value is treated as a directory laid out as
    ``<resource_dir>/<type>/<files>``: each sub-directory becomes one entry
    whose ``type`` is the folder name and whose ``file_path`` lists the
    regular files inside it. ``None`` yields no entries.
    """
    if resource_dir is None:
        return []
    if not isinstance(resource_dir, (str, os.PathLike)):
        return resource_dir

    entries = []
    for type_name in sorted(os.listdir(resource_dir)):
        type_dir = os.path.join(resource_dir, type_name)
        if not os.path.isdir(type_dir):
            continue
        candidates = (os.path.join(type_dir, name) for name in sorted(os.listdir(type_dir)))
        entries.append({
            "type": type_name,
            "file_path": [path for path in candidates if os.path.isfile(path)],
        })
    return entries


def prepare_topic_generation(
    select_date, select_num,
    system_prompt_path, user_prompt_path,
    base_url="vllm", model_name="qwenQWQ", api_key="EMPTY",
    gen_prompts_path="/root/autodl-tmp/TravelContentCreator/genPrompts",
    resource_dir="/root/autodl-tmp/TravelContentCreator/resource",
    output_dir="/root/autodl-tmp/TravelContentCreator/result"
):
    """Create the AI agent and assemble the prompts for topic generation.

    The user prompt is built, in order, from: a fixed heading, the file
    listing of every sub-directory of ``gen_prompts_path``, the contents of
    every resource file, an optional festival timetable found next to
    ``user_prompt_path``, the user prompt template itself, and finally the
    requested topic count and date.

    Args:
        select_date: Date text appended to the prompt.
        select_num: Number of topics requested, appended to the prompt.
        system_prompt_path: File holding the system prompt.
        user_prompt_path: File holding the user prompt template.
        base_url, model_name, api_key: Forwarded to ``AI_Agent``.
        gen_prompts_path: Directory whose sub-directories are listed in the
            prompt.
        resource_dir: Either a list of ``{"type": ..., "file_path": [...]}``
            dicts, or a directory path containing one sub-directory per type
            (the string default previously raised ``TypeError``).
        output_dir: Returned unchanged for the caller's convenience.

    Returns:
        ``(ai_agent, system_prompt, user_prompt, output_dir)``.

    Raises:
        OSError: If a prompt file, ``gen_prompts_path`` or a resource path
            cannot be read.
    """
    ai_agent = AI_Agent(base_url, model_name, api_key)

    with open(system_prompt_path, "r", encoding="utf-8") as f:
        system_prompt = f.read()

    # Collect the pieces and join once at the end.
    prompt_parts = ["你拥有的创作资料如下:\n"]

    # List what each prompt folder contains. os.listdir returns entries in
    # arbitrary order, so sort to keep the generated prompt reproducible.
    for folder_name in sorted(os.listdir(gen_prompts_path)):
        folder_path = os.path.join(gen_prompts_path, folder_name)
        if os.path.isdir(folder_path):
            folder_files = sorted(os.listdir(folder_path))
            prompt_parts.append(f"{folder_name}\n{folder_files}\n")

    # Inline the text of every resource file, labelled by its type.
    for entry in _resolve_resource_entries(resource_dir):
        source_type = entry["type"]
        for source_file in entry["file_path"]:
            with open(source_file, "r", encoding="utf-8") as f:
                prompt_parts.append(
                    f"{source_type}信息:\n{os.path.basename(source_file)}\n{f.read()}\n\n"
                )

    # Optional festival/promotion timetable stored beside the user prompt.
    dateline_path = os.path.join(os.path.dirname(user_prompt_path), "2025各月节日宣传节点时间表.md")
    if os.path.exists(dateline_path):
        with open(dateline_path, "r", encoding="utf-8") as f:
            prompt_parts.append(f"\n{f.read()}")

    with open(user_prompt_path, "r", encoding="utf-8") as f:
        prompt_parts.append(f.read())

    prompt_parts.append(f"\n选题数量:{select_num}\n选题日期:{select_date}\n")

    user_prompt = "".join(prompt_parts)
    return ai_agent, system_prompt, user_prompt, output_dir
def main():
    """Run the pipeline end to end: generate topics, then content for them.

    All settings are hard-coded in ``args`` below. The function prints a
    message and returns early when topic generation yields nothing usable.
    """
    args = {
        "date": "4月17日",
        "num": 5,
        "model": "qwenQWQ",
        "api_url": "vllm",
        "api_key": "EMPTY",
        "topic_system_prompt": "/root/autodl-tmp/TravelContentCreator/SelectPrompt/systemPrompt.txt",
        "topic_user_prompt": "/root/autodl-tmp/TravelContentCreator/SelectPrompt/userPrompt.txt",
        "content_system_prompt": "/root/autodl-tmp/TravelContentCreator/genPrompts/systemPrompt.txt",
        "resource_dir": [
            {
                "type": "Object",
                "num": 4,
                "file_path": [
                    "/root/autodl-tmp/TravelContentCreator/resource/Object/景点信息-尚书第.txt",
                    "/root/autodl-tmp/TravelContentCreator/resource/Object/景点信息-明清园.txt",
                    "/root/autodl-tmp/TravelContentCreator/resource/Object/景点信息-泰宁古城.txt",
                    "/root/autodl-tmp/TravelContentCreator/resource/Object/景点信息-甘露寺.txt",
                ],
            },
            {
                "type": "Product",
                "num": 0,
                "file_path": [],
            },
        ],
        "prompts_dir": "/root/autodl-tmp/TravelContentCreator/genPrompts",
        "output_dir": "/root/autodl-tmp/TravelContentCreator/result",
        "variants": 2,
        "topic_temperature": 0.2,
        "content_temperature": 0.3,
    }

    # 1. Generate the topics first.
    ai_agent, system_prompt, user_prompt, _ = prepare_topic_generation(
        args["date"], args["num"], args["topic_system_prompt"], args["topic_user_prompt"],
        args["api_url"], args["model"], args["api_key"], args["prompts_dir"], args["resource_dir"], args["output_dir"]
    )
    topic_result = generate_topics(
        ai_agent, system_prompt, user_prompt, args["output_dir"],
        args["topic_temperature"], 0.5, 1.5
    )
    # generate_topics returns None when parsing/saving fails; unpacking that
    # directly would raise TypeError instead of reporting the failure.
    if not topic_result:
        print("选题生成失败,退出程序")
        return
    json_path, run_id, topics = topic_result
    print(topics)
    if not json_path or not topics:
        print("选题生成失败,退出程序")
        return

    # 2. Then generate content for those topics.
    print("\n开始根据选题生成内容...")
    content_system_prompt = ResourceLoader.load_system_prompt(args["content_system_prompt"])
    if not content_system_prompt:
        # Fall back to the topic-generation system prompt.
        print("内容生成系统提示词为空,使用选题生成的系统提示词")
        content_system_prompt = system_prompt

    # Reuse the same AI agent instance for content generation.
    generate_content(
        ai_agent, content_system_prompt, topics, args["output_dir"], run_id, args["prompts_dir"], args["resource_dir"],
        args["variants"], args["content_temperature"]
    )
# Script entry point: run the pipeline only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()