video_template_gen/code/api_video_only.py

265 lines
8.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from openai import OpenAI
import os
import base64
import time
from datetime import datetime
from save_usage_info import save_usage_info_to_txt, save_simple_usage_info
# Base64 编码格式
def encode_video(video_path):
    """Return the contents of the file at ``video_path`` as Base64 text.

    The file is opened in binary mode, read in full, Base64-encoded, and the
    encoded bytes are decoded to a ``str`` using UTF-8.
    """
    with open(video_path, "rb") as stream:
        raw_bytes = stream.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")
def encode_audio(audio_path):
    """Return the Base64 text encoding of the file at ``audio_path``.

    The whole file is read as bytes, Base64-encoded, and returned as a
    UTF-8-decoded ``str``.
    """
    with open(audio_path, "rb") as source:
        payload = source.read()
    return base64.b64encode(payload).decode("utf-8")
def read_txt_file(txt_path):
    """Read a UTF-8 text file and return its contents.

    On success two status lines are printed (the path and the character
    count) and the text is returned. If the file does not exist, or any
    other error occurs while reading, a message is printed and an empty
    string is returned instead.
    """
    result = ""
    try:
        with open(txt_path, 'r', encoding='utf-8') as handle:
            text = handle.read()
            print(f"成功读取txt文件: {txt_path}")
            print(f"文件内容长度: {len(text)} 字符")
        # Only publish the text once the file has been read and closed cleanly.
        result = text
    except FileNotFoundError:
        print(f"错误: 找不到文件 {txt_path}")
    except Exception as err:
        print(f"读取文件时出错: {err}")
    return result
def read_json_file(json_path):
    """Load a UTF-8 JSON file and return the parsed value.

    Prints a status line on success. Returns ``None`` (after printing a
    message) when the file is missing, when its content is not valid JSON,
    or when any other error occurs while reading it.
    """
    import json

    parsed = None
    try:
        with open(json_path, 'r', encoding='utf-8') as handle:
            loaded = json.loads(handle.read())
            print(f"成功读取JSON文件: {json_path}")
        # Only publish the value once the file has been parsed and closed.
        parsed = loaded
    except FileNotFoundError:
        print(f"错误: 找不到文件 {json_path}")
    except json.JSONDecodeError as err:
        print(f"JSON解析错误: {err}")
    except Exception as err:
        print(f"读取JSON文件时出错: {err}")
    return parsed
def save_result_to_txt(response_text, video_path, save_dir="/root/autodl-tmp/not_output"):
    """Save an analysis result to a timestamped TXT file.

    The file is named ``<video base name>_analysis_<YYYYmmdd_HHMMSS>.txt`` and
    is written into ``save_dir`` (created if necessary). The content is a
    short header (video path and analysis time) followed by ``response_text``.

    Args:
        response_text: Text of the analysis to store.
        video_path: Path of the analysed video; used for the file name and
            echoed in the header.
        save_dir: Directory that receives the TXT file.

    Returns:
        The path of the written file, or ``None`` if the directory could not
        be created or the file could not be written (a message is printed).
    """
    # Read the clock once so the file name and the header always agree.
    now = datetime.now()
    file_stamp = now.strftime("%Y%m%d_%H%M%S")
    header_stamp = now.strftime('%Y-%m-%d %H:%M:%S')

    video_name = os.path.splitext(os.path.basename(video_path))[0]
    txt_filename = f"{video_name}_analysis_{file_stamp}.txt"
    txt_path = os.path.join(save_dir, txt_filename)

    content = f"""视频分析结果
=====================================
视频文件: {video_path}
分析时间: {header_stamp}
=====================================
{response_text}
"""
    try:
        # Directory creation is part of the save: a failure here is reported
        # the same way as a failed write instead of raising to the caller.
        os.makedirs(save_dir, exist_ok=True)
        with open(txt_path, 'w', encoding='utf-8') as f:
            f.write(content)
    except (OSError, ValueError) as e:
        # ValueError covers invalid paths (e.g. embedded NUL) and encoding errors.
        print(f"\n❌ 保存TXT文件失败: {e}")
        return None
    print(f"\n✅ 分析结果已保存到: {txt_path}")
    return txt_path
# Whether the API is called in streaming mode.
STREAM_MODE = True

# Path of the video to analyse.
video_path = "/root/autodl-tmp/video/中国国旅_compressed.mp4"

# Base64-encode the video and report how long the encoding took, so the
# recorded start time is actually used.
print("开始编码文件...")
encode_start_time = time.time()
base64_video = encode_video(video_path)
encode_duration = time.time() - encode_start_time
print(f"📁 文件编码时间: {encode_duration:.2f} 秒")
# Prompt text sent to the model together with the video (it is appended to
# content_list below as the "text" part). No token counting happens here.
prompt_text = """🎥 **抖音短视频内容分析专家**
## 任务背景
您是一位经验丰富的视频导演和编辑,需要基于视频内容。为视频写一个完整、流畅的脚本。
请对这个抖音短视频进行详细的内容分析,重点关注以下三个方面:
## 🎤 一、口播内容提取
请仔细听取视频中的语音内容,完整转录:
- **完整口播转录**:逐字逐句转录所有口语表达
- **语音时长**:估算总的讲话时长
## 📝 二、字幕文字识别
请识别视频画面中出现的所有文字内容:
- **屏幕字幕**:视频中显示的字幕文字(包括自动字幕和手动添加的字幕)
- **标题文字**:视频开头、中间、结尾出现的大标题
## 🎬 三、转场效果分析
请仔细观察视频中的转场效果,并且结合参考资料中的转场内容,请你整体分析一下视频。比如几个画面出现第一个转场等。
## 📊 输出格式要求
## 视频内容分析
请按照以下JSON格式输出视频描述
请你描述下视频中出现的一系列画面以JSON格式输出开始时间start_time、结束事件end_time、口播内容talk、字幕内容subtitles、事件event
关联一下视频的所有帧,找出出现多次的文本,并输出它的字体,颜色,大小,位置,
请使用HH:mm:ss表示 时间戳,不要输出```json```代码段。
## 注意事项
1. 保持描述简洁明了,但要有足够的细节
2. 突出视频的亮点和特色
3. 确保时间戳的准确性
4. 对话内容要符合视频画面
5. 整体风格要统一连贯
6. 每个镜头的描述要包含关键信息
请根据以上要求分析视频并输出JSON格式的描述。
请开始详细分析这个抖音短视频:"""
# The API key is read from the DASHSCOPE_API_KEY environment variable rather
# than being embedded in the source, so the secret never lands in version
# control.
# NOTE(review): a literal key was previously committed on this line; it should
# be treated as leaked and revoked.
_dashscope_api_key = os.getenv("DASHSCOPE_API_KEY")
if not _dashscope_api_key:
    # Fail early with a clear message instead of a less obvious client error.
    raise RuntimeError("未设置环境变量 DASHSCOPE_API_KEY,无法调用API")

# OpenAI-compatible client pointed at the DashScope compatible-mode endpoint.
client = OpenAI(
    api_key=_dashscope_api_key,
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)
# Build the multimodal content of the user message: the video first, then the
# text prompt.
# When passing a video file directly, "type" must be "video_url"; the video is
# supplied inline as a Base64 data URL.
video_part = {
    "type": "video_url",
    "video_url": {"url": f"data:video/mp4;base64,{base64_video}"},
}
text_part = {"type": "text", "text": prompt_text}
# An additional {"type": "audio_url", ...} part was disabled in the original
# and is therefore not sent.
content_list = [video_part, text_part]
# Announce the request and echo the settings that are about to be used.
request_started_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print("\n开始请求API...")
print(f"请求时间: {request_started_at}")
print(f"Stream模式: {STREAM_MODE}")
print(f"Content项目数量: {len(content_list)}")

# Conversation: a plain-text system message plus the multimodal user message.
chat_messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": content_list},
]

# Usage statistics are only requested when streaming.
if STREAM_MODE:
    usage_options = {"include_usage": True}
else:
    usage_options = None

# Start of the API request; later code measures latency against this value.
api_start_time = time.time()
# Alternative model (disabled in the original): "/root/autodl-tmp/llm/Qwen-omni"
completion = client.chat.completions.create(
    model="qwen-omni-turbo",
    messages=chat_messages,
    stream=STREAM_MODE,
    stream_options=usage_options,
    temperature=0.5,
)
def _token_count(details, field):
    """Return the token count stored in ``details.<field>``, or 0 if it is absent or None."""
    return getattr(details, field, None) or 0


if STREAM_MODE:
    # Streaming output: collect the text pieces and join them once at the end.
    response_parts = []
    usage_info = None
    # Cost breakdown passed to save_usage_info_to_txt. The "*_momey" key
    # spelling is kept as-is because that helper lives in another module and
    # presumably reads these exact keys.
    money = {}
    # Time at which the first piece of content arrived.
    first_token_time = None
    print("正在生成回复...")
    for chunk in completion:
        if chunk.choices:
            delta = chunk.choices[0].delta
            if delta.content:
                if first_token_time is None:
                    first_token_time = time.time()
                    first_token_delay = first_token_time - api_start_time
                    print(f"首个token延迟: {first_token_delay:.2f}")
                response_parts.append(delta.content)
        elif chunk.usage is not None:
            # A chunk without choices is expected to carry the usage
            # statistics (requested via stream_options include_usage).
            # NOTE(review): this branch is assumed to pair with
            # `if chunk.choices`; confirm against the original indentation.
            usage_info = chunk.usage
            details = usage_info.prompt_tokens_details
            # Each rate is multiplied by a token count and divided by 1000.
            # Missing or None counts are treated as 0 so that a field absent
            # from the response cannot abort the run before the collected
            # text is saved.
            money["output_momey"] = (usage_info.completion_tokens or 0) * 0.0045 / 1000
            money["prompt_momey"] = _token_count(details, "text_tokens") * 0.0004 / 1000
            money["video_momey"] = _token_count(details, "video_tokens") * 0.0015 / 1000
            money["audio_momey"] = _token_count(details, "audio_tokens") * 0.025 / 1000
            money["sum_momey"] = (
                money["output_momey"]
                + money["prompt_momey"]
                + money["video_momey"]
                + money["audio_momey"]
            )
            print(usage_info)
    full_response = "".join(response_parts)

    # Total wall-clock time from sending the request to the end of the stream.
    api_end_time = time.time()
    total_duration = api_end_time - api_start_time

    # Print the complete response.
    print("\n" + "="*50)
    print("完整回复:")
    print("="*50)
    print(full_response)

    # Persist the response text first, then the usage and cost information.
    txt_file_path = save_result_to_txt(full_response, video_path)
    # NOTE(review): called even when no usage chunk was received (usage_info
    # is None and money is empty), as in the original; confirm the helper
    # accepts that.
    usage_info_txt = save_usage_info_to_txt(usage_info, total_duration, money, video_path)

    # Print the usage information when it was received.
    if usage_info:
        print("\n" + "="*50)
        print("📈 使用情况:")
        print("="*50)
        print(usage_info)
# else:
# # 非流式输出 - 直接输出完整响应
# api_end_time = time.time()
# total_duration = api_end_time - api_start_time
# print("非流式输出模式:")
# print("完整回复:")
# print("="*50)
# print(completion.choices[0].message.content)
# # 保存结果为TXT文件
# txt_file_path = save_result_to_txt(completion.choices[0].message.content + "total_duration:" + str(total_duration), video_path)
# # 输出时间统计信息
# print("\n" + "="*50)
# print("⏱️ 时间统计:")
# print("="*50)
# print(f"📁 文件编码时间: {encode_duration:.2f} 秒")
# print(f"🕐 API总响应时间: {total_duration:.2f} 秒")
# print(f"⏰ 完成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
# # 输出使用情况信息
# if hasattr(completion, 'usage') and completion.usage:
# print("\n" + "="*50)
# print("📈 使用情况:")
# print("="*50)
# print(completion.usage)