# File-viewer header (not code): 265 lines, 8.9 KiB, Python
import base64
import json
import os
import time
from datetime import datetime

from openai import OpenAI

from save_usage_info import save_usage_info_to_txt, save_simple_usage_info

# Base64 encoding helpers
def encode_video(video_path):
    """Return the contents of the file at ``video_path`` as a Base64 string.

    The file is opened in binary mode and read in full; the Base64 bytes are
    then decoded to ``str`` as UTF-8. Errors from ``open`` propagate to the
    caller.
    """
    with open(video_path, "rb") as fh:
        raw_bytes = fh.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

def encode_audio(audio_path):
    """Return the contents of the file at ``audio_path`` as a Base64 string.

    The file is opened in binary mode and read in full; the Base64 bytes are
    then decoded to ``str`` as UTF-8. Errors from ``open`` propagate to the
    caller.
    """
    with open(audio_path, "rb") as fh:
        raw_bytes = fh.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

def read_txt_file(txt_path):
    """Read a UTF-8 text file and return its full contents.

    On success a confirmation line and the character count are printed.
    If the file does not exist, or any other exception is raised while
    reading, an error line is printed and an empty string is returned
    instead of raising.
    """
    try:
        with open(txt_path, 'r', encoding='utf-8') as fh:
            text = fh.read()
        # Status output stays inside the try so that any failure here is
        # reported through the same handlers as a read failure.
        print(f"成功读取txt文件: {txt_path}")
        print(f"文件内容长度: {len(text)} 字符")
        return text
    except FileNotFoundError:
        print(f"错误: 找不到文件 {txt_path}")
        return ""
    except Exception as err:
        print(f"读取文件时出错: {err}")
        return ""

def read_json_file(json_path):
    """Load and return the parsed contents of a UTF-8 JSON file.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load``, or ``None`` when the file is
        missing, is not valid JSON, or any other error occurs while reading.
        In every failure case an error line is printed instead of raising.
    """
    # ``json`` is imported at module level, so the ``except`` clause below
    # no longer depends on an import executed inside the guarded body.
    try:
        with open(json_path, 'r', encoding='utf-8') as fh:
            data = json.load(fh)
        print(f"成功读取JSON文件: {json_path}")
        return data
    except FileNotFoundError:
        print(f"错误: 找不到文件 {json_path}")
        return None
    except json.JSONDecodeError as err:
        print(f"JSON解析错误: {err}")
        return None
    except Exception as err:
        # Deliberate best-effort: any other failure is reported, not raised.
        print(f"读取JSON文件时出错: {err}")
        return None

def save_result_to_txt(response_text, video_path, save_dir="/root/autodl-tmp/not_output"):
    """Write an analysis result to a timestamped UTF-8 text file.

    The file is named ``<video stem>_analysis_<YYYYmmdd_HHMMSS>.txt`` and is
    placed in ``save_dir``. Its content is a short header (video path and
    analysis time) followed by ``response_text``.

    Args:
        response_text: Text of the analysis result to store.
        video_path: Path of the analysed video; its base name (without
            extension) is used in the output file name and the full value is
            written into the header.
        save_dir: Directory to write into; created if it does not exist.

    Returns:
        The path of the written file, or ``None`` if the directory could not
        be created or the file could not be written (an error line is
        printed in that case).
    """
    # Take a single timestamp so the file name and the header always agree,
    # even when the call straddles a second boundary.
    now = datetime.now()

    video_name = os.path.splitext(os.path.basename(video_path))[0]
    txt_filename = f"{video_name}_analysis_{now.strftime('%Y%m%d_%H%M%S')}.txt"
    txt_path = os.path.join(save_dir, txt_filename)

    # File content: header block, blank line, then the response text.
    content = f"""视频分析结果
=====================================
视频文件: {video_path}
分析时间: {now.strftime('%Y-%m-%d %H:%M:%S')}
=====================================

{response_text}
"""

    try:
        # Directory creation is part of saving, so its failures are reported
        # through the same print-and-return-None path as write failures.
        os.makedirs(save_dir, exist_ok=True)
        with open(txt_path, 'w', encoding='utf-8') as f:
            f.write(content)
    except Exception as e:
        print(f"\n❌ 保存TXT文件失败: {e}")
        return None

    print(f"\n✅ 分析结果已保存到: {txt_path}")
    return txt_path

# Whether the chat completion is requested as a stream (passed as ``stream=``).
STREAM_MODE = True

# Input file path
video_path = "/root/autodl-tmp/video/中国国旅_compressed.mp4"

# Encode the video as Base64 and measure how long that takes.
print("开始编码文件...")
encode_start_time = time.time()

base64_video = encode_video(video_path)

# The start time used to be recorded but never read; compute and report the
# elapsed encoding time so the measurement is actually used.
encode_duration = time.time() - encode_start_time
print(f"📁 文件编码时间: {encode_duration:.2f} 秒")

# Text prompt sent to the model together with the video.
# Two typos in the original prompt are corrected: "结束事件(end_time)" now
# reads "结束时间(end_time)" (end time, matching "开始时间(start_time)"), and
# a stray full stop that split the first task sentence is now a comma.
prompt_text = """🎥 **抖音短视频内容分析专家**
## 任务背景
您是一位经验丰富的视频导演和编辑,需要基于视频内容,为视频写一个完整、流畅的脚本。
请对这个抖音短视频进行详细的内容分析,重点关注以下三个方面:
## 🎤 一、口播内容提取
请仔细听取视频中的语音内容,完整转录:
- **完整口播转录**:逐字逐句转录所有口语表达
- **语音时长**:估算总的讲话时长
## 📝 二、字幕文字识别
请识别视频画面中出现的所有文字内容:
- **屏幕字幕**:视频中显示的字幕文字(包括自动字幕和手动添加的字幕)
- **标题文字**:视频开头、中间、结尾出现的大标题

## 🎬 三、转场效果分析
请仔细观察视频中的转场效果,并且结合参考资料中的转场内容,请你整体分析一下视频。比如几个画面出现第一个转场等。


## 📊 输出格式要求

## 视频内容分析
请按照以下JSON格式输出视频描述:

请你描述下视频中出现的一系列画面,以JSON格式输出开始时间(start_time)、结束时间(end_time)、口播内容(talk)、字幕内容(subtitles)、事件(event),
关联一下视频的所有帧,找出出现多次的文本,并输出它的字体,颜色,大小,位置,
请使用HH:mm:ss表示 时间戳,不要输出```json```代码段。

## 注意事项
1. 保持描述简洁明了,但要有足够的细节
2. 突出视频的亮点和特色
3. 确保时间戳的准确性
4. 对话内容要符合视频画面
5. 整体风格要统一连贯
6. 每个镜头的描述要包含关键信息

请根据以上要求,分析视频并输出JSON格式的描述。

请开始详细分析这个抖音短视频:"""

# The API key is read from the DASHSCOPE_API_KEY environment variable rather
# than being embedded in the source file, where it would leak through version
# control and pastes. Any key that was previously committed here should be
# treated as compromised and rotated.
# NOTE(review): when the variable is unset, api_key is None; the OpenAI client
# is then expected to fall back to its own default lookup or raise — confirm.
client = OpenAI(
    api_key=os.getenv("DASHSCOPE_API_KEY"),
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)

# Build the user message content: the video first, then the text prompt.
content_list = [
    {
        # When passing a video file directly, "type" must be "video_url".
        "type": "video_url",
        "video_url": {"url": "data:video/mp4;base64,{}".format(base64_video)},
    },
    # Optional audio entry, currently disabled:
    # {
    #     "type": "audio_url",
    #     "audio_url": {"url": f"data:audio/wav;base64,{base64_audio}"},
    # },
    {
        "type": "text",
        "text": prompt_text,
    },
]

# Announce the request and the settings it will be sent with.
print("\n开始请求API...")
print("请求时间: {}".format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
print("Stream模式: {}".format(STREAM_MODE))
print("Content项目数量: {}".format(len(content_list)))

# Record when the API request starts; later timing is measured from here.
api_start_time = time.time()

chat_messages = [
    {
        "role": "system",
        "content": "You are a helpful assistant.",
        # Alternative list form:
        # "content": [{"type": "text", "text": "You are a helpful assistant."}]
    },
    {
        "role": "user",
        "content": content_list,
    },
]

# Usage statistics are only requested on the stream when streaming is on.
if STREAM_MODE:
    stream_options = {"include_usage": True}
else:
    stream_options = None

completion = client.chat.completions.create(
    model="qwen-omni-turbo",
    # model="/root/autodl-tmp/llm/Qwen-omni",
    messages=chat_messages,
    stream=STREAM_MODE,
    stream_options=stream_options,
    temperature=0.5,
)

if STREAM_MODE:
    # Streaming output: collect the pieces of the reply, record the usage
    # chunk, then print and save everything.
    # NOTE(review): block nesting was reconstructed from a paste that lost
    # its indentation; the usage branch is attached to chunks that carry no
    # choices — confirm against the original file.
    response_parts = []
    usage_info = None
    money = {}
    # Time at which the first content token arrived
    first_token_time = None

    # Per-1,000-token rates as hard-coded in this script. The misspelled
    # "momey" keys are kept unchanged because the dict is handed to
    # save_usage_info_to_txt, which may rely on them.
    rates_per_1k = {
        "output_momey": 0.0045,
        "prompt_momey": 0.0004,
        "video_momey": 0.0015,
        "audio_momey": 0.025,
    }

    print("正在生成回复...")
    for chunk in completion:
        if chunk.choices:
            delta = chunk.choices[0].delta
            if delta.content:
                # Report the latency of the first content token once.
                if first_token_time is None:
                    first_token_time = time.time()
                    first_token_delay = first_token_time - api_start_time
                    print(f"首个token延迟: {first_token_delay:.2f} 秒")

                response_parts.append(delta.content)
        elif chunk.usage is not None:
            # Chunk without choices that carries the usage statistics.
            usage_info = chunk.usage
            details = getattr(usage_info, "prompt_tokens_details", None)
            # Any count that is missing or None is treated as 0 so the cost
            # arithmetic cannot fail on an absent modality.
            token_counts = {
                "output_momey": usage_info.completion_tokens,
                "prompt_momey": getattr(details, "text_tokens", None),
                "video_momey": getattr(details, "video_tokens", None),
                "audio_momey": getattr(details, "audio_tokens", None),
            }
            money = {
                key: (token_counts[key] or 0) * rate / 1000
                for key, rate in rates_per_1k.items()
            }
            money["sum_momey"] = sum(money.values())
            print(usage_info)

    full_response = "".join(response_parts)

    # Record when the API request finished
    api_end_time = time.time()
    total_duration = api_end_time - api_start_time

    # Print the complete response
    print("\n" + "="*50)
    print("完整回复:")
    print("="*50)
    print(full_response)

    # Save the result to a TXT file
    txt_file_path = save_result_to_txt(full_response, video_path)
    # Save the usage information
    usage_info_txt = save_usage_info_to_txt(usage_info, total_duration, money, video_path)

    # Print the usage information
    if usage_info:
        print("\n" + "="*50)
        print("📈 使用情况:")
        print("="*50)
        print(usage_info)

# else:
#     # Non-streaming output: print the complete response directly
#     api_end_time = time.time()
#     total_duration = api_end_time - api_start_time
#
#     print("非流式输出模式:")
#     print("完整回复:")
#     print("="*50)
#     print(completion.choices[0].message.content)
#
#     # Save the result to a TXT file
#
#     txt_file_path = save_result_to_txt(completion.choices[0].message.content + "total_duration:" + str(total_duration), video_path)
#
#     # Print timing statistics
#     print("\n" + "="*50)
#     print("⏱️ 时间统计:")
#     print("="*50)
#     print(f"📁 文件编码时间: {encode_duration:.2f} 秒")
#     print(f"🕐 API总响应时间: {total_duration:.2f} 秒")
#     print(f"⏰ 完成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
#
#     # Print usage information
#     if hasattr(completion, 'usage') and completion.usage:
#         print("\n" + "="*50)
#         print("📈 使用情况:")
#         print("="*50)
#         print(completion.usage)