267 lines
8.3 KiB
Python
267 lines
8.3 KiB
Python
|
from openai import OpenAI
|
|||
|
import os
|
|||
|
import base64
|
|||
|
import time
|
|||
|
from datetime import datetime
|
|||
|
|
|||
|
|
|||
|
# Base64 encoding helpers for the media files
|
|||
|
def _encode_file_base64(file_path: str) -> str:
    """Read *file_path* in binary mode and return its bytes as Base64 text.

    The whole file is read into memory before encoding.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(file_path, "rb") as binary_file:
        return base64.b64encode(binary_file.read()).decode("utf-8")


def encode_video(video_path: str) -> str:
    """Return the Base64-encoded contents of the video file at *video_path*."""
    return _encode_file_base64(video_path)


def encode_audio(audio_path: str) -> str:
    """Return the Base64-encoded contents of the audio file at *audio_path*."""
    return _encode_file_base64(audio_path)
|
|||
|
|
|||
|
def read_txt_file(txt_path: str) -> str:
    """Read a UTF-8 text file and return its contents.

    This is a best-effort reader: a file that is missing, unreadable, or not
    valid UTF-8 is reported on stdout and yields an empty string instead of
    raising.

    Args:
        txt_path: Path of the text file to read.

    Returns:
        The file contents, or "" when the file could not be read.
    """
    # Only the read itself is guarded, so a failure while printing the
    # success messages can no longer throw away content that was read fine.
    try:
        with open(txt_path, 'r', encoding='utf-8') as file:
            content = file.read()
    except FileNotFoundError:
        print(f"错误: 找不到文件 {txt_path}")
        return ""
    except (OSError, ValueError) as e:
        # OSError: permissions, directories, I/O failures.
        # ValueError: undecodable bytes (UnicodeDecodeError) or a malformed path.
        print(f"读取文件时出错: {e}")
        return ""

    print(f"成功读取txt文件: {txt_path}")
    print(f"文件内容长度: {len(content)} 字符")
    return content
|
|||
|
|
|||
|
# Whether the API reply is consumed as a stream (True) or as one response (False).
STREAM_MODE = True

# Input file locations.
video_path = "/root/autodl-tmp/hot_video_analyse/video2audio/sample_demo_6.mp4"
audio_path = "/root/autodl-tmp/hot_video_analyse/video2audio/sample_demo_6.wav"
# Optional reference document (example file), currently disabled:
# txt_path = "/root/autodl-tmp/hot_video_analyse/source/example_reference.txt"

# Base64-encode the inputs and measure how long the encoding takes.
print("开始编码文件...")
encode_start_time = time.time()

base64_video, base64_audio = encode_video(video_path), encode_audio(audio_path)
# The reference text is left empty; to enable it use
# txt_content = read_txt_file(txt_path)
txt_content = ""

encode_end_time = time.time()
encode_duration = encode_end_time - encode_start_time
print(f"文件编码完成,耗时: {encode_duration:.2f} 秒")
|
|||
|
|
|||
|
|
|||
|
# OpenAI-compatible client talking to the endpoint at localhost:8000.
# "EMPTY" is a placeholder key; if no environment variable is configured and
# the endpoint needs a real key, replace it with api_key="sk-xxx".
client = OpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")
|
|||
|
|
|||
|
# Main analysis prompt sent together with the media.
# The JSON template at the end is kept strictly valid (balanced braces, no
# trailing commas) because the prompt asks the model to follow it exactly.
# NOTE(review): the prompt defines the pixel origin as the top-right corner,
# while the example box "0 600 1080 720" reads like a top-left origin — confirm.
ANALYSIS_PROMPT = """🎥 **抖音短视频内容分析专家**

请对这个抖音短视频进行详细的内容分析,重点关注以下三个方面:

## 🎤 一、口播内容提取
请仔细听取视频中的语音内容,完整转录:
- **完整口播转录**:逐字逐句转录所有口语表达
- **语音时长**:估算总的讲话时长

## 📝 二、字幕文字识别
请识别视频画面中出现的所有文字内容:
- **屏幕字幕**:视频中显示的字幕文字(包括自动字幕和手动添加的字幕)
- **标题文字**:视频开头、中间、结尾出现的大标题
- **字幕位置**: 以右上角的像素为0 0 为基准,计算字幕的像素位置 e.g. 0 600 1080 720


## 🎣 三、勾子分析
根据以下五种勾子类型,分析视频使用的勾子策略:

### 1. 好奇类勾子
- 公式1:**是一种什么体验
- 公式2:如何不**也能**
- 公式3:如果你**,你会怎么样
- 示例:有一个长得像要债的老公,是一种什么体验

### 2. 借势类勾子
- 公式1:曾经被某人**,如今却**
- 公式2:某**大火,我却关心**
- 公式3:某明星都在用的**
- 示例:马云都在看的3本书,建议收藏

### 3. 痛点式勾子
- 公式1:为什么**,却**
- 公式2:不知道**?一定要**
- 公式3:最新**,你不会不知道吧
- 示例:怕上火,喝王老吉

### 4. 极限式勾子
- 公式1:这是全网最**
- 公式2:**一定要**
- 公式3:99%的人都不知道的**
- 示例:成都最值得去的十大景点

### 5. 恐吓式勾子
- 公式1:**揭密,千万不要**
- 公式2:如果你再不**,就会**
- 公式3:**将迎来**,赶紧**

## 📊 输出格式要求

请严格按照以下格式输出分析结果:

{
"口播分析": {
"是否有口播": "是",
"口播内容": "完整的口播内容转录(逐字逐句)",
"讲话时长": "大概的讲话时长(秒)"
},
"字幕分析": {
"是否有字幕": "是",
"字幕内容": "所有字幕文字内容",
"字幕位置": "字幕位置(以右上角的像素为0 0 为基准,计算字幕的像素位置 e.g. 0 600 1080 720)"
},
"勾子分析": {
"勾子类型": "勾子类型(好奇类/借势类/痛点式/极限式/恐吓式/混合型/无明显勾子)",
"勾子公式": "匹配的勾子公式",
"勾子内容": "具体的勾子内容"
}
}

请开始详细分析这个抖音短视频:"""

# Build the multimodal content list for the user message.
content_list = [
    {
        # When passing a video file directly, the type must be "video_url".
        "type": "video_url",
        "video_url": {"url": f"data:video/mp4;base64,{base64_video}"},
    },
    {
        "type": "audio_url",
        "audio_url": {"url": f"data:audio/wav;base64,{base64_audio}"},
    },
]

# Include the reference document only when it has non-whitespace content.
if txt_content.strip():
    content_list.append({
        "type": "text",
        "text": f"参考文档内容:\n{txt_content}\n\n",
    })

# The main prompt always goes last.
content_list.append({
    "type": "text",
    "text": ANALYSIS_PROMPT,
})
|
|||
|
|
|||
|
# Announce the request and the settings it is sent with.
request_timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print("\n开始请求API...")
print(f"请求时间: {request_timestamp}")
print(f"Stream模式: {STREAM_MODE}")
print(f"Content项目数量: {len(content_list)}")

# Start the clock right before the request is assembled and sent.
api_start_time = time.time()

system_message = {
    "role": "system",
    "content": [{"type": "text", "text": "You are a helpful assistant."}],
}
user_message = {
    "role": "user",
    "content": content_list,
}
# Usage statistics are only requested for streamed replies.
stream_options = {"include_usage": True} if STREAM_MODE else None

completion = client.chat.completions.create(
    model="/root/autodl-tmp/llm",
    messages=[system_message, user_message],
    stream=STREAM_MODE,
    stream_options=stream_options,
)
|
|||
|
|
|||
|
def _print_banner(title):
    """Print a blank line, then *title* framed by two 50-character '=' rules."""
    print("\n" + "=" * 50)
    print(title)
    print("=" * 50)


if STREAM_MODE:
    # Streaming mode: collect the content deltas and join them into the reply.
    response_parts = []
    usage_info = None

    # Wall-clock time at which the first content delta arrived.
    first_token_time = None
    first_token_delay = None
    # NOTE: counts content-bearing chunks, which only approximates the number
    # of generated tokens when a chunk carries more than one token.
    token_count = 0

    print("正在生成回复...")
    for chunk in completion:
        # Keep the latest usage payload; a chunk without usage must not
        # overwrite one that was already captured.
        chunk_usage = getattr(chunk, "usage", None)
        if chunk_usage is not None:
            usage_info = chunk_usage

        if not chunk.choices:
            continue

        delta = chunk.choices[0].delta
        if not delta.content:
            continue

        if first_token_time is None:
            first_token_time = time.time()
            first_token_delay = first_token_time - api_start_time
            print(f"首个token延迟: {first_token_delay:.2f} 秒")

        response_parts.append(delta.content)
        token_count += 1
        # To echo the reply live: print(delta.content, end='', flush=True)

    full_response = "".join(response_parts)

    # The request is finished once the stream is exhausted.
    api_end_time = time.time()
    total_duration = api_end_time - api_start_time

    _print_banner("完整回复:")
    print(full_response)

    _print_banner("⏱️ 时间统计:")
    print(f"📁 文件编码时间: {encode_duration:.2f} 秒")
    generation_time = None
    if first_token_time is not None:
        print(f"🚀 首个token延迟: {first_token_delay:.2f} 秒")
        generation_time = api_end_time - first_token_time
        print(f"⚡ 内容生成时间: {generation_time:.2f} 秒")
    print(f"🕐 API总响应时间: {total_duration:.2f} 秒")
    print(f"📊 生成token数量: {token_count}")
    # Skip the rate when the clock did not advance between the first token and
    # the end of the stream; dividing would raise ZeroDivisionError.
    if generation_time is not None and generation_time > 0:
        tokens_per_second = token_count / generation_time
        print(f"🔥 生成速度: {tokens_per_second:.2f} tokens/秒")
    print(f"⏰ 完成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    if usage_info is not None:
        _print_banner("📈 使用情况:")
        print(usage_info)

else:
    # Non-streaming mode: the complete reply is already in `completion`.
    api_end_time = time.time()
    total_duration = api_end_time - api_start_time

    print("非流式输出模式:")
    print("完整回复:")
    print("=" * 50)
    print(completion.choices[0].message.content)

    _print_banner("⏱️ 时间统计:")
    print(f"📁 文件编码时间: {encode_duration:.2f} 秒")
    print(f"🕐 API总响应时间: {total_duration:.2f} 秒")
    print(f"⏰ 完成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    if getattr(completion, "usage", None):
        _print_banner("📈 使用情况:")
        print(completion.usage)
|