# hot_video_analyse/code/api_video.py

from openai import OpenAI
import os
import base64
import time
from datetime import datetime


#  Base64 编码格式
def encode_video(video_path):
    """Return the contents of the file at ``video_path`` as Base64 text.

    The file is opened in binary mode and read in full; the Base64 bytes are
    decoded as UTF-8 so the result is a ``str``.
    """
    with open(video_path, mode="rb") as stream:
        raw_bytes = stream.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")
    
def encode_audio(audio_path):
    """Read the file at ``audio_path`` in binary mode and return it Base64-encoded as a ``str``."""
    with open(audio_path, "rb") as source:
        payload = source.read()
    return str(base64.b64encode(payload), "utf-8")

def read_txt_file(txt_path):
    """Read a UTF-8 text file and return its contents.

    On success, prints the path and the character count of what was read.
    If the file is missing, or any other exception occurs while reading or
    reporting, a message is printed and an empty string is returned instead
    of raising.
    """
    text = ""
    try:
        with open(txt_path, encoding='utf-8') as handle:
            loaded = handle.read()
        print(f"成功读取txt文件: {txt_path}")
        print(f"文件内容长度: {len(loaded)} 字符")
        text = loaded
    except FileNotFoundError:
        print(f"错误: 找不到文件 {txt_path}")
    except Exception as err:
        # Best-effort reader: report the problem and fall back to "".
        print(f"读取文件时出错: {err}")
    return text

# True requests a streamed response; False requests a single complete response.
STREAM_MODE = True

# Input media locations.
audio_path = "/root/autodl-tmp/hot_video_analyse/video2audio/sample_demo_6.wav"
video_path = "/root/autodl-tmp/hot_video_analyse/video2audio/sample_demo_6.mp4"

# Optional reference document (currently disabled, so the text stays empty).
# To enable it, point txt_path at a file such as
# "/root/autodl-tmp/hot_video_analyse/source/example_reference.txt"
# and assign txt_content = read_txt_file(txt_path).
txt_content = ""

# Base64-encode both media files and measure how long that takes.
print("开始编码文件...")
encode_start_time = time.time()
base64_video, base64_audio = encode_video(video_path), encode_audio(audio_path)
encode_end_time = time.time()

encode_duration = encode_end_time - encode_start_time
print(f"文件编码完成，耗时: {encode_duration:.2f} 秒")


# Client for an OpenAI-compatible endpoint on localhost. The key is the literal
# placeholder "EMPTY"; supply a real key and base_url when targeting a hosted
# service instead.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# Parts of the user message: the video and the audio, each embedded as a
# Base64 data URL.
content_list = [
    # A directly embedded video file uses the "video_url" part type.
    {
        "type": "video_url",
        "video_url": {"url": "data:video/mp4;base64," + base64_video},
    },
    {
        "type": "audio_url",
        "audio_url": {"url": "data:audio/wav;base64," + base64_audio},
    },
]

# Add the reference document only when it contains non-whitespace text.
if txt_content.strip():
    content_list.append(
        {"type": "text", "text": f"参考文档内容:\n{txt_content}\n\n"}
    )

# Append the main instruction prompt as the final text part.
# The JSON template at the end of the prompt is what the model is told to
# follow strictly, so it must itself be well-formed: braces are balanced and
# there are no trailing commas.
# NOTE(review): the prompt names the top-right pixel as the coordinate origin;
# confirm that is intended (the example box "0 600 1080 720" is unchanged here).
content_list.append({
    "type": "text",
    "text": """🎥 **抖音短视频内容分析专家**

请对这个抖音短视频进行详细的内容分析，重点关注以下三个方面：

## 🎤 一、口播内容提取
请仔细听取视频中的语音内容，完整转录：
- **完整口播转录**：逐字逐句转录所有口语表达
- **语音时长**：估算总的讲话时长

## 📝 二、字幕文字识别
请识别视频画面中出现的所有文字内容：
- **屏幕字幕**：视频中显示的字幕文字（包括自动字幕和手动添加的字幕）
- **标题文字**：视频开头、中间、结尾出现的大标题
- **字幕位置**: 以右上角的像素为0 0 为基准，计算字幕的像素位置 e.g. 0 600 1080 720


## 🎣 三、勾子分析
根据以下五种勾子类型，分析视频使用的勾子策略：

### 1. 好奇类勾子
- 公式1：**是一种什么体验
- 公式2：如何不**也能**
- 公式3：如果你**，你会怎么样
- 示例：有一个长得像要债的老公，是一种什么体验

### 2. 借势类勾子
- 公式1：曾经被某人**，如今却**
- 公式2：某**大火，我却关心**
- 公式3：某明星都在用的**
- 示例：马云都在看的3本书，建议收藏

### 3. 痛点式勾子
- 公式1：为什么**，却**
- 公式2：不知道**？一定要**
- 公式3：最新**，你不会不知道吧
- 示例：怕上火，喝王老吉

### 4. 极限式勾子
- 公式1：这是全网最**
- 公式2：**一定要**
- 公式3：99%的人都不知道的**
- 示例：成都最值得去的十大景点

### 5. 恐吓式勾子
- 公式1：**揭密，千万不要**
- 公式2：如果你再不**，就会**
- 公式3：**将迎来**，赶紧**

## 📊 输出格式要求

请严格按照以下格式输出分析结果：

{
  "口播分析": {
    "是否有口播": "是",
    "口播内容": "完整的口播内容转录（逐字逐句）",
    "讲话时长": "大概的讲话时长（秒）"
  },
  "字幕分析": {
    "是否有字幕": "是",
    "字幕内容": "所有字幕文字内容",
    "字幕位置": "字幕位置(以右上角的像素为0 0 为基准，计算字幕的像素位置 e.g. 0 600 1080 720)"
  },
  "勾子分析": {
    "勾子类型": "勾子类型（好奇类/借势类/痛点式/极限式/恐吓式/混合型/无明显勾子）",
    "勾子公式": "匹配的勾子公式",
    "勾子内容": "具体的勾子内容"
  }
}

请开始详细分析这个抖音短视频："""
})

# Log what is about to be sent: wall-clock time, response mode, and part count.
print("\n开始请求API...")
request_stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f"请求时间: {request_stamp}")
print(f"Stream模式: {STREAM_MODE}")
print(f"Content项目数量: {len(content_list)}")

# Start the clock immediately before the request is assembled and sent.
api_start_time = time.time()

system_message = {
    "role": "system",
    "content": [{"type": "text", "text": "You are a helpful assistant."}],
}
user_message = {"role": "user", "content": content_list}

# Usage statistics are requested only when streaming; otherwise None is passed.
usage_options = {"include_usage": True} if STREAM_MODE else None

completion = client.chat.completions.create(
    model="/root/autodl-tmp/llm",
    messages=[system_message, user_message],
    stream=STREAM_MODE,
    stream_options=usage_options,
)

def _print_banner(title):
    """Print a blank line, a 50-character rule, ``title``, and another rule."""
    print("\n" + "="*50)
    print(title)
    print("="*50)


if STREAM_MODE:
    # Streaming mode: collect the text deltas as they arrive, then report once.
    response_parts = []
    usage_info = None

    # Time of the first non-empty content delta (None until one arrives).
    first_token_time = None
    first_token_delay = None
    chunk_count = 0

    print("正在生成回复...")
    for chunk in completion:
        # Usage can arrive on a chunk with or without choices; keep the most
        # recent non-empty value so a later empty chunk cannot erase it.
        chunk_usage = getattr(chunk, "usage", None)
        if chunk_usage:
            usage_info = chunk_usage

        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        if not delta.content:
            continue

        if first_token_time is None:
            first_token_time = time.time()
            first_token_delay = first_token_time - api_start_time
            print(f"首个token延迟: {first_token_delay:.2f} 秒")

        response_parts.append(delta.content)
        chunk_count += 1

    # The request is finished once the stream is exhausted.
    api_end_time = time.time()
    total_duration = api_end_time - api_start_time
    full_response = "".join(response_parts)

    # A content chunk is not necessarily one token, so prefer the token count
    # reported in the usage data and fall back to the chunk count otherwise.
    reported_tokens = getattr(usage_info, "completion_tokens", None)
    token_count = reported_tokens if reported_tokens else chunk_count

    _print_banner("完整回复:")
    print(full_response)

    _print_banner("⏱️  时间统计:")
    print(f"📁 文件编码时间: {encode_duration:.2f} 秒")
    generation_time = None
    if first_token_time is not None:
        print(f"🚀 首个token延迟: {first_token_delay:.2f} 秒")
        generation_time = api_end_time - first_token_time
        print(f"⚡ 内容生成时间: {generation_time:.2f} 秒")
    print(f"🕐 API总响应时间: {total_duration:.2f} 秒")
    print(f"📊 生成token数量: {token_count}")
    # Skip the rate when nothing was generated or the interval is zero, which
    # would otherwise divide by zero.
    if generation_time and token_count > 0:
        tokens_per_second = token_count / generation_time
        print(f"🔥 生成速度: {tokens_per_second:.2f} tokens/秒")
    print(f"⏰ 完成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    if usage_info:
        _print_banner("📈 使用情况:")
        print(usage_info)

else:
    # Non-streaming mode: the complete response is already available.
    api_end_time = time.time()
    total_duration = api_end_time - api_start_time

    print("非流式输出模式:")
    print("完整回复:")
    print("="*50)
    print(completion.choices[0].message.content)

    _print_banner("⏱️  时间统计:")
    print(f"📁 文件编码时间: {encode_duration:.2f} 秒")
    print(f"🕐 API总响应时间: {total_duration:.2f} 秒")
    print(f"⏰ 完成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    # Usage is printed only when the response carries a non-empty value.
    final_usage = getattr(completion, "usage", None)
    if final_usage:
        _print_banner("📈 使用情况:")
        print(final_usage)