# hot_video_analyse/code/api_video_with_monitor.py
# NOTE(review): this file was recovered from a web "Raw/Blame" scrape; the
# GitHub page furniture and its ambiguous-Unicode warning were removed, and
# indentation was reconstructed.
#!/usr/bin/env python3
from openai import OpenAI
import os
import base64
import time
import psutil
import subprocess
from datetime import datetime
class MemoryMonitor:
    """Track system, GPU and current-process memory across named checkpoints.

    Risk levels reported by :meth:`check_memory_risk` are "" (low),
    "medium" and "high".  (The scraped original had every level collapsed
    to the empty string, which made the risk logic and the icon lookup
    meaningless; the levels are now explicit.)
    """

    def __init__(self):
        self.checkpoints = []                         # recorded snapshots, in order
        self.initial_memory = self.get_memory_info()  # baseline at construction

    def get_memory_info(self):
        """Return a snapshot of system, GPU and current-process memory usage."""
        memory = psutil.virtual_memory()
        gpu_info = self.get_gpu_memory()
        process = psutil.Process()
        memory_info = process.memory_info()
        return {
            "timestamp": datetime.now().isoformat(),
            "system_memory_gb": memory.used / 1024**3,
            "system_memory_percent": memory.percent,
            "gpu_memory": gpu_info,
            "process_memory_mb": memory_info.rss / 1024 / 1024,
        }

    def get_gpu_memory(self):
        """Query nvidia-smi for per-GPU memory stats; return [] if unavailable."""
        try:
            result = subprocess.run(
                ['nvidia-smi',
                 '--query-gpu=memory.total,memory.used,memory.free',
                 '--format=csv,noheader,nounits'],
                capture_output=True, text=True, check=True)
            gpu_info = []
            for i, line in enumerate(result.stdout.strip().split('\n')):
                parts = line.split(', ')
                if len(parts) == 3:
                    total, used, free = map(int, parts)
                    gpu_info.append({
                        "gpu_id": i,
                        "total_mb": total,
                        "used_mb": used,
                        "free_mb": free,
                        "usage_percent": round(used / total * 100, 2),
                    })
            return gpu_info
        except (OSError, subprocess.SubprocessError, ValueError):
            # nvidia-smi missing, failing, or emitting unparseable output all
            # mean "no GPU info".  The original bare `except:` also swallowed
            # KeyboardInterrupt/SystemExit, which is why it was narrowed.
            return []

    def checkpoint(self, name=""):
        """Record a named memory snapshot plus its delta vs. the previous one.

        The first checkpoint has ``memory_diff`` set to None since there is
        nothing to diff against.
        """
        current_memory = self.get_memory_info()
        if self.checkpoints:
            last_memory = self.checkpoints[-1]["memory"]
            memory_diff = {
                "system_memory_gb": current_memory["system_memory_gb"] - last_memory["system_memory_gb"],
                "process_memory_mb": current_memory["process_memory_mb"] - last_memory["process_memory_mb"],
            }
            # Per-GPU used-memory delta (only GPUs present in both snapshots).
            gpu_diff = []
            if current_memory["gpu_memory"] and last_memory["gpu_memory"]:
                for i in range(min(len(current_memory["gpu_memory"]), len(last_memory["gpu_memory"]))):
                    current_gpu = current_memory["gpu_memory"][i]["used_mb"]
                    last_gpu = last_memory["gpu_memory"][i]["used_mb"]
                    gpu_diff.append({
                        "gpu_id": i,
                        "used_mb_diff": current_gpu - last_gpu,
                    })
            memory_diff["gpu_memory"] = gpu_diff
        else:
            memory_diff = None
        checkpoint = {
            "name": name,
            "memory": current_memory,
            "memory_diff": memory_diff,
        }
        self.checkpoints.append(checkpoint)
        return checkpoint

    def check_memory_risk(self):
        """Classify current memory pressure as "" (low), "medium" or "high"."""
        current = self.get_memory_info()
        # System memory risk
        sys_risk = ""
        if current["system_memory_percent"] > 90:
            sys_risk = "high"
        elif current["system_memory_percent"] > 80:
            sys_risk = "medium"
        # GPU memory risk -- the most loaded GPU determines the level
        gpu_risk = ""
        if current["gpu_memory"]:
            max_gpu_usage = max(gpu["usage_percent"] for gpu in current["gpu_memory"])
            if max_gpu_usage > 95:
                gpu_risk = "high"
            elif max_gpu_usage > 85:
                gpu_risk = "medium"
        return {
            "system_risk": sys_risk,
            "gpu_risk": gpu_risk,
            "current_memory": current,
        }

    def print_memory_status(self, title=""):
        """Print a formatted memory status report and return the risk dict."""
        current = self.get_memory_info()
        risk = self.check_memory_risk()
        # One icon per risk level; low risk gets no icon.
        icons = {"": "", "medium": "⚠️", "high": "🚨"}
        print(f"\n{'='*50}")
        print(f"🔍 {title if title else '内存状态检查'}")
        print(f"{'='*50}")
        # System memory
        risk_icon = icons[risk["system_risk"]]
        print(f"💾 系统内存: {current['system_memory_gb']:.1f} GB ({current['system_memory_percent']:.1f}%) {risk_icon}")
        # GPU memory
        if current["gpu_memory"]:
            risk_icon = icons[risk["gpu_risk"]]
            for gpu in current["gpu_memory"]:
                print(f"🎮 GPU {gpu['gpu_id']}: {gpu['used_mb']:.0f}/{gpu['total_mb']:.0f} MB ({gpu['usage_percent']:.1f}%) {risk_icon}")
        # Process memory
        print(f"🔧 当前进程: {current['process_memory_mb']:.1f} MB")
        return risk
def analyze_file_sizes(video_path, audio_path=None, txt_content=""):
    """Report payload sizes and estimate the memory needed to encode them.

    Prints a breakdown for the video file, the optional audio file and the
    optional text content, then returns ``(total_estimated_mb, warnings)``
    where ``warnings`` is a list of human-readable risk notes.
    """
    print(f"\n{'='*50}")
    print("📊 文件大小分析")
    print(f"{'='*50}")
    warnings = []
    total_estimated_mb = 0

    # --- video ---------------------------------------------------------
    if os.path.exists(video_path):
        raw_mb = os.path.getsize(video_path) / 1024 / 1024
        b64_mb = raw_mb * 1.33  # base64 inflates size by ~33%
        peak_mb = b64_mb * 2    # encoding briefly holds raw + encoded copies
        print(f"🎥 视频文件: {os.path.basename(video_path)}")
        print(f" 原始大小: {raw_mb:.2f} MB")
        print(f" Base64后: {b64_mb:.2f} MB")
        print(f" 内存估算: {peak_mb:.2f} MB")
        total_estimated_mb += peak_mb
        if b64_mb > 100:
            warnings.append("视频文件过大(>100MB Base64)")
        elif b64_mb > 50:
            warnings.append("视频文件较大(>50MB Base64)")

    # --- audio ---------------------------------------------------------
    if audio_path and os.path.exists(audio_path):
        raw_mb = os.path.getsize(audio_path) / 1024 / 1024
        b64_mb = raw_mb * 1.33
        peak_mb = b64_mb * 2
        print(f"\n🎵 音频文件: {os.path.basename(audio_path)}")
        print(f" 原始大小: {raw_mb:.2f} MB")
        print(f" Base64后: {b64_mb:.2f} MB")
        print(f" 内存估算: {peak_mb:.2f} MB")
        total_estimated_mb += peak_mb
        if b64_mb > 50:
            warnings.append("音频文件过大(>50MB Base64)")

    # --- text ----------------------------------------------------------
    if txt_content:
        text_mb = len(txt_content.encode('utf-8')) / 1024 / 1024
        print(f"\n📝 文本内容: {len(txt_content)} 字符 ({text_mb:.3f} MB)")
        total_estimated_mb += text_mb
        if len(txt_content) > 50000:
            warnings.append("文本内容过长(>50k字符)")

    print(f"\n📋 总估算内存: {total_estimated_mb:.2f} MB")

    # Overall risk verdict.
    if total_estimated_mb > 500:
        print("🚨 高风险: 内容过大,强烈建议压缩或分段处理")
        warnings.append("总内存占用过高(>500MB)")
    elif total_estimated_mb > 200:
        print("⚠️ 中风险: 建议监控内存使用")
        warnings.append("总内存占用较高(>200MB)")
    else:
        print("✅ 低风险: 内存占用在可接受范围内")
    return total_estimated_mb, warnings
# Base64 encoding helpers
def encode_video(video_path):
    """Read a video file and return its contents base64-encoded as a str."""
    with open(video_path, "rb") as fh:
        payload = fh.read()
    return base64.b64encode(payload).decode("utf-8")
def encode_audio(audio_path):
    """Read an audio file and return its contents base64-encoded as a str."""
    with open(audio_path, "rb") as stream:
        raw = stream.read()
    return base64.b64encode(raw).decode("utf-8")
def read_txt_file(txt_path):
    """Return the UTF-8 contents of *txt_path*, or "" on any read error."""
    try:
        with open(txt_path, 'r', encoding='utf-8') as file:
            content = file.read()
    except FileNotFoundError:
        print(f"错误: 找不到文件 {txt_path}")
        return ""
    except Exception as e:
        # Best-effort read: log the problem and fall back to empty content.
        print(f"读取文件时出错: {e}")
        return ""
    print(f"成功读取txt文件: {txt_path}")
    print(f"文件内容长度: {len(content)} 字符")
    return content
def save_result_to_txt(response_text, video_path, save_dir="results"):
    """Write the analysis result to a timestamped TXT file under *save_dir*.

    Returns the path of the written file on success, or None if the write
    failed.
    """
    os.makedirs(save_dir, exist_ok=True)
    stem = os.path.splitext(os.path.basename(video_path))[0]
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_path = os.path.join(save_dir, f"{stem}_analysis_{stamp}.txt")
    report = f"""视频分析结果
=====================================
视频文件: {video_path}
分析时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
=====================================
{response_text}
"""
    try:
        with open(out_path, 'w', encoding='utf-8') as f:
            f.write(report)
    except Exception as e:
        print(f"\n❌ 保存TXT文件失败: {e}")
        return None
    print(f"\n✅ 分析结果已保存到: {out_path}")
    return out_path
# ---------------------------------------------------------------------------
# Script body: configuration, pre-flight memory checks, file encoding.
# ---------------------------------------------------------------------------
monitor = MemoryMonitor()
STREAM_MODE = True

# File path configuration
video_path = "/root/autodl-tmp/video2audio/sample_demo_6.mp4"
audio_path = "/root/autodl-tmp/video2audio/sample_demo_6.wav"
# txt_path = "/root/autodl-tmp/hot_video_analyse/source/example_reference.txt"

# Initial memory snapshot
monitor.checkpoint("程序启动")
monitor.print_memory_status("程序启动时内存状态")

# Estimate the payload's memory footprint before doing any real work.
txt_content = ""
estimated_memory, warnings = analyze_file_sizes(video_path, txt_content=txt_content)

# Surface any size warnings up front.
if warnings:
    print(f"\n⚠️ 发现以下潜在问题:")
    for warning in warnings:
        print(f" - {warning}")
    print(f"\n建议:")
    print(f" - 使用更小的测试文件")
    print(f" - 监控内存使用情况")
    print(f" - 如遇到错误,尝试压缩文件")

# Pre-encoding memory check.
monitor.checkpoint("开始编码前")
risk_before = monitor.check_memory_risk()
# BUG FIX: the original compared against "", which is the LOW-risk value
# (and, after the scrape garbled the level strings, matched every run).
# Only warn when the risk level is actually high.
if risk_before["system_risk"] == "high" or risk_before["gpu_risk"] == "high":
    print(f"\n🚨 警告: 当前内存使用率已经很高,继续可能导致内存溢出!")
    print(f" 系统内存风险: {risk_before['system_risk']}")
    print(f" GPU内存风险: {risk_before['gpu_risk']}")

print("\n开始编码文件...")
encode_start_time = time.time()
try:
    base64_video = encode_video(video_path)
    print(f"✅ 视频编码完成")
except Exception as e:
    print(f"❌ 视频编码失败: {e}")
    monitor.print_memory_status("编码失败时内存状态")
    exit(1)
base64_audio = encode_audio(audio_path)

# Post-encoding memory check.
monitor.checkpoint("编码完成")
encode_end_time = time.time()
encode_duration = encode_end_time - encode_start_time
print(f"📁 文件编码完成,耗时: {encode_duration:.2f}")

# Report how much memory the encoding step actually consumed.
last_checkpoint = monitor.checkpoints[-1]
if last_checkpoint["memory_diff"]:
    diff = last_checkpoint["memory_diff"]
    print(f"📊 编码过程内存变化:")
    print(f" 进程内存增加: {diff['process_memory_mb']:+.1f} MB")
    if diff["gpu_memory"]:
        for gpu_diff in diff["gpu_memory"]:
            print(f" GPU {gpu_diff['gpu_id']} 内存变化: {gpu_diff['used_mb_diff']:+.0f} MB")
# Local vLLM-compatible endpoint; the API key is unused ("EMPTY").
client = OpenAI(
    api_key="EMPTY",
    base_url="http://localhost:8000/v1",
)

# Build the multimodal content list.
# BUG FIX: the original put the video_url and audio_url entries (and two
# "type" keys) into a SINGLE dict literal; duplicate keys overwrite each
# other, so the video part was silently dropped from the request.  Each
# modality must be its own content item.
content_list = [
    {
        "type": "video_url",
        "video_url": {"url": f"data:video/mp4;base64,{base64_video}"},
    },
    {
        "type": "audio_url",
        "audio_url": {"url": f"data:audio/wav;base64,{base64_audio}"},
    },
]

# Attach the reference document, if one was loaded.
if txt_content.strip():
    content_list.append({
        "type": "text",
        "text": f"参考文档内容:\n{txt_content}\n\n"
    })

# Main prompt (kept short to reduce memory usage).
content_list.append({
    "type": "text",
    "text": """请分析这个抖音短视频的内容:
1. **口播内容**:转录视频中的语音内容
2. **字幕文字**:识别画面中的文字和字幕
3. **勾子分析**:分析视频的开头勾子策略
请用JSON格式输出结果
{
"口播分析": {"是否有口播": "", "口播内容": "", "讲话时长": ""},
"字幕分析": {"是否有字幕": "", "字幕内容": "", "字幕位置": ""},
"勾子分析": {"勾子类型": "", "勾子公式": "", "勾子内容": ""}
}"""
})
# Pre-request memory check.
monitor.checkpoint("API请求前")
monitor.print_memory_status("API请求前内存状态")

print(f"\n🚀 开始请求API...")
print(f"📅 请求时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"🔄 Stream模式: {STREAM_MODE}")
print(f"📋 Content项目数量: {len(content_list)}")

# Rough request size (string repr of every content item).
total_request_size = sum(len(str(content)) for content in content_list)
print(f"📏 请求总大小: {total_request_size/1024/1024:.2f} MB")

api_start_time = time.time()
try:
    completion = client.chat.completions.create(
        model="/root/autodl-tmp/llm/Qwen-omni",
        messages=[
            {
                "role": "system",
                "content": [{"type": "text", "text": "You are a helpful assistant."}]
            },
            {
                "role": "user",
                "content": content_list
            }
        ],
        stream=STREAM_MODE,
        stream_options={"include_usage": True} if STREAM_MODE else None,
        max_tokens=1024,  # cap output length to save memory
    )
    first_token_time = None
    first_token_delay = 0.0
    token_count = 0
    usage_info = None
    if STREAM_MODE:
        full_response = ""
        print("✨ 正在生成回复...")
        for chunk in completion:
            if chunk.choices:
                delta = chunk.choices[0].delta
                if delta.content:
                    if first_token_time is None:
                        first_token_time = time.time()
                        first_token_delay = first_token_time - api_start_time
                        print(f"🚀 首个token延迟: {first_token_delay:.2f}")
                    full_response += delta.content
                    token_count += 1  # counts content chunks, not true tokens
            else:
                # With include_usage, the final chunk carries usage and no choices.
                usage_info = chunk.usage
    else:
        # BUG FIX: non-stream mode previously fell through with no handling
        # at all (no output, no saved result).
        full_response = completion.choices[0].message.content or ""
        usage_info = getattr(completion, "usage", None)
    api_end_time = time.time()
    total_duration = api_end_time - api_start_time

    print("\n" + "="*50)
    print("📝 完整回复:")
    print("="*50)
    print(full_response)

    # Persist the analysis result.
    txt_file_path = save_result_to_txt(full_response, video_path)

    # Post-request memory check.
    monitor.checkpoint("API完成")

    # Timing summary.
    print("\n" + "="*50)
    print("⏱️ 时间统计:")
    print("="*50)
    print(f"📁 文件编码时间: {encode_duration:.2f}")
    if first_token_time:
        print(f"🚀 首个token延迟: {first_token_delay:.2f}")
        generation_time = api_end_time - first_token_time
        print(f"⚡ 内容生成时间: {generation_time:.2f}")
    print(f"🕐 API总响应时间: {total_duration:.2f}")
    print(f"📊 生成token数量: {token_count}")
    if first_token_time and token_count > 0:
        generation_time = api_end_time - first_token_time
        # Guard against a zero-length generation window (ZeroDivisionError).
        if generation_time > 0:
            tokens_per_second = token_count / generation_time
            print(f"🔥 生成速度: {tokens_per_second:.2f} tokens/秒")
    print(f"⏰ 完成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    if usage_info:
        print(f"\n📈 使用情况: {usage_info}")
except Exception as e:
    print(f"\n❌ API请求失败!")
    print(f"错误类型: {type(e)}")
    print(f"错误信息: {e}")
    # Snapshot memory at failure time to aid diagnosis.
    monitor.checkpoint("API错误")
    monitor.print_memory_status("API错误时内存状态")
    # Heuristic: server-side OOM usually surfaces as one of these strings.
    if "Internal Server Error" in str(e) or "OutOfMemoryError" in str(e):
        print(f"\n💡 可能的内存溢出原因:")
        print(f" - 视频文件过大 ({estimated_memory:.1f} MB)")
        print(f" - GPU内存不足")
        print(f" - 系统内存不足")
        print(f"\n建议解决方案:")
        print(f" - 使用更小的视频文件")
        print(f" - 重启vLLM服务释放GPU内存")
        print(f" - 降低max_tokens限制")

# Final memory usage report across all checkpoints.
print(f"\n{'='*60}")
print("📊 最终内存使用报告")
print(f"{'='*60}")
for i, checkpoint in enumerate(monitor.checkpoints):
    print(f"{i+1}. {checkpoint['name']}")
    if checkpoint['memory_diff']:
        diff = checkpoint['memory_diff']
        if abs(diff['process_memory_mb']) > 10:  # only show significant changes
            print(f" 进程内存变化: {diff['process_memory_mb']:+.1f} MB")
        if diff['gpu_memory']:
            for gpu_diff in diff['gpu_memory']:
                if abs(gpu_diff['used_mb_diff']) > 50:  # only show significant changes
                    print(f" GPU {gpu_diff['gpu_id']} 变化: {gpu_diff['used_mb_diff']:+.0f} MB")
monitor.print_memory_status("程序结束时内存状态")