import tiktoken
import os
import cv2
def count_tokens(text, model="gpt-4"):
    """Count the number of tokens in *text*.

    Tries the tiktoken encoder for *model* first.  On any failure
    (missing package, unknown model name, ...) it falls back to a rough
    estimate: ~1.5 tokens per CJK character plus ~1.3 tokens per
    ASCII word.  Never raises.
    """
    try:
        return len(tiktoken.encoding_for_model(model).encode(text))
    except Exception as e:
        # Best-effort fallback; log message kept verbatim from the original.
        print(f"Token统计出错: {e}")
        cjk_count = sum('\u4e00' <= ch <= '\u9fff' for ch in text)
        ascii_words = sum(1 for w in text.split() if w.isascii())
        return int(cjk_count * 1.5 + ascii_words * 1.3)
def get_video_token_estimate(video_path):
    """Estimate the token cost of a video (GPT-4V-style pricing).

    Opens *video_path* with OpenCV to read fps and frame count, assumes
    at most one sampled frame per second, and prices each sampled frame
    at 170 tokens on top of an 85-token base cost.

    Returns a dict with keys: ``estimated_tokens``, ``duration`` (s),
    ``frame_count``, ``fps``, ``file_size_mb``, ``frames_used``.  On any
    failure an all-zero dict with the same keys is returned — this is a
    best-effort helper and never raises.
    """
    zero_result = {'estimated_tokens': 0, 'duration': 0, 'frame_count': 0,
                   'fps': 0, 'file_size_mb': 0, 'frames_used': 0}
    cap = None
    try:
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            return zero_result

        # Basic stream properties.
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = frame_count / fps if fps > 0 else 0

        file_size = os.path.getsize(video_path)

        # GPT-4V-style estimate: base token cost + per-sampled-frame cost.
        base_tokens = 85                   # fixed overhead
        frames_per_second = min(fps, 1)    # sample at most 1 frame per second
        total_frames = min(frame_count, int(duration * frames_per_second))
        tokens_per_frame = 170             # approximate cost per frame

        estimated_tokens = base_tokens + total_frames * tokens_per_frame

        return {
            'estimated_tokens': int(estimated_tokens),
            'duration': duration,
            'frame_count': frame_count,
            'fps': fps,
            'file_size_mb': file_size / (1024 * 1024),
            'frames_used': total_frames,
        }
    except Exception as e:
        print(f"视频token估算出错: {e}")
        return zero_result
    finally:
        # Bug fix: the original released the capture only on the success
        # path, leaking the handle whenever an exception fired between
        # open and release.
        if cap is not None:
            cap.release()
def analyze_input_tokens(video_path, text_content="", prompt_text=""):
    """Print a breakdown of input tokens (video + text + prompt).

    Returns a dict with ``video_tokens``, ``text_tokens``,
    ``prompt_tokens``, ``total_input_tokens`` and the raw ``video_info``
    dict produced by :func:`get_video_token_estimate`.
    """
    divider = "=" * 50
    print("\n" + divider)
    print("📊 Token统计信息:")
    print(divider)

    # Video portion.
    video_info = get_video_token_estimate(video_path)
    print(f"🎬 视频Token统计:")
    print(f" 估算Token数量: {video_info['estimated_tokens']:,}")
    print(f" 视频时长: {video_info['duration']:.2f}秒")
    print(f" 总帧数: {video_info['frame_count']:,}")
    print(f" 帧率: {video_info['fps']:.2f} fps")
    print(f" 文件大小: {video_info['file_size_mb']:.2f} MB")
    print(f" 使用帧数: {video_info['frames_used']:,}")

    # Free-text portion (only reported when non-blank).
    text_tokens = count_tokens(text_content) if text_content.strip() else 0
    if text_content.strip():
        print(f"\n📝 文本Token统计:")
        print(f" 文本内容Token: {text_tokens:,}")
        print(f" 文本字符数: {len(text_content):,}")

    # Prompt portion (only reported when non-blank).
    prompt_tokens = count_tokens(prompt_text) if prompt_text.strip() else 0
    if prompt_text.strip():
        print(f" 提示词Token: {prompt_tokens:,}")

    # Grand total.
    grand_total = video_info['estimated_tokens'] + text_tokens + prompt_tokens
    print(f"\n📈 总输入Token统计:")
    print(f" 视频Token: {video_info['estimated_tokens']:,}")
    print(f" 文本Token: {text_tokens:,}")
    print(f" 提示词Token: {prompt_tokens:,}")
    print(f" 🔥 总输入Token: {grand_total:,}")

    print(divider)

    return {
        'video_tokens': video_info['estimated_tokens'],
        'text_tokens': text_tokens,
        'prompt_tokens': prompt_tokens,
        'total_input_tokens': grand_total,
        'video_info': video_info,
    }
if __name__ == "__main__":
    # Smoke-test the token accounting helpers on a sample clip.
    sample_video = "/root/autodl-tmp/new/哈尔滨.mp4"
    sample_text = "这是一个测试文本,包含中英文内容。This is a test text with Chinese and English content."
    sample_prompt = "请分析这个视频的内容。"

    summary = analyze_input_tokens(sample_video, sample_text, sample_prompt)
    print(f"\n测试结果: {summary}")