# hot_video_analyse/code/token_counter.py

import tiktoken
import os
import cv2
def count_tokens(text, model="gpt-4"):
    """Count the number of tokens in *text* for the given model.

    Tries tiktoken's model-specific encoding first. If that fails for
    any reason (unknown model name, tiktoken unavailable, ...) it falls
    back to a rough heuristic: ~1.5 tokens per CJK character plus
    ~1.3 tokens per ASCII whitespace-separated word.
    """
    try:
        return len(tiktoken.encoding_for_model(model).encode(text))
    except Exception as e:
        print(f"Token统计出错: {e}")
        # Heuristic fallback: count CJK characters and ASCII words separately.
        cjk_count = 0
        for ch in text:
            if '\u4e00' <= ch <= '\u9fff':
                cjk_count += 1
        ascii_words = sum(1 for word in text.split() if word.isascii())
        return int(cjk_count * 1.5 + ascii_words * 1.3)
def get_video_token_estimate(video_path):
    """Estimate the token cost of sending the video at *video_path* to a vision model.

    Opens the video with OpenCV to read fps and frame count, derives the
    duration, and applies a GPT-4V-style estimate:
    tokens = 85 (base) + frames_used * 170, sampling at most 1 frame per second.

    Returns a dict with keys: estimated_tokens, duration (seconds),
    frame_count, fps, file_size_mb, frames_used. On any failure a
    zeroed-out dict with the same keys is returned instead of raising.
    """
    empty_result = {'estimated_tokens': 0, 'duration': 0, 'frame_count': 0,
                    'fps': 0, 'file_size_mb': 0, 'frames_used': 0}
    try:
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            return dict(empty_result)
        try:
            # Basic stream properties.
            fps = cap.get(cv2.CAP_PROP_FPS)
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        finally:
            # Always release the capture handle, even if a property read
            # raises (the previous version leaked it on such errors).
            cap.release()
        duration = frame_count / fps if fps > 0 else 0
        file_size = os.path.getsize(video_path)
        # GPT-4V-style estimate: base tokens + sampled frames * per-frame cost.
        base_tokens = 85                       # fixed per-request overhead
        frames_per_second = min(fps, 1)        # sample at most 1 frame/s
        frames_used = min(frame_count, int(duration * frames_per_second))
        tokens_per_frame = 170                 # approximate tokens per frame
        estimated_tokens = base_tokens + frames_used * tokens_per_frame
        return {
            'estimated_tokens': int(estimated_tokens),
            'duration': duration,
            'frame_count': frame_count,
            'fps': fps,
            'file_size_mb': file_size / (1024 * 1024),
            'frames_used': frames_used,
        }
    except Exception as e:
        print(f"视频token估算出错: {e}")
        return dict(empty_result)
def analyze_input_tokens(video_path, text_content="", prompt_text=""):
    """Print a token-usage report for one request and return the totals.

    Combines the video token estimate (get_video_token_estimate) with
    tiktoken counts for the text content and the prompt, prints a
    human-readable breakdown, and returns a summary dict with keys:
    video_tokens, text_tokens, prompt_tokens, total_input_tokens,
    video_info (the full video dict), total_cost.

    Note: total_cost uses hard-coded per-1K-token rates (0.0015 video,
    0.0004 text); currency/units are not specified in this file.
    """
    print("\n" + "="*50)
    print("📊 Token统计信息:")
    print("="*50)
    # Video token estimate (zeroed dict if the video could not be read).
    video_token_info = get_video_token_estimate(video_path)
    print(f"🎬 视频Token统计:")
    print(f" 估算Token数量: {video_token_info['estimated_tokens']:,}")
    print(f" 视频时长: {video_token_info['duration']:.2f}")
    print(f" 总帧数: {video_token_info['frame_count']:,}")
    print(f" 帧率: {video_token_info['fps']:.2f} fps")
    print(f" 文件大小: {video_token_info['file_size_mb']:.2f} MB")
    print(f" 使用帧数: {video_token_info['frames_used']:,}")
    # Text tokens — only counted/printed when non-blank.
    text_tokens = 0
    if text_content.strip():
        text_tokens = count_tokens(text_content)
        print(f"\n📝 文本Token统计:")
        print(f" 文本内容Token: {text_tokens:,}")
        print(f" 文本字符数: {len(text_content):,}")
    # Prompt tokens — only counted/printed when non-blank.
    prompt_tokens = 0
    if prompt_text.strip():
        prompt_tokens = count_tokens(prompt_text)
        print(f" 提示词Token: {prompt_tokens:,}")
    # Per-1K-token rates; prompt text is billed at the text rate.
    video_cost = 0.0015
    text_cost = 0.0004
    total_cost = (video_token_info['estimated_tokens']*video_cost + text_tokens*text_cost + prompt_tokens*text_cost)/1000
    # Total input tokens across all three sources.
    total_input_tokens = (video_token_info['estimated_tokens'] + text_tokens + prompt_tokens)
    print(f"\n📈 总输入Token统计:")
    print(f" 视频Token: {video_token_info['estimated_tokens']:,}")
    print(f" 文本Token: {text_tokens:,}")
    print(f" 提示词Token: {prompt_tokens:,}")
    print(f" 🔥 总输入Token: {total_input_tokens:,}")
    print(f" 💰 总费用: {total_cost:.4f}")
    print("="*50)
    return {
        'video_tokens': video_token_info['estimated_tokens'],
        'text_tokens': text_tokens,
        'prompt_tokens': prompt_tokens,
        'total_input_tokens': total_input_tokens,
        'video_info': video_token_info,
        'total_cost': total_cost
    }
if __name__ == "__main__":
    # Smoke-test the token counting helpers on a sample video.
    sample_video = "/root/autodl-tmp/new/哈尔滨.mp4"
    sample_text = "这是一个测试文本包含中英文内容。This is a test text with Chinese and English content."
    sample_prompt = "请分析这个视频的内容。"
    summary = analyze_input_tokens(sample_video, sample_text, sample_prompt)
    print(f"\n测试结果: {summary}")