"""Utilities for estimating LLM input-token usage and cost for video + text."""

import os

# Optional third-party dependencies. The module degrades gracefully when they
# are missing: count_tokens() falls back to its heuristic estimate and
# get_video_token_estimate() reports zeros instead of crashing at import time.
try:
    import tiktoken
except ImportError:
    tiktoken = None

try:
    import cv2
except ImportError:
    cv2 = None

# GPT-4V-style video token estimation parameters.
BASE_VIDEO_TOKENS = 85        # fixed overhead per video
TOKENS_PER_FRAME = 170        # approximate tokens consumed per sampled frame
MAX_FRAMES_PER_SECOND = 1     # sample at most one frame per second

# Pricing (currency units per 1000 tokens).
VIDEO_COST_PER_1K = 0.0015
TEXT_COST_PER_1K = 0.0004


def count_tokens(text, model="gpt-4"):
    """Return the token count of *text* for *model*.

    Prefers tiktoken's exact encoding; on any failure (tiktoken missing,
    unknown model, error loading the vocabulary) falls back to a rough
    heuristic: ~1.5 tokens per CJK character plus ~1.3 per ASCII word.
    """
    try:
        if tiktoken is None:
            raise RuntimeError("tiktoken is not installed")
        encoding = tiktoken.encoding_for_model(model)
        return len(encoding.encode(text))
    except Exception as e:
        print(f"Token统计出错: {e}")
        # Heuristic fallback: CJK chars ~1.5 tokens, ASCII words ~1.3 tokens.
        chinese_chars = sum(1 for char in text if '\u4e00' <= char <= '\u9fff')
        english_words = len([word for word in text.split() if word.isascii()])
        return int(chinese_chars * 1.5 + english_words * 1.3)


def _empty_video_info():
    """Return a fresh all-zero video-info dict (shared by every failure path)."""
    return {
        'estimated_tokens': 0,
        'duration': 0,
        'frame_count': 0,
        'fps': 0,
        'file_size_mb': 0,
        'frames_used': 0,
    }


def get_video_token_estimate(video_path):
    """Estimate the token cost of the video at *video_path*.

    Probes the file with OpenCV and applies a GPT-4V-style rule:
    tokens = BASE_VIDEO_TOKENS + sampled_frames * TOKENS_PER_FRAME, sampling
    at most MAX_FRAMES_PER_SECOND frames per second of footage.

    Returns a dict with keys 'estimated_tokens', 'duration', 'frame_count',
    'fps', 'file_size_mb' and 'frames_used'; all zeros on any failure.
    """
    try:
        if cv2 is None:
            raise RuntimeError("opencv-python (cv2) is not installed")
        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                return _empty_video_info()
            fps = cap.get(cv2.CAP_PROP_FPS)
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            duration = frame_count / fps if fps > 0 else 0
        finally:
            # Release the capture even if a property read raises.
            cap.release()

        file_size = os.path.getsize(video_path)

        # Cap the sampling rate, then cap the sampled frames at what the
        # video actually contains.
        frames_per_second = min(fps, MAX_FRAMES_PER_SECOND)
        total_frames = min(frame_count, int(duration * frames_per_second))
        estimated_tokens = BASE_VIDEO_TOKENS + total_frames * TOKENS_PER_FRAME

        return {
            'estimated_tokens': int(estimated_tokens),
            'duration': duration,
            'frame_count': frame_count,
            'fps': fps,
            'file_size_mb': file_size / (1024 * 1024),
            'frames_used': total_frames,
        }
    except Exception as e:
        print(f"视频token估算出错: {e}")
        return _empty_video_info()


def analyze_input_tokens(video_path, text_content="", prompt_text=""):
    """Print a breakdown of input tokens (video + text + prompt) and cost.

    Returns a summary dict with the individual token counts, the grand
    total, the raw video info and the estimated cost.
    """
    print("\n" + "="*50)
    print("📊 Token统计信息:")
    print("="*50)

    # Video tokens.
    video_token_info = get_video_token_estimate(video_path)
    print(f"🎬 视频Token统计:")
    print(f" 估算Token数量: {video_token_info['estimated_tokens']:,}")
    print(f" 视频时长: {video_token_info['duration']:.2f}秒")
    print(f" 总帧数: {video_token_info['frame_count']:,}")
    print(f" 帧率: {video_token_info['fps']:.2f} fps")
    print(f" 文件大小: {video_token_info['file_size_mb']:.2f} MB")
    print(f" 使用帧数: {video_token_info['frames_used']:,}")

    # Text tokens (section skipped when the text is blank).
    text_tokens = 0
    if text_content.strip():
        text_tokens = count_tokens(text_content)
        print(f"\n📝 文本Token统计:")
        print(f" 文本内容Token: {text_tokens:,}")
        print(f" 文本字符数: {len(text_content):,}")

    # Prompt tokens (skipped when the prompt is blank).
    prompt_tokens = 0
    if prompt_text.strip():
        prompt_tokens = count_tokens(prompt_text)
        print(f" 提示词Token: {prompt_tokens:,}")

    # Prices are quoted per 1000 tokens, hence the final division.
    total_cost = (video_token_info['estimated_tokens'] * VIDEO_COST_PER_1K
                  + text_tokens * TEXT_COST_PER_1K
                  + prompt_tokens * TEXT_COST_PER_1K) / 1000

    total_input_tokens = (video_token_info['estimated_tokens']
                          + text_tokens
                          + prompt_tokens)
    print(f"\n📈 总输入Token统计:")
    print(f" 视频Token: {video_token_info['estimated_tokens']:,}")
    print(f" 文本Token: {text_tokens:,}")
    print(f" 提示词Token: {prompt_tokens:,}")
    print(f" 🔥 总输入Token: {total_input_tokens:,}")
    print(f" 💰 总费用: {total_cost:.4f}元")
    print("="*50)

    return {
        'video_tokens': video_token_info['estimated_tokens'],
        'text_tokens': text_tokens,
        'prompt_tokens': prompt_tokens,
        'total_input_tokens': total_input_tokens,
        'video_info': video_token_info,
        'total_cost': total_cost,
    }


if __name__ == "__main__":
    # Smoke-test the token statistics on a sample video.
    test_video = "/root/autodl-tmp/new/哈尔滨.mp4"
    test_text = "这是一个测试文本,包含中英文内容。This is a test text with Chinese and English content."
    test_prompt = "请分析这个视频的内容。"

    result = analyze_input_tokens(test_video, test_text, test_prompt)
    print(f"\n测试结果: {result}")