#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Simplified OCR test script."""

import json
import sys
import traceback
from pathlib import Path

import cv2


def test_paddleocr():
    """Smoke-test PaddleOCR on the first frame of the sample video.

    The engine is imported lazily so that a missing ``paddleocr`` package is
    reported as a failed test instead of breaking the whole script.

    Returns:
        bool: True when the engine was initialised, a frame was read and
        recognition ran without raising; False when the frame could not be
        read or any step raised an exception.
    """
    print("🔧 测试PaddleOCR...")

    try:
        from paddleocr import PaddleOCR

        # Initialise the OCR engine.
        ocr = PaddleOCR(
            use_textline_orientation=True,
            lang='ch'
        )
        print("✅ PaddleOCR初始化成功")

        # Video file used for the test.
        video_path = "/root/autodl-tmp/hot_video_analyse/video2audio/sample_demo_6.mp4"

        # Read a single frame; release the capture even if read() raises.
        cap = cv2.VideoCapture(video_path)
        try:
            ret, frame = cap.read()
        finally:
            cap.release()

        if not ret:
            print("❌ 无法读取视频帧")
            return False

        print("📸 成功读取视频帧")

        # Prefer predict(); fall back to ocr() when predict() is unavailable
        # or fails. Only Exception is caught so Ctrl-C still interrupts.
        try:
            result = ocr.predict(frame)
        except Exception:
            result = ocr.ocr(frame)

        print(f"🔍 OCR结果类型: {type(result)}")

        if not result:
            print("🔍 OCR结果为None")
        elif not isinstance(result, list):
            print("🔍 OCR结果为空或格式异常")
        else:
            detections = _paddle_detections(result)
            print(f"🔍 OCR结果: {len(detections)} 个检测")

            # Only show the first three detections.
            for index, detection in enumerate(detections[:3], start=1):
                if not detection or len(detection) < 2:
                    continue
                try:
                    _bbox, text_info = detection
                    if isinstance(text_info, (tuple, list)) and len(text_info) >= 2:
                        # Slice so that extra trailing fields do not break unpacking.
                        text, confidence = text_info[:2]
                        print(f" {index}. 文本: '{text}' 置信度: {confidence:.3f}")
                    else:
                        print(f" {index}. 检测到文本但格式异常: {text_info}")
                except Exception as e:
                    print(f" {index}. 处理检测结果时出错: {e}")

        return True

    except Exception as e:
        print(f"❌ PaddleOCR测试失败: {e}")
        print(f"详细错误: {traceback.format_exc()}")
        return False


def _paddle_detections(result):
    """Flatten a non-empty PaddleOCR result list into per-line detections.

    Args:
        result: Non-empty list returned by ``ocr.predict`` or ``ocr.ocr``.

    Returns:
        list: Entries shaped like ``(bbox, (text, confidence))`` when the
        result layout is recognised; otherwise the original entries, passed
        through unchanged so the caller can report them as malformed.
    """
    first_page = result[0]

    if first_page is None:
        # NOTE(review): older PaddleOCR releases appear to return [None] for
        # an image without text; count that as zero detections, not one.
        return []

    # NOTE(review): PaddleOCR 3.x documents dict-like page results exposing
    # parallel ``rec_texts`` / ``rec_scores`` sequences - confirm against the
    # installed version.
    if (
        isinstance(first_page, dict)
        and "rec_texts" in first_page
        and "rec_scores" in first_page
    ):
        return [
            (None, (text, score))
            for text, score in zip(first_page["rec_texts"], first_page["rec_scores"])
        ]

    # Legacy layout: the first element is the list of detections for the image.
    return first_page if isinstance(first_page, list) else result


def test_easyocr():
    """Smoke-test EasyOCR on the first frame of the sample video.

    The engine is imported lazily so that a missing ``easyocr`` package is
    reported as a failed test instead of breaking the whole script.

    Returns:
        bool: True when the reader was initialised, a frame was read and
        recognition ran without raising; False when the frame could not be
        read or any step raised an exception.
    """
    print("\n🔧 测试EasyOCR...")

    try:
        import easyocr

        # Initialise the OCR reader.
        reader = easyocr.Reader(['ch_sim', 'en'])
        print("✅ EasyOCR初始化成功")

        # Video file used for the test.
        video_path = "/root/autodl-tmp/hot_video_analyse/video2audio/sample_demo_6.mp4"

        # Read a single frame; release the capture even if read() raises.
        cap = cv2.VideoCapture(video_path)
        try:
            ret, frame = cap.read()
        finally:
            cap.release()

        if not ret:
            print("❌ 无法读取视频帧")
            # Return an explicit False so every path yields a bool.
            return False

        print("📸 成功读取视频帧")

        # Run recognition on the frame.
        result = reader.readtext(frame)

        print(f"🔍 OCR结果: {len(result)} 个检测")

        # Only show the first three detections.
        for index, (_bbox, text, confidence) in enumerate(result[:3], start=1):
            print(f" {index}. 文本: '{text}' 置信度: {confidence:.3f}")

        return True

    except Exception as e:
        print(f"❌ EasyOCR测试失败: {e}")
        # Print the traceback as well, matching the PaddleOCR test's diagnostics.
        print(f"详细错误: {traceback.format_exc()}")
        return False


def main():
    """Run both OCR smoke tests and print a summary.

    Returns:
        int: Process exit status - 0 when at least one OCR engine test
        succeeded, 1 when both failed.
    """
    print("🧪 简化OCR功能测试")
    print("=" * 40)

    paddle_ok = test_paddleocr()
    easy_ok = test_easyocr()

    print("\n" + "=" * 40)
    print("📊 测试结果:")
    print(f" PaddleOCR: {'✅ 成功' if paddle_ok else '❌ 失败'}")
    print(f" EasyOCR: {'✅ 成功' if easy_ok else '❌ 失败'}")

    if paddle_ok or easy_ok:
        print("\n🎉 至少有一个OCR引擎工作正常,可以继续使用!")
        return 0

    print("\n⚠️ 所有OCR引擎都有问题,请检查安装")
    return 1


if __name__ == "__main__":
    # Propagate the outcome so shells and CI can detect a total failure.
    sys.exit(main())