autoUpload/test_typing.py

443 lines
17 KiB
Python
Raw Normal View History

import asyncio
import random
import time
import numpy as np
from playwright.async_api import async_playwright
from utils.paste_typing import PasteTypingSimulator
from utils.human_typing_wrapper import HumanTypingWrapper
from utils.human_like import HumanLikeTyper
class BotDetector:
    """Heuristic scorer that estimates whether recorded input looks automated."""

    @staticmethod
    def analyze_typing_pattern(timestamps, actions):
        """Score a recorded input session for bot-like characteristics.

        Args:
            timestamps: Sequence (list or NumPy array) of event times in
                seconds, in the order the events were recorded.
            actions: Sequence of action labels, one per recorded event.

        Returns:
            dict: ``risk_score`` (0-100) and ``reasons`` (list of messages).
            When at least two timestamps are supplied, a ``metrics`` dict
            with ``interval_cv``, ``interval_mean`` and ``unique_patterns``
            is included as well.
        """
        # `len()` instead of truthiness so NumPy arrays are accepted too.
        if timestamps is None or len(timestamps) < 2:
            return {"risk_score": 0, "reasons": ["样本太少,无法分析"]}

        # Gaps between consecutive events.
        intervals = np.diff(np.asarray(timestamps, dtype=float))

        # 1. Regularity of the gaps, expressed as coefficient of variation.
        interval_mean = float(np.mean(intervals))
        interval_std = float(np.std(intervals))
        interval_cv = interval_std / interval_mean if interval_mean > 0 else 0.0

        # 2. Any gap shorter than 10 ms is treated as unnaturally fast.
        has_super_fast = bool(np.any(intervals < 0.01))

        # 3. Diversity of consecutive action triples.
        action_patterns = [
            f"{actions[i]}-{actions[i + 1]}-{actions[i + 2]}"
            for i in range(len(actions) - 2)
        ]
        if action_patterns:
            unique_patterns = len(set(action_patterns)) / len(action_patterns)
        else:
            unique_patterns = 0.0

        risk_score = 0
        reasons = []

        # The scoring treats a CV between 0.2 and 0.8 as human-like.
        if interval_cv < 0.2:
            risk_score += 30
            reasons.append("时间间隔过于规律")
        elif interval_cv > 0.8:
            risk_score += 10
            reasons.append("时间间隔过于混乱")

        if has_super_fast:
            risk_score += 25
            reasons.append("存在不自然的快速输入")

        # Only judge diversity when there is at least one triple to look at;
        # otherwise the 0.0 placeholder would be penalised as "monotonous".
        if action_patterns and unique_patterns < 0.4:
            risk_score += 20
            reasons.append("动作模式过于单一")

        # Mean gap below 50 ms: overall typing speed is too high.
        if interval_mean < 0.05:
            risk_score += 25
            reasons.append("整体输入速度过快")

        return {
            "risk_score": min(risk_score, 100),
            "reasons": reasons,
            "metrics": {
                "interval_cv": interval_cv,
                "interval_mean": interval_mean,
                "unique_patterns": unique_patterns,
            },
        }

    @staticmethod
    def _risk_level(score):
        """Return the label for a risk score: below 30, below 70, otherwise."""
        if score < 30:
            return "低"
        if score < 70:
            return "中"
        return "高"

    @staticmethod
    def format_detection_result(result):
        """Render an analysis result as a multi-line, human-readable report.

        Args:
            result: dict produced by ``analyze_typing_pattern``; the
                ``metrics`` entry is optional.

        Returns:
            str: The report, with lines joined by newlines.
        """
        risk_level = BotDetector._risk_level(result["risk_score"])
        output = [
            f"机器人风险评分: {result['risk_score']}/100 (风险等级: {risk_level})",
            "可疑特征:" if result["reasons"] else "未发现可疑特征",
        ]
        output.extend(f"- {reason}" for reason in result["reasons"])

        if "metrics" in result:
            metrics = result["metrics"]
            output.extend([
                "\n详细指标:",
                f"- 时间间隔变异系数: {metrics['interval_cv']:.3f}",
                f"- 平均输入间隔: {metrics['interval_mean']*1000:.1f}ms",
                f"- 动作模式多样性: {metrics['unique_patterns']:.3f}",
            ])
        return "\n".join(output)
class InputRecorder:
    """Collect action labels together with their time offsets from a start point."""

    def __init__(self):
        self.timestamps = []  # seconds elapsed since start_time, one per action
        self.actions = []     # action labels, parallel to timestamps
        self.start_time = None  # perf_counter() reading taken when recording began

    def start(self):
        """Begin a fresh recording, discarding anything recorded so far."""
        # perf_counter is monotonic and high-resolution, so measured intervals
        # cannot go negative or jump when the wall clock is adjusted.
        self.start_time = time.perf_counter()
        self.timestamps = []
        self.actions = []

    def record(self, action):
        """Append one action, stamped with the time elapsed since start.

        If ``start()`` has not been called yet, recording begins at this
        call, so the first action gets an offset of 0.
        """
        now = time.perf_counter()
        if self.start_time is None:
            self.start_time = now
        self.timestamps.append(now - self.start_time)
        self.actions.append(action)

    def get_records(self):
        """Return the ``(timestamps, actions)`` lists recorded so far."""
        return self.timestamps, self.actions
async def xiaohongshu_tag_input_from_examples(page, text: str, selector: str):
    """Type body text into an element using a slow, tag-input-like rhythm.

    Adapted from the tag-input routine in examples/upload_video_to_xiaohongshu.py,
    but used here to enter body text instead of tags.

    Args:
        page: Playwright page that contains the target element.
        text: Text to enter. It is split on blank lines and only the first
            three chunks are considered; whitespace-only chunks are skipped.
        selector: Selector of the element to click and type into.
    """
    element = await page.wait_for_selector(selector)
    await element.click()
    await asyncio.sleep(0.5)

    # Clear whatever is already in the field.
    await page.keyboard.press("Control+A")
    await page.keyboard.press("Delete")
    await asyncio.sleep(0.3)

    # Cap at the first three chunks to keep the run short, then drop blanks.
    to_type = [chunk for chunk in text.split('\n\n')[:3] if chunk.strip()]
    last_index = len(to_type) - 1

    for index, paragraph in enumerate(to_type):
        for char in paragraph:
            # Tag-input style: slow, deliberate keystrokes.
            await page.keyboard.type(char, delay=random.randint(200, 400))
            await asyncio.sleep(random.uniform(0.1, 0.2))
            # Occasional longer pause, as if thinking.
            if random.random() < 0.3:
                await asyncio.sleep(random.uniform(0.5, 1.0))

        # Separate paragraphs only when another one will actually be typed,
        # so the field does not end with stray blank lines.
        if index < last_index:
            await asyncio.sleep(random.uniform(0.8, 1.2))
            await page.keyboard.press("Enter")
            await page.keyboard.press("Enter")
            await asyncio.sleep(0.5)
# Sample text typed by every input method under test.
_TEST_TEXT = """这是一段测试文本,用来展示人类化输入效果。
这是第二段落包含一些标点符号
以及一些英文字符和数字 Hello World 123
最后一段用来测试段落之间的停顿效果
交通指南
7号线直达施园站步行5分钟入园
自驾导航北京环球城市大道停车场直通园区
建议早9点开园前排队热门项目可节省1小时等待
"""

# Punctuation characters that are recorded as "special" actions.
_SPECIAL_CHARS = ",。!?、,.!?"

# Minimal page with one textarea per input method.
_TEST_PAGE_HTML = '''
<!DOCTYPE html>
<html>
<head>
<title>输入方法测试对比</title>
<style>
body { padding: 20px; font-family: Arial, sans-serif; }
.container {
margin-bottom: 30px;
}
.input-box {
margin-bottom: 20px;
}
.textarea {
width: 100%;
padding: 10px;
border: 1px solid #ccc;
border-radius: 5px;
font-size: 16px;
line-height: 1.5;
margin-top: 10px;
height: 300px;
}
h2 { margin-bottom: 10px; color: #333; }
h3 { margin-bottom: 5px; color: #666; }
.description {
color: #666;
margin-bottom: 10px;
font-size: 14px;
}
hr {
margin: 20px 0;
border: 1px solid #eee;
}
</style>
</head>
<body>
<h1>输入方法测试评估</h1>
<div class="container">
<h2>1. 复制粘贴方法测试</h2>
<div class="input-box">
<h3>PasteTypingSimulator</h3>
<div class="description">使用PasteTypingSimulator类确保文本被复制到剪贴板后再粘贴</div>
<textarea id="paste-method" class="textarea" placeholder="复制粘贴方法测试区域..."></textarea>
</div>
</div>
<hr>
<div class="container">
<h2>2. 优化人类输入测试</h2>
<div class="input-box">
<h3>HumanTypingWrapper</h3>
<div class="description">使用HumanTypingWrapper类提供更高级的人类化输入功能</div>
<textarea id="human-wrapper-method" class="textarea" placeholder="优化人类输入测试区域..."></textarea>
</div>
</div>
<hr>
<div class="container">
<h2>3. 模拟人类输入文本测试</h2>
<div class="input-box">
<h3>HumanLikeTyper</h3>
<div class="description">使用HumanLikeTyper类模拟基本的人类输入行为</div>
<textarea id="human-like-method" class="textarea" placeholder="模拟人类输入测试区域..."></textarea>
</div>
</div>
<hr>
<div class="container">
<h2>4. 小红书标签输入风格测试</h2>
<div class="input-box">
<h3>Xiaohongshu Tag Style</h3>
<div class="description">从examples/upload_video_to_xiaohongshu.py提取的标签输入风格用于输入正文</div>
<textarea id="xhs-tag-method" class="textarea" placeholder="小红书标签输入风格测试区域..."></textarea>
</div>
</div>
</body>
</html>
'''


def _risk_level(score):
    """Return the label for a risk score: below 30, below 70, otherwise."""
    if score < 30:
        return "低"
    if score < 70:
        return "中"
    return "高"


def _classify_char(char, prefix=""):
    """Return the action label recorded for one typed character."""
    if char.isalnum():
        kind = "type"
    elif char in _SPECIAL_CHARS:
        kind = "special"
    else:
        kind = "space"
    return f"{prefix}{kind}"


def _analyze_and_report(detector, recorder, results, key):
    """Analyse the recorder's current data, store it under ``key`` and print it."""
    timestamps, actions = recorder.get_records()
    result = detector.analyze_typing_pattern(timestamps, actions)
    results[key] = result
    print("行为分析结果:")
    print(detector.format_detection_result(result))
    print("\n")


async def _type_human_like(page, text, recorder):
    """Async stand-in for HumanLikeTyper: type ``text`` with pauses and typos."""
    await page.wait_for_selector("#human-like-method")
    await page.click("#human-like-method")
    await asyncio.sleep(0.3)

    typed_any = False
    for char in text[:200]:  # cap the length so the run stays short
        # Occasional long pause.
        if random.random() < 0.1:
            await asyncio.sleep(random.uniform(0.5, 2.0))

        # Occasional typo: type a wrong character, then delete it again.
        # Only the wrong character is removed, so text that was already
        # entered correctly stays intact.
        if typed_any and random.random() < 0.08:
            wrong_char = random.choice("abcdefghijklmnopqrstuvwxyz ")
            await page.keyboard.type(wrong_char, delay=random.randint(50, 150))
            await asyncio.sleep(random.uniform(0.1, 0.3))
            await page.keyboard.press("Backspace")
            await asyncio.sleep(random.uniform(0.1, 0.3))

        # Per-character delay derived from a random typing speed.
        speed = random.uniform(3, 8)
        delay = 1 / speed + random.uniform(-0.05, 0.1)
        await page.keyboard.type(char, delay=delay * 1000)
        recorder.record(_classify_char(char))
        typed_any = True
        await asyncio.sleep(random.uniform(0.01, 0.05))


async def _run_typing_comparison(page, test_text):
    """Run the four input methods against ``page`` and print the comparison."""
    recorder = InputRecorder()
    detector = BotDetector()
    results = {}
    print("\n=== 开始输入方法测试评估 ===\n")

    # 1. PasteTypingSimulator
    print("1. 测试 PasteTypingSimulator (复制粘贴方法)")
    print("-" * 50)
    recorder.start()
    paste_typer = PasteTypingSimulator(page)
    recorder.record("prepare")
    success = await paste_typer.paste_text("#paste-method", test_text)
    recorder.record("paste_complete")
    print(f"粘贴操作成功: {success}")
    _analyze_and_report(detector, recorder, results, "paste_typing")
    await asyncio.sleep(2)

    # 2. HumanTypingWrapper
    print("2. 测试 HumanTypingWrapper (优化人类输入)")
    print("-" * 50)
    recorder.start()
    human_wrapper = HumanTypingWrapper(page)
    success = await human_wrapper.type_text_human("#human-wrapper-method", test_text)
    # NOTE(review): these actions are recorded in a tight loop after typing
    # has already finished, so their timestamps do not reflect the real
    # keystroke timing of this method.
    for char in test_text:
        recorder.record(_classify_char(char))
    print(f"人类化输入成功: {success}")
    _analyze_and_report(detector, recorder, results, "human_wrapper")
    await asyncio.sleep(2)

    # 3. HumanLikeTyper (re-implemented asynchronously here because the
    #    original class is synchronous).
    print("3. 测试 HumanLikeTyper (模拟人类输入文本)")
    print("-" * 50)
    recorder.start()
    try:
        await _type_human_like(page, test_text, recorder)
        success = True
    except Exception as e:
        print(f"HumanLikeTyper 测试出错: {e}")
        success = False
    print(f"模拟人类输入成功: {success}")
    _analyze_and_report(detector, recorder, results, "human_like")
    await asyncio.sleep(2)

    # 4. Xiaohongshu tag-input style
    print("4. 测试 小红书标签输入风格")
    print("-" * 50)
    recorder.start()
    await xiaohongshu_tag_input_from_examples(page, test_text, "#xhs-tag-method")
    # NOTE(review): as with method 2, actions are recorded only after typing
    # has completed, so the timestamps do not mirror the actual keystrokes.
    for char in test_text:
        recorder.record(_classify_char(char, "xhs_tag_"))
    print("小红书标签输入风格测试完成")
    _analyze_and_report(detector, recorder, results, "xhs_tag")
    await asyncio.sleep(2)

    # Summary of all methods.
    print("=== 输入方法测试评估总结 ===\n")
    method_names = {
        "paste_typing": "1. PasteTypingSimulator (复制粘贴)",
        "human_wrapper": "2. HumanTypingWrapper (优化人类输入)",
        "human_like": "3. HumanLikeTyper (模拟人类输入)",
        "xhs_tag": "4. 小红书标签输入风格",
    }
    for method_key, method_name in method_names.items():
        if method_key not in results:
            continue
        result = results[method_key]
        risk_level = _risk_level(result["risk_score"])
        print(f"{method_name}: 风险评分 {result['risk_score']}/100 (风险等级: {risk_level})")
        if result["reasons"]:
            print(" 可疑特征:")
            for reason in result["reasons"]:
                print(f" - {reason}")
        else:
            print(" 未发现可疑特征")
        print()

    # The method with the lowest risk score performed best.
    best_method = min(results.items(), key=lambda x: x[1]["risk_score"])
    print(f"最佳表现方法: {method_names[best_method[0]]} (风险评分: {best_method[1]['risk_score']}/100)")


async def test_typing_methods():
    """Compare four text-input methods in a visible browser and print risk scores.

    Opens a Chromium window with a local test page containing one textarea
    per method, runs each method on the sample text, scores the recorded
    actions with BotDetector and prints a per-method report plus a summary.
    """
    async with async_playwright() as playwright:
        # Headed mode so the typing can be watched.
        browser = await playwright.chromium.launch(headless=False)
        try:
            # Clipboard permissions are granted for the paste-based method.
            context = await browser.new_context(permissions=['clipboard-read', 'clipboard-write'])
            page = await context.new_page()
            await page.set_content(_TEST_PAGE_HTML)
            await _run_typing_comparison(page, _TEST_TEXT)
        finally:
            # Close the browser explicitly even if one of the steps raised.
            await browser.close()
# Script entry point: run the async comparison when executed directly.
if __name__ == "__main__":
    asyncio.run(test_typing_methods())