autoUpload/test_typing.py

443 lines
17 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import asyncio
import random
import time
import numpy as np
from playwright.async_api import async_playwright
from utils.paste_typing import PasteTypingSimulator
from utils.human_typing_wrapper import HumanTypingWrapper
from utils.human_like import HumanLikeTyper
class BotDetector:
    """Score a recorded input session for bot-like behaviour using simple heuristics."""

    @staticmethod
    def analyze_typing_pattern(timestamps, actions):
        """Analyse the timing and the action sequence of an input session.

        Args:
            timestamps: Sequence of event times in seconds (list, tuple or
                NumPy array), one entry per recorded action.
            actions: Sequence of action labels (keyboard input, mouse
                movement, ...) recorded alongside the timestamps.

        Returns:
            dict: ``risk_score`` (0-100) and ``reasons`` (list of str). When at
            least two timestamps are supplied the dict also carries
            ``metrics`` with ``interval_cv``, ``interval_mean`` and
            ``unique_patterns``.
        """
        # ``len`` instead of truthiness so NumPy arrays are accepted as well.
        if timestamps is None or len(timestamps) < 2:
            return {"risk_score": 0, "reasons": ["样本太少,无法分析"]}

        # Gaps between consecutive events.
        intervals = np.diff(timestamps)

        # 1. Regularity of the gaps, as coefficient of variation (std / mean).
        interval_std = np.std(intervals)
        interval_mean = np.mean(intervals)
        interval_cv = interval_std / interval_mean if interval_mean > 0 else 0

        # 2. Any gap shorter than 10 ms counts as unnaturally fast.
        has_super_fast = bool((intervals < 0.01).any())

        # 3. Diversity of consecutive action triples. With fewer than three
        #    actions there are no triples and the diversity is reported as 0,
        #    which the scoring below treats as "too uniform".
        action_patterns = [
            f"{first}-{second}-{third}"
            for first, second, third in zip(actions, actions[1:], actions[2:])
        ]
        unique_patterns = len(set(action_patterns)) / len(action_patterns) if action_patterns else 0

        risk_score = 0
        reasons = []

        # The heuristic treats a CV between 0.2 and 0.8 as human-like.
        if interval_cv < 0.2:
            risk_score += 30
            reasons.append("时间间隔过于规律")
        elif interval_cv > 0.8:
            risk_score += 10
            reasons.append("时间间隔过于混乱")

        if has_super_fast:
            risk_score += 25
            reasons.append("存在不自然的快速输入")

        # Low diversity of action triples.
        if unique_patterns < 0.4:
            risk_score += 20
            reasons.append("动作模式过于单一")

        # Average gap below 50 ms.
        if interval_mean < 0.05:
            risk_score += 25
            reasons.append("整体输入速度过快")

        return {
            "risk_score": min(risk_score, 100),
            "reasons": reasons,
            "metrics": {
                "interval_cv": interval_cv,
                "interval_mean": interval_mean,
                "unique_patterns": unique_patterns,
            },
        }

    @staticmethod
    def format_detection_result(result):
        """Render an analysis result as a multi-line, human-readable report.

        Args:
            result: Dict as returned by ``analyze_typing_pattern``; the
                ``metrics`` entry is optional.

        Returns:
            str: The report, one item per line.
        """
        score = result["risk_score"]
        # Previously every branch produced an empty string, so the level was
        # always blank in the report.
        if score < 30:
            risk_level = "低"
        elif score < 70:
            risk_level = "中"
        else:
            risk_level = "高"

        output = [
            f"机器人风险评分: {result['risk_score']}/100 (风险等级: {risk_level})",
            "可疑特征:" if result["reasons"] else "未发现可疑特征",
        ]
        output.extend(f"- {reason}" for reason in result["reasons"])

        if "metrics" in result:
            metrics = result["metrics"]
            output.extend([
                "\n详细指标:",
                f"- 时间间隔变异系数: {metrics['interval_cv']:.3f}",
                f"- 平均输入间隔: {metrics['interval_mean']*1000:.1f}ms",
                f"- 动作模式多样性: {metrics['unique_patterns']:.3f}",
            ])
        return "\n".join(output)
class InputRecorder:
    """Collect action labels together with their time offset from the session start."""

    def __init__(self):
        self.timestamps = []  # seconds elapsed since ``start_time``, one per action
        self.actions = []  # action labels, parallel to ``timestamps``
        self.start_time = None  # ``time.time()`` value of the session start

    def start(self):
        """Begin a new session, discarding anything recorded so far."""
        self.start_time = time.time()
        self.timestamps = []
        self.actions = []

    def record(self, action):
        """Append ``action`` with its offset from the session start.

        If ``start()`` has not been called yet, the session starts at the
        first recorded action instead of failing on ``float - None``.
        """
        now = time.time()
        if self.start_time is None:
            self.start_time = now
        self.timestamps.append(now - self.start_time)
        self.actions.append(action)

    def get_records(self):
        """Return the ``(timestamps, actions)`` pair recorded so far."""
        return self.timestamps, self.actions
async def xiaohongshu_tag_input_from_examples(page, text: str, selector: str, max_paragraphs: int = 3):
    """Type body text into ``selector`` using the slow, cautious tag-input style.

    The typing style is taken from examples/upload_video_to_xiaohongshu.py,
    but is used here for body text rather than tags.

    Args:
        page: Playwright page whose keyboard is driven.
        text: Text to type; paragraphs are separated by a blank line.
        selector: Selector of the input element to focus and clear.
        max_paragraphs: Only the first ``max_paragraphs`` paragraphs are
            considered, to keep the run short. Defaults to 3.
    """
    element = await page.wait_for_selector(selector)
    await element.click()
    await asyncio.sleep(0.5)

    # Clear whatever the field currently contains.
    await page.keyboard.press("Control+A")
    await page.keyboard.press("Delete")
    await asyncio.sleep(0.3)

    # Keep only the non-blank paragraphs among the first ``max_paragraphs``.
    # Separators are emitted strictly between paragraphs that are actually
    # typed, so a truncated text no longer ends with two stray Enter presses.
    selected = [
        paragraph
        for paragraph in text.split('\n\n')[:max_paragraphs]
        if paragraph.strip()
    ]
    last_index = len(selected) - 1

    for index, paragraph in enumerate(selected):
        for char in paragraph:
            # Tag-input style: slow and deliberate keystrokes.
            await page.keyboard.type(char, delay=random.randint(200, 400))
            await asyncio.sleep(random.uniform(0.1, 0.2))
            # Occasional longer pause, as if thinking.
            if random.random() < 0.3:
                await asyncio.sleep(random.uniform(0.5, 1.0))

        # Pause and insert a blank line between paragraphs.
        if index < last_index:
            await asyncio.sleep(random.uniform(0.8, 1.2))
            await page.keyboard.press("Enter")
            await page.keyboard.press("Enter")

    await asyncio.sleep(0.5)
# Characters labelled as punctuation when classifying typed characters.
_PUNCTUATION_CHARS = ",。!?、,.!?"

# Sample text typed by every input method.
_TEST_TEXT = """这是一段测试文本,用来展示人类化输入效果。
这是第二段落,包含一些标点符号:,。!?
以及一些英文字符和数字 Hello World 123。
最后一段用来测试段落之间的停顿效果。
✨【交通指南】
✅7号线直达施园站步行5分钟入园
✅自驾导航「北京环球城市大道」,停车场直通园区
⏰建议早9点开园前排队热门项目可节省1小时等待
"""

# Minimal page with one textarea per input method under test.
_TEST_PAGE_HTML = '''
<!DOCTYPE html>
<html>
<head>
<title>输入方法测试对比</title>
<style>
body { padding: 20px; font-family: Arial, sans-serif; }
.container {
margin-bottom: 30px;
}
.input-box {
margin-bottom: 20px;
}
.textarea {
width: 100%;
padding: 10px;
border: 1px solid #ccc;
border-radius: 5px;
font-size: 16px;
line-height: 1.5;
margin-top: 10px;
height: 300px;
}
h2 { margin-bottom: 10px; color: #333; }
h3 { margin-bottom: 5px; color: #666; }
.description {
color: #666;
margin-bottom: 10px;
font-size: 14px;
}
hr {
margin: 20px 0;
border: 1px solid #eee;
}
</style>
</head>
<body>
<h1>输入方法测试评估</h1>
<div class="container">
<h2>1. 复制粘贴方法测试</h2>
<div class="input-box">
<h3>PasteTypingSimulator</h3>
<div class="description">使用PasteTypingSimulator类确保文本被复制到剪贴板后再粘贴</div>
<textarea id="paste-method" class="textarea" placeholder="复制粘贴方法测试区域..."></textarea>
</div>
</div>
<hr>
<div class="container">
<h2>2. 优化人类输入测试</h2>
<div class="input-box">
<h3>HumanTypingWrapper</h3>
<div class="description">使用HumanTypingWrapper类提供更高级的人类化输入功能</div>
<textarea id="human-wrapper-method" class="textarea" placeholder="优化人类输入测试区域..."></textarea>
</div>
</div>
<hr>
<div class="container">
<h2>3. 模拟人类输入文本测试</h2>
<div class="input-box">
<h3>HumanLikeTyper</h3>
<div class="description">使用HumanLikeTyper类模拟基本的人类输入行为</div>
<textarea id="human-like-method" class="textarea" placeholder="模拟人类输入测试区域..."></textarea>
</div>
</div>
<hr>
<div class="container">
<h2>4. 小红书标签输入风格测试</h2>
<div class="input-box">
<h3>Xiaohongshu Tag Style</h3>
<div class="description">从examples/upload_video_to_xiaohongshu.py提取的标签输入风格用于输入正文</div>
<textarea id="xhs-tag-method" class="textarea" placeholder="小红书标签输入风格测试区域..."></textarea>
</div>
</div>
</body>
</html>
'''


def _classify_char(char, prefix=""):
    """Return the action label (type / special / space) for one character."""
    if char.isalnum():
        return f"{prefix}type"
    if char in _PUNCTUATION_CHARS:
        return f"{prefix}special"
    return f"{prefix}space"


def _risk_level_label(score):
    """Map a 0-100 risk score onto the low / medium / high label."""
    if score < 30:
        return "低"
    if score < 70:
        return "中"
    return "高"


def _analyze_and_report(recorder, detector, results, key):
    """Analyse the recorder's current session, store it under ``key`` and print it."""
    timestamps, actions = recorder.get_records()
    result = detector.analyze_typing_pattern(timestamps, actions)
    results[key] = result
    print("行为分析结果:")
    print(detector.format_detection_result(result))
    print("\n")


async def _type_human_like(page, recorder, text):
    """Type ``text`` with random pauses and occasional corrected typos, recording each char."""
    await page.wait_for_selector("#human-like-method")
    await page.click("#human-like-method")
    await asyncio.sleep(0.3)

    current_input = ""
    for char in text:
        # Occasional long pause.
        if random.random() < 0.1:
            await asyncio.sleep(random.uniform(0.5, 2.0))

        # Occasional typo: type a wrong character, then delete it again.
        # The previous version pressed Backspace *before* the wrong character
        # as well, which permanently removed an already-correct character.
        if current_input and random.random() < 0.08:
            wrong_char = random.choice("abcdefghijklmnopqrstuvwxyz ")
            await page.keyboard.type(wrong_char, delay=random.randint(50, 150))
            await asyncio.sleep(random.uniform(0.1, 0.3))
            await page.keyboard.press("Backspace")
            await asyncio.sleep(random.uniform(0.1, 0.3))

        # 3-8 characters per second plus jitter; the delay stays positive
        # because 1 / 8 exceeds the largest negative jitter of 0.05.
        speed = random.uniform(3, 8)
        delay = 1 / speed + random.uniform(-0.05, 0.1)
        await page.keyboard.type(char, delay=delay * 1000)

        recorder.record(_classify_char(char))
        current_input += char
        await asyncio.sleep(random.uniform(0.01, 0.05))


async def test_typing_methods():
    """Run the four input methods against a local test page and compare their scores.

    Opens a visible Chromium window, types the sample text into one textarea
    per method, scores each recorded session with ``BotDetector`` and prints
    the per-method reports followed by a summary. Returns nothing.
    """
    test_text = _TEST_TEXT

    async with async_playwright() as playwright:
        # Headed mode so the typing can be watched.
        browser = await playwright.chromium.launch(headless=False)
        try:
            # Clipboard permissions are granted for the paste-based method.
            context = await browser.new_context(permissions=['clipboard-read', 'clipboard-write'])
            page = await context.new_page()
            await page.set_content(_TEST_PAGE_HTML)

            recorder = InputRecorder()
            detector = BotDetector()
            results = {}
            print("\n=== 开始输入方法测试评估 ===\n")

            # 1. PasteTypingSimulator
            print("1. 测试 PasteTypingSimulator (复制粘贴方法)")
            print("-" * 50)
            recorder.start()
            paste_typer = PasteTypingSimulator(page)
            recorder.record("prepare")
            success = await paste_typer.paste_text("#paste-method", test_text)
            recorder.record("paste_complete")
            print(f"粘贴操作成功: {success}")
            _analyze_and_report(recorder, detector, results, "paste_typing")
            await asyncio.sleep(2)

            # 2. HumanTypingWrapper
            print("2. 测试 HumanTypingWrapper (优化人类输入)")
            print("-" * 50)
            recorder.start()
            human_wrapper = HumanTypingWrapper(page)
            success = await human_wrapper.type_text_human("#human-wrapper-method", test_text)
            # NOTE(review): the actions are recorded only after typing has
            # finished, so these timestamps reflect the speed of this loop,
            # not the real keystroke timing.
            for char in test_text:
                recorder.record(_classify_char(char))
            print(f"人类化输入成功: {success}")
            _analyze_and_report(recorder, detector, results, "human_wrapper")
            await asyncio.sleep(2)

            # 3. HumanLikeTyper, re-implemented inline as an async variant.
            print("3. 测试 HumanLikeTyper (模拟人类输入文本)")
            print("-" * 50)
            recorder.start()
            try:
                # Only the first 200 characters, to keep the run short.
                await _type_human_like(page, recorder, test_text[:200])
                success = True
            except Exception as e:
                # Report the failure and carry on with the remaining methods.
                print(f"HumanLikeTyper 测试出错: {e}")
                success = False
            print(f"模拟人类输入成功: {success}")
            _analyze_and_report(recorder, detector, results, "human_like")
            await asyncio.sleep(2)

            # 4. Xiaohongshu tag-input style
            print("4. 测试 小红书标签输入风格")
            print("-" * 50)
            recorder.start()
            await xiaohongshu_tag_input_from_examples(page, test_text, "#xhs-tag-method")
            # NOTE(review): recorded after the fact as well, see method 2.
            for char in test_text:
                recorder.record(_classify_char(char, "xhs_tag_"))
            print("小红书标签输入风格测试完成")
            _analyze_and_report(recorder, detector, results, "xhs_tag")
            await asyncio.sleep(2)

            # Summary over all methods.
            print("=== 输入方法测试评估总结 ===\n")
            method_names = {
                "paste_typing": "1. PasteTypingSimulator (复制粘贴)",
                "human_wrapper": "2. HumanTypingWrapper (优化人类输入)",
                "human_like": "3. HumanLikeTyper (模拟人类输入)",
                "xhs_tag": "4. 小红书标签输入风格"
            }
            for method_key, method_name in method_names.items():
                if method_key not in results:
                    continue
                result = results[method_key]
                # The level used to be an empty string in every branch.
                risk_level = _risk_level_label(result["risk_score"])
                print(f"{method_name}: 风险评分 {result['risk_score']}/100 (风险等级: {risk_level})")
                if result["reasons"]:
                    print(" 可疑特征:")
                    for reason in result["reasons"]:
                        print(f" - {reason}")
                else:
                    print(" 未发现可疑特征")
                print()

            # The lowest risk score wins.
            best_method = min(results.items(), key=lambda x: x[1]["risk_score"])
            print(f"最佳表现方法: {method_names[best_method[0]]} (风险评分: {best_method[1]['risk_score']}/100)")
        finally:
            # Close the browser window even when a step above raised.
            await browser.close()
if __name__ == "__main__":
    # Executed as a script: run the interactive comparison on a fresh event loop.
    comparison = test_typing_methods()
    asyncio.run(comparison)