"""Compare several browser text-input strategies and score how bot-like each looks."""

import asyncio
import random
import time

import numpy as np
from playwright.async_api import async_playwright

from utils.paste_typing import PasteTypingSimulator
from utils.human_typing_wrapper import HumanTypingWrapper
from utils.human_like import HumanLikeTyper

# Characters classified as "special" (punctuation) when recording keystrokes.
_PUNCTUATION = ",。!?、,.!?"


def _classify_char(char, prefix=""):
    """Map a typed character to the action label stored by the recorder.

    Args:
        char: The single character that was typed.
        prefix: Optional string prepended to the label (e.g. "xhs_tag_").

    Returns:
        str: "<prefix>type" for alphanumerics, "<prefix>special" for the
        punctuation in ``_PUNCTUATION``, "<prefix>space" for everything else.
    """
    if char.isalnum():
        kind = "type"
    elif char in _PUNCTUATION:
        kind = "special"
    else:
        kind = "space"
    return f"{prefix}{kind}"


class BotDetector:
    """Heuristically score whether a recorded input sequence looks automated."""

    @staticmethod
    def risk_level(risk_score):
        """Return the risk label for a score: "低" (<30), "中" (<70) or "高"."""
        if risk_score < 30:
            return "低"
        if risk_score < 70:
            return "中"
        return "高"

    @staticmethod
    def analyze_typing_pattern(timestamps, actions):
        """Analyze an input pattern.

        Args:
            timestamps: Sequence of action times in seconds, in recording order.
            actions: Sequence of action labels (keyboard input, etc.).

        Returns:
            dict: ``risk_score`` (0-100), ``reasons`` (list of str) and, when
            at least two timestamps were supplied, ``metrics`` with
            ``interval_cv``, ``interval_mean`` and ``unique_patterns``.
        """
        # An explicit None/len check (instead of truthiness) also accepts
        # numpy arrays, whose truth value is ambiguous.
        if timestamps is None or len(timestamps) < 2:
            return {"risk_score": 0, "reasons": ["样本太少,无法分析"]}

        # Gaps between consecutive actions.
        intervals = np.diff(np.asarray(timestamps, dtype=float))

        # 1. Consistency of the gaps (coefficient of variation).
        interval_std = float(np.std(intervals))
        interval_mean = float(np.mean(intervals))
        interval_cv = interval_std / interval_mean if interval_mean > 0 else 0.0

        # 2. Any unnaturally fast keystroke (gap below 10 ms).
        has_super_fast = bool(np.any(intervals < 0.01))

        # 3. Regularity of consecutive actions, measured over trigrams.
        action_patterns = [
            f"{first}-{second}-{third}"
            for first, second, third in zip(actions, actions[1:], actions[2:])
        ]
        unique_patterns = (
            len(set(action_patterns)) / len(action_patterns)
            if action_patterns
            else 0
        )

        risk_score = 0
        reasons = []

        # Human typing usually has a CV between 0.2 and 0.8. A single interval
        # always has std 0, so the check needs at least two intervals to mean
        # anything.
        if len(intervals) >= 2:
            if interval_cv < 0.2:
                risk_score += 30
                reasons.append("时间间隔过于规律")
            elif interval_cv > 0.8:
                risk_score += 10
                reasons.append("时间间隔过于混乱")

        if has_super_fast:
            risk_score += 25
            reasons.append("存在不自然的快速输入")

        # Human action patterns are usually more varied. With no trigram
        # available the diversity is unknown, not low, so it is not penalised.
        if action_patterns and unique_patterns < 0.4:
            risk_score += 20
            reasons.append("动作模式过于单一")

        # Overall speed: mean gap below 50 ms.
        if interval_mean < 0.05:
            risk_score += 25
            reasons.append("整体输入速度过快")

        return {
            "risk_score": min(risk_score, 100),
            "reasons": reasons,
            "metrics": {
                "interval_cv": interval_cv,
                "interval_mean": interval_mean,
                "unique_patterns": unique_patterns,
            },
        }

    @staticmethod
    def format_detection_result(result):
        """Render an ``analyze_typing_pattern`` result as multi-line text."""
        risk_level = BotDetector.risk_level(result["risk_score"])
        output = [
            f"机器人风险评分: {result['risk_score']}/100 (风险等级: {risk_level})",
            "可疑特征:" if result["reasons"] else "未发现可疑特征",
        ]
        output.extend(f"- {reason}" for reason in result["reasons"])

        if "metrics" in result:
            metrics = result["metrics"]
            output.extend([
                "\n详细指标:",
                f"- 时间间隔变异系数: {metrics['interval_cv']:.3f}",
                f"- 平均输入间隔: {metrics['interval_mean']*1000:.1f}ms",
                f"- 动作模式多样性: {metrics['unique_patterns']:.3f}",
            ])

        return "\n".join(output)


class InputRecorder:
    """Record input actions together with their time offset from ``start()``."""

    def __init__(self):
        self.timestamps = []
        self.actions = []
        self.start_time = None

    def start(self):
        """Begin a new recording, discarding anything recorded before."""
        self.start_time = time.time()
        self.timestamps = []
        self.actions = []

    def record(self, action):
        """Record one action; starts the recording if ``start()`` was not called."""
        if self.start_time is None:
            self.start()
        self.timestamps.append(time.time() - self.start_time)
        self.actions.append(action)

    def get_records(self):
        """Return ``(timestamps, actions)`` for the current recording."""
        return self.timestamps, self.actions


async def xiaohongshu_tag_input_from_examples(page, text: str, selector: str, on_char=None):
    """Type body text using the tag-input style from upload_video_to_xiaohongshu.py.

    The method was extracted from the examples' tag input; here it is used to
    enter body text at the same slow, careful pace.

    Args:
        page: Playwright page to type into.
        text: Text to enter; paragraphs are separated by a blank line and only
            the first three are typed, to keep the run short.
        selector: Selector of the element to click and type into.
        on_char: Optional callable invoked with each character right after it
            has been typed (e.g. to record keystroke timing).
    """
    element = await page.wait_for_selector(selector)
    await element.click()
    await asyncio.sleep(0.5)

    # Clear whatever the field currently holds.
    await page.keyboard.press("Control+A")
    await page.keyboard.press("Delete")
    await asyncio.sleep(0.3)

    # Keep the first three paragraphs and drop blank ones up front, so the
    # separator below is only pressed between paragraphs that are really typed.
    paragraphs = [part for part in text.split('\n\n')[:3] if part.strip()]
    last_index = len(paragraphs) - 1

    for index, paragraph in enumerate(paragraphs):
        for char in paragraph:
            # Tag-input style: slow and careful.
            await page.keyboard.type(char, delay=random.randint(200, 400))
            if on_char is not None:
                on_char(char)
            await asyncio.sleep(random.uniform(0.1, 0.2))

            # Occasional longer pause, as if thinking.
            if random.random() < 0.3:
                await asyncio.sleep(random.uniform(0.5, 1.0))

        # Pause and insert a blank line between paragraphs (not after the last).
        if index < last_index:
            await asyncio.sleep(random.uniform(0.8, 1.2))
            await page.keyboard.press("Enter")
            await page.keyboard.press("Enter")
            await asyncio.sleep(0.5)


async def _analyze_and_report(recorder, results, key):
    """Score the recorder's current recording, store it under *key* and print it."""
    timestamps, actions = recorder.get_records()
    result = BotDetector.analyze_typing_pattern(timestamps, actions)
    results[key] = result
    print("行为分析结果:")
    print(BotDetector.format_detection_result(result))
    print("\n")
    await asyncio.sleep(2)


async def test_typing_methods():
    """Run the four input methods against a local test page and compare scores."""
    # NOTE(review): paragraph/line breaks in this sample were reconstructed;
    # blank lines separate paragraphs because the tag-style typer splits on
    # '\n\n'. Confirm against the intended sample text.
    test_text = """这是一段测试文本,用来展示人类化输入效果。

这是第二段落,包含一些标点符号:,。!?
以及一些英文字符和数字 Hello World 123。

最后一段用来测试段落之间的停顿效果。

✨【交通指南】
✅7号线直达施园站,步行5分钟入园
✅自驾导航「北京环球城市大道」,停车场直通园区
⏰建议早9点开园前排队,热门项目可节省1小时等待
"""

    async with async_playwright() as playwright:
        # Headed browser so the typing can be watched; clipboard permissions
        # are granted for the paste-based method.
        browser = await playwright.chromium.launch(headless=False)
        try:
            context = await browser.new_context(
                permissions=['clipboard-read', 'clipboard-write']
            )
            page = await context.new_page()

            # Simple page with one input area per method. Each area carries
            # the id that the corresponding test below selects.
            # NOTE(review): element types/layout are a minimal reconstruction;
            # adjust if the typers expect e.g. contenteditable elements.
            await page.set_content('''
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>输入方法测试对比</title>
</head>
<body>
    <h1>输入方法测试评估</h1>

    <section>
        <h2>1. 复制粘贴方法测试</h2>
        <h3>PasteTypingSimulator</h3>
        <p>使用PasteTypingSimulator类,确保文本被复制到剪贴板后再粘贴</p>
        <textarea id="paste-method" rows="8" cols="80"></textarea>
    </section>

    <section>
        <h2>2. 优化人类输入测试</h2>
        <h3>HumanTypingWrapper</h3>
        <p>使用HumanTypingWrapper类,提供更高级的人类化输入功能</p>
        <textarea id="human-wrapper-method" rows="8" cols="80"></textarea>
    </section>

    <section>
        <h2>3. 模拟人类输入文本测试</h2>
        <h3>HumanLikeTyper</h3>
        <p>使用HumanLikeTyper类,模拟基本的人类输入行为</p>
        <textarea id="human-like-method" rows="8" cols="80"></textarea>
    </section>

    <section>
        <h2>4. 小红书标签输入风格测试</h2>
        <h3>Xiaohongshu Tag Style</h3>
        <p>从examples/upload_video_to_xiaohongshu.py提取的标签输入风格,用于输入正文</p>
        <textarea id="xhs-tag-method" rows="8" cols="80"></textarea>
    </section>
</body>
</html>
''')

            recorder = InputRecorder()
            results = {}

            print("\n=== 开始输入方法测试评估 ===\n")

            # 1. PasteTypingSimulator
            print("1. 测试 PasteTypingSimulator (复制粘贴方法)")
            print("-" * 50)
            recorder.start()

            paste_typer = PasteTypingSimulator(page)
            recorder.record("prepare")
            success = await paste_typer.paste_text("#paste-method", test_text)
            recorder.record("paste_complete")
            print(f"粘贴操作成功: {success}")

            await _analyze_and_report(recorder, results, "paste_typing")

            # 2. HumanTypingWrapper
            print("2. 测试 HumanTypingWrapper (优化人类输入)")
            print("-" * 50)
            recorder.start()

            human_wrapper = HumanTypingWrapper(page)
            success = await human_wrapper.type_text_human("#human-wrapper-method", test_text)

            # NOTE(review): these actions are recorded only after typing has
            # finished, so their timestamps are near-identical and do not
            # reflect the wrapper's real cadence. Recording during typing
            # needs a per-keystroke hook on HumanTypingWrapper, which is not
            # visible from this file.
            for char in test_text:
                recorder.record(_classify_char(char))

            print(f"人类化输入成功: {success}")

            await _analyze_and_report(recorder, results, "human_wrapper")

            # 3. HumanLikeTyper
            print("3. 测试 HumanLikeTyper (模拟人类输入文本)")
            print("-" * 50)
            recorder.start()

            # HumanLikeTyper is synchronous while this test runs in an async
            # context, so an equivalent async routine is implemented inline.
            try:
                await page.wait_for_selector("#human-like-method")
                await page.click("#human-like-method")
                await asyncio.sleep(0.3)

                typed_any = False
                for char in test_text[:200]:  # cap length to keep the run short
                    # Random pause.
                    if random.random() < 0.1:
                        await asyncio.sleep(random.uniform(0.5, 2.0))

                    # Random typo followed by its correction: type a wrong
                    # character, then delete exactly that character so the
                    # text already entered stays intact.
                    if typed_any and random.random() < 0.08:
                        wrong_char = random.choice("abcdefghijklmnopqrstuvwxyz ")
                        await page.keyboard.type(wrong_char, delay=random.randint(50, 150))
                        await asyncio.sleep(random.uniform(0.1, 0.3))
                        await page.keyboard.press("Backspace")
                        await asyncio.sleep(random.uniform(0.1, 0.3))

                    # Per-character delay derived from a random typing speed
                    # (characters per second); Playwright expects milliseconds.
                    speed = random.uniform(3, 8)
                    delay = 1 / speed + random.uniform(-0.05, 0.1)
                    await page.keyboard.type(char, delay=delay * 1000)

                    recorder.record(_classify_char(char))
                    typed_any = True

                    await asyncio.sleep(random.uniform(0.01, 0.05))

                success = True
            except Exception as e:
                # Best-effort demo: report the failure and continue with the
                # remaining methods.
                print(f"HumanLikeTyper 测试出错: {e}")
                success = False

            print(f"模拟人类输入成功: {success}")

            await _analyze_and_report(recorder, results, "human_like")

            # 4. Xiaohongshu tag-input style
            print("4. 测试 小红书标签输入风格")
            print("-" * 50)
            recorder.start()

            # Record each character as it is typed so the timestamps reflect
            # the actual typing cadence.
            await xiaohongshu_tag_input_from_examples(
                page,
                test_text,
                "#xhs-tag-method",
                on_char=lambda ch: recorder.record(_classify_char(ch, "xhs_tag_")),
            )

            print("小红书标签输入风格测试完成")

            await _analyze_and_report(recorder, results, "xhs_tag")

            # Summary of all methods.
            print("=== 输入方法测试评估总结 ===\n")

            method_names = {
                "paste_typing": "1. PasteTypingSimulator (复制粘贴)",
                "human_wrapper": "2. HumanTypingWrapper (优化人类输入)",
                "human_like": "3. HumanLikeTyper (模拟人类输入)",
                "xhs_tag": "4. 小红书标签输入风格",
            }

            for method_key, method_name in method_names.items():
                if method_key not in results:
                    continue
                result = results[method_key]
                risk_level = BotDetector.risk_level(result["risk_score"])
                print(f"{method_name}: 风险评分 {result['risk_score']}/100 (风险等级: {risk_level})")
                if result["reasons"]:
                    print(" 可疑特征:")
                    for reason in result["reasons"]:
                        print(f" - {reason}")
                else:
                    print(" 未发现可疑特征")
                print()

            # Method with the lowest risk score.
            best_method = min(results.items(), key=lambda x: x[1]["risk_score"])
            print(f"最佳表现方法: {method_names[best_method[0]]} (风险评分: {best_method[1]['risk_score']}/100)")
        finally:
            # Close the browser even if one of the methods raised.
            await browser.close()


if __name__ == "__main__":
    asyncio.run(test_typing_methods())