# autoUpload/test_typing.py

import asyncio
import random
import time
import numpy as np
from playwright.async_api import async_playwright
from utils.paste_typing import PasteTypingSimulator
from utils.human_typing_wrapper import HumanTypingWrapper
from utils.human_like import HumanLikeTyper

class BotDetector:
    """Heuristic scorer that rates how bot-like a recorded input sequence is."""

    @staticmethod
    def _risk_level(score):
        """Map a 0-100 risk score to its level label (low / medium / high)."""
        if score < 30:
            return "低"
        if score < 70:
            return "中"
        return "高"

    @staticmethod
    def analyze_typing_pattern(timestamps, actions):
        """Score a recorded input sequence for bot-like characteristics.

        Penalties (summed, then capped at 100):
          * +30 if the coefficient of variation (CV) of the intervals is
            below 0.2 (too regular), or +10 if it is above 0.8 (too erratic);
          * +25 if any interval is shorter than 10 ms;
          * +20 if fewer than 40% of the 3-action windows are distinct;
          * +25 if the mean interval is shorter than 50 ms.

        Args:
            timestamps: Sequence of event times in seconds, in recording order.
            actions: Sequence of action labels (keyboard input, mouse
                movement, ...).

        Returns:
            dict: ``risk_score`` (int, 0-100) and ``reasons`` (list of str).
            When at least two timestamps are supplied it also carries
            ``metrics`` with ``interval_cv``, ``interval_mean`` and
            ``unique_patterns``.
        """
        # `len()` instead of truthiness so numpy arrays are accepted as well.
        if timestamps is None or len(timestamps) < 2:
            return {"risk_score": 0, "reasons": ["样本太少，无法分析"]}

        # Gaps between consecutive events.
        intervals = np.diff(timestamps)

        # 1. Regularity of the gaps, expressed as the coefficient of variation.
        interval_std = np.std(intervals)
        interval_mean = np.mean(intervals)
        interval_cv = interval_std / interval_mean if interval_mean > 0 else 0

        # 2. Any gap shorter than 10 ms counts as unnaturally fast.
        has_super_fast = bool(np.any(intervals < 0.01))

        # 3. Diversity of consecutive 3-action windows.
        action_patterns = [
            f"{first}-{second}-{third}"
            for first, second, third in zip(actions, actions[1:], actions[2:])
        ]
        unique_patterns = (
            len(set(action_patterns)) / len(action_patterns)
            if action_patterns
            else 0
        )

        risk_score = 0
        reasons = []

        # The original comment states human input usually has a CV in 0.2-0.8.
        if interval_cv < 0.2:
            risk_score += 30
            reasons.append("时间间隔过于规律")
        elif interval_cv > 0.8:
            risk_score += 10
            reasons.append("时间间隔过于混乱")

        if has_super_fast:
            risk_score += 25
            reasons.append("存在不自然的快速输入")

        # Only judge diversity when at least one 3-action window exists.
        # With fewer than three actions the ratio is a placeholder 0, and
        # penalising it would flag a sequence for which there is no data.
        if action_patterns and unique_patterns < 0.4:
            risk_score += 20
            reasons.append("动作模式过于单一")

        # Mean gap below 50 ms: overall speed is too high.
        if interval_mean < 0.05:
            risk_score += 25
            reasons.append("整体输入速度过快")

        return {
            "risk_score": min(risk_score, 100),
            "reasons": reasons,
            "metrics": {
                "interval_cv": interval_cv,
                "interval_mean": interval_mean,
                "unique_patterns": unique_patterns,
            },
        }

    @staticmethod
    def format_detection_result(result):
        """Render an analysis result as a multi-line, human-readable report.

        Args:
            result: Dict as returned by ``analyze_typing_pattern``; the
                ``metrics`` entry is optional.

        Returns:
            str: The report, one item per line.
        """
        risk_level = BotDetector._risk_level(result["risk_score"])

        output = [
            f"机器人风险评分: {result['risk_score']}/100 (风险等级: {risk_level})",
            "可疑特征:" if result["reasons"] else "未发现可疑特征",
        ]
        output.extend(f"- {reason}" for reason in result["reasons"])

        if "metrics" in result:
            metrics = result["metrics"]
            output.extend([
                "\n详细指标:",
                f"- 时间间隔变异系数: {metrics['interval_cv']:.3f}",
                # The mean interval is stored in seconds; display milliseconds.
                f"- 平均输入间隔: {metrics['interval_mean']*1000:.1f}ms",
                f"- 动作模式多样性: {metrics['unique_patterns']:.3f}",
            ])

        return "\n".join(output)

class InputRecorder:
    """Collect (elapsed seconds, action label) pairs for one input session."""

    def __init__(self):
        # Offsets in seconds since the session started, one per action.
        self.timestamps = []
        # Action labels, index-aligned with `timestamps`.
        self.actions = []
        # Wall-clock time at which the session started (None until started).
        self.start_time = None
        # Monotonic reference used to compute the offsets.
        self._origin = None

    def start(self):
        """Begin a new session, discarding anything recorded so far."""
        self.start_time = time.time()
        # Offsets are measured against a monotonic, high-resolution clock so
        # a system clock adjustment cannot produce negative or skewed gaps.
        self._origin = time.perf_counter()
        # Fresh lists, so results handed out earlier by get_records() are
        # not mutated by the next session.
        self.timestamps = []
        self.actions = []

    def record(self, action):
        """Append one action, stamped with the time elapsed since start().

        If the session was never started it is started implicitly, rather
        than failing on the missing reference time.
        """
        if self._origin is None:
            self.start()
        self.timestamps.append(time.perf_counter() - self._origin)
        self.actions.append(action)

    def get_records(self):
        """Return the recorded ``(timestamps, actions)`` lists."""
        return self.timestamps, self.actions

async def xiaohongshu_tag_input_from_examples(page, text: str, selector: str,
                                              max_paragraphs: int = 3):
    """Type body text into an element using the slow "tag input" style.

    Per the original note, the technique was extracted from
    examples/upload_video_to_xiaohongshu.py, where it is used for tags; here
    it is applied to body text.

    The element is clicked and cleared, then the text is typed one character
    at a time with long randomized delays and occasional extra pauses.
    Paragraphs (split on a blank line) are separated by two Enter presses.

    Args:
        page: Playwright page that owns the target element.
        text: Text to type; paragraphs are delimited by ``"\\n\\n"``.
        selector: Selector of the element to type into.
        max_paragraphs: Only this many leading paragraphs are considered, to
            keep the run short. ``None`` means no limit.
    """
    element = await page.wait_for_selector(selector)
    await element.click()
    await asyncio.sleep(0.5)
    # Clear whatever the element currently holds.
    await page.keyboard.press("Control+A")
    await page.keyboard.press("Delete")
    await asyncio.sleep(0.3)

    # Blank paragraphs still count toward the limit but are never typed.
    to_type = [
        paragraph
        for paragraph in text.split('\n\n')[:max_paragraphs]
        if paragraph.strip()
    ]
    last_index = len(to_type) - 1

    for index, paragraph in enumerate(to_type):
        for char in paragraph:
            # Tag-input style: slow and deliberate.
            await page.keyboard.type(char, delay=random.randint(200, 400))
            await asyncio.sleep(random.uniform(0.1, 0.2))

            # Occasional longer pause, imitating thinking time.
            if random.random() < 0.3:
                await asyncio.sleep(random.uniform(0.5, 1.0))

        # Separate only paragraphs that are actually typed. Comparing against
        # the unsliced paragraph count left a trailing blank line after the
        # last typed paragraph whenever the text was longer than the limit.
        if index < last_index:
            await asyncio.sleep(random.uniform(0.8, 1.2))
            await page.keyboard.press("Enter")
            await page.keyboard.press("Enter")

    await asyncio.sleep(0.5)

# Test page: four labelled textareas, one per input method. The trailing
# script forwards every `input` event to the `recordInputEvent` function that
# test_typing_methods() exposes, so typing can be timed as it happens.
_TEST_PAGE_HTML = '''
        <!DOCTYPE html>
        <html>
        <head>
            <title>输入方法测试对比</title>
            <style>
                body { padding: 20px; font-family: Arial, sans-serif; }
                .container {
                    margin-bottom: 30px;
                }
                .input-box {
                    margin-bottom: 20px;
                }
                .textarea {
                    width: 100%;
                    padding: 10px;
                    border: 1px solid #ccc;
                    border-radius: 5px;
                    font-size: 16px;
                    line-height: 1.5;
                    margin-top: 10px;
                    height: 300px;
                }
                h2 { margin-bottom: 10px; color: #333; }
                h3 { margin-bottom: 5px; color: #666; }
                .description {
                    color: #666;
                    margin-bottom: 10px;
                    font-size: 14px;
                }
                hr {
                    margin: 20px 0;
                    border: 1px solid #eee;
                }
            </style>
        </head>
        <body>
            <h1>输入方法测试评估</h1>
            
            <div class="container">
                <h2>1. 复制粘贴方法测试</h2>
                <div class="input-box">
                    <h3>PasteTypingSimulator</h3>
                    <div class="description">使用PasteTypingSimulator类，确保文本被复制到剪贴板后再粘贴</div>
                    <textarea id="paste-method" class="textarea" placeholder="复制粘贴方法测试区域..."></textarea>
                </div>
            </div>
            <hr>
            
            <div class="container">
                <h2>2. 优化人类输入测试</h2>
                <div class="input-box">
                    <h3>HumanTypingWrapper</h3>
                    <div class="description">使用HumanTypingWrapper类，提供更高级的人类化输入功能</div>
                    <textarea id="human-wrapper-method" class="textarea" placeholder="优化人类输入测试区域..."></textarea>
                </div>
            </div>
            <hr>
            
            <div class="container">
                <h2>3. 模拟人类输入文本测试</h2>
                <div class="input-box">
                    <h3>HumanLikeTyper</h3>
                    <div class="description">使用HumanLikeTyper类，模拟基本的人类输入行为</div>
                    <textarea id="human-like-method" class="textarea" placeholder="模拟人类输入测试区域..."></textarea>
                </div>
            </div>
            <hr>
            
            <div class="container">
                <h2>4. 小红书标签输入风格测试</h2>
                <div class="input-box">
                    <h3>Xiaohongshu Tag Style</h3>
                    <div class="description">从examples/upload_video_to_xiaohongshu.py提取的标签输入风格，用于输入正文</div>
                    <textarea id="xhs-tag-method" class="textarea" placeholder="小红书标签输入风格测试区域..."></textarea>
                </div>
            </div>
            <script>
                document.querySelectorAll('textarea').forEach(function (area) {
                    area.addEventListener('input', function (event) {
                        window.recordInputEvent(event.data || '');
                    });
                });
            </script>
        </body>
        </html>
        '''

# Grace period that lets input events still crossing the browser bridge
# arrive before a method's recording is analysed.
_EVENT_DRAIN_SECONDS = 0.2


def _classify_input(data, prefix=""):
    """Label the text of one input event as type / special / space."""
    if data and data.isalnum():
        kind = "type"
    elif data and data in "，。！？、,.!?":
        kind = "special"
    else:
        # Whitespace, line breaks (which carry no data), symbols, emoji.
        kind = "space"
    return prefix + kind


def _risk_level(score):
    """Map a 0-100 risk score to its level label (low / medium / high)."""
    return "低" if score < 30 else "中" if score < 70 else "高"


def _analyze_and_print(detector, recorder):
    """Analyse what the recorder holds, print the report, return the result."""
    timestamps, actions = recorder.get_records()
    result = detector.analyze_typing_pattern(timestamps, actions)
    print("行为分析结果:")
    print(detector.format_detection_result(result))
    print("\n")
    return result


def _print_summary(results):
    """Print the per-method verdicts followed by the lowest-risk method."""
    print("=== 输入方法测试评估总结 ===\n")

    method_names = {
        "paste_typing": "1. PasteTypingSimulator (复制粘贴)",
        "human_wrapper": "2. HumanTypingWrapper (优化人类输入)",
        "human_like": "3. HumanLikeTyper (模拟人类输入)",
        "xhs_tag": "4. 小红书标签输入风格"
    }

    for method_key, method_name in method_names.items():
        if method_key not in results:
            continue
        result = results[method_key]
        risk_level = _risk_level(result["risk_score"])
        print(f"{method_name}: 风险评分 {result['risk_score']}/100 (风险等级: {risk_level})")

        if result["reasons"]:
            print("  可疑特征:")
            for reason in result["reasons"]:
                print(f"    - {reason}")
        else:
            print("  未发现可疑特征")
        print()

    # The method with the lowest risk score performed best.
    best_method = min(results.items(), key=lambda item: item[1]["risk_score"])
    print(f"最佳表现方法: {method_names[best_method[0]]} (风险评分: {best_method[1]['risk_score']}/100)")


async def test_typing_methods():
    """Run four text-entry methods against a local page and compare scores.

    Launches a visible Chromium window, loads a page with one textarea per
    method, drives each method in turn, scores the recorded timing with
    BotDetector and prints a per-method report plus a final summary.

    Methods 2 and 4 type inside helpers this function cannot instrument, so
    their actions are recorded from the page's `input` events as they occur.
    Recording every character only after the helper had returned measured the
    speed of the recording loop, not of the typing.
    """
    test_text = """这是一段测试文本，用来展示人类化输入效果。

这是第二段落，包含一些标点符号：，。！？
以及一些英文字符和数字 Hello World 123。

最后一段用来测试段落之间的停顿效果。

✨【交通指南】
✅7号线直达施园站，步行5分钟入园
✅自驾导航「北京环球城市大道」，停车场直通园区
⏰建议早9点开园前排队，热门项目可节省1小时等待

"""

    async with async_playwright() as playwright:
        # Headed mode so the run can be watched.
        browser = await playwright.chromium.launch(headless=False)
        try:
            # Clipboard permissions are granted for the paste-based method.
            context = await browser.new_context(permissions=['clipboard-read', 'clipboard-write'])
            page = await context.new_page()

            recorder = InputRecorder()
            detector = BotDetector()
            results = {}

            # Label prefix for event-driven recording; None switches it off
            # for methods that record explicitly.
            capture = {"prefix": None}

            def on_dom_input(data):
                """Record one page `input` event while capture is enabled."""
                prefix = capture["prefix"]
                if prefix is not None:
                    # Stamped on arrival here, so it includes bridge latency.
                    recorder.record(_classify_input(data, prefix))

            # Expose the hook before loading the page whose script calls it.
            await page.expose_function("recordInputEvent", on_dom_input)
            await page.set_content(_TEST_PAGE_HTML)

            print("\n=== 开始输入方法测试评估 ===\n")

            # 1. PasteTypingSimulator: a single paste, bracketed by two marks.
            print("1. 测试 PasteTypingSimulator (复制粘贴方法)")
            print("-" * 50)
            recorder.start()

            paste_typer = PasteTypingSimulator(page)
            recorder.record("prepare")
            success = await paste_typer.paste_text("#paste-method", test_text)
            recorder.record("paste_complete")

            print(f"粘贴操作成功: {success}")

            results["paste_typing"] = _analyze_and_print(detector, recorder)

            await asyncio.sleep(2)

            # 2. HumanTypingWrapper: recorded from the page's input events.
            print("2. 测试 HumanTypingWrapper (优化人类输入)")
            print("-" * 50)
            recorder.start()

            human_wrapper = HumanTypingWrapper(page)
            capture["prefix"] = ""
            success = await human_wrapper.type_text_human("#human-wrapper-method", test_text)
            await asyncio.sleep(_EVENT_DRAIN_SECONDS)
            capture["prefix"] = None

            print(f"人类化输入成功: {success}")

            results["human_wrapper"] = _analyze_and_print(detector, recorder)

            await asyncio.sleep(2)

            # 3. HumanLikeTyper. Per the original note the class is
            # synchronous, so an async equivalent is implemented inline.
            print("3. 测试 HumanLikeTyper (模拟人类输入文本)")
            print("-" * 50)
            recorder.start()

            try:
                await page.wait_for_selector("#human-like-method")
                await page.click("#human-like-method")
                await asyncio.sleep(0.3)

                # Only the first 200 characters, to keep the run short.
                for index, char in enumerate(test_text[:200]):
                    # Occasional long pause.
                    if random.random() < 0.1:
                        await asyncio.sleep(random.uniform(0.5, 2.0))

                    # Occasional typo: type a wrong character, then delete
                    # it. A Backspace before the typo would remove a correct
                    # character that is never retyped, corrupting the text.
                    if index > 0 and random.random() < 0.08:
                        wrong_char = random.choice("abcdefghijklmnopqrstuvwxyz ")
                        await page.keyboard.type(wrong_char, delay=random.randint(50, 150))
                        await asyncio.sleep(random.uniform(0.1, 0.3))
                        await page.keyboard.press("Backspace")
                        await asyncio.sleep(random.uniform(0.1, 0.3))

                    # 3-8 characters per second plus jitter, as milliseconds.
                    speed = random.uniform(3, 8)
                    delay = 1 / speed + random.uniform(-0.05, 0.1)
                    await page.keyboard.type(char, delay=delay * 1000)

                    recorder.record(_classify_input(char))

                    await asyncio.sleep(random.uniform(0.01, 0.05))

                success = True
            except Exception as e:
                # Best effort: report the failure and continue with the rest.
                print(f"HumanLikeTyper 测试出错: {e}")
                success = False

            print(f"模拟人类输入成功: {success}")

            results["human_like"] = _analyze_and_print(detector, recorder)

            await asyncio.sleep(2)

            # 4. Xiaohongshu tag-input style: recorded from input events.
            print("4. 测试 小红书标签输入风格")
            print("-" * 50)
            recorder.start()

            capture["prefix"] = "xhs_tag_"
            await xiaohongshu_tag_input_from_examples(page, test_text, "#xhs-tag-method")
            await asyncio.sleep(_EVENT_DRAIN_SECONDS)
            capture["prefix"] = None

            print("小红书标签输入风格测试完成")

            results["xhs_tag"] = _analyze_and_print(detector, recorder)

            await asyncio.sleep(2)

            _print_summary(results)
        finally:
            # Close the browser even when a method raises.
            await browser.close()

# Script entry point: run the interactive comparison when executed directly.
if __name__ == "__main__":
    comparison = test_typing_methods()
    asyncio.run(comparison)