autoUpload/utils/anti_detection.py

282 lines
8.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
"""
反自动化检测工具模块
提供统一的反检测措施,供各个社交媒体上传器使用
"""
from typing import Dict, Any, Optional, List
from playwright.async_api import BrowserContext, Browser, Playwright
class AntiDetectionConfig:
    """Shared anti-detection settings read by the helpers in this module.

    Holds the Chromium launch flags, the pool of user-agent strings and the
    default browser-context options.
    """

    # Standard anti-detection launch flags. Each flag is listed exactly once
    # ('--disable-dev-shm-usage' used to appear twice).
    STANDARD_BROWSER_ARGS = [
        '--no-sandbox',
        '--disable-blink-features=AutomationControlled',  # core: disable the automation-controlled marker
        '--disable-web-security',
        '--disable-features=VizDisplayCompositor',
        '--disable-dev-shm-usage',
        '--disable-infobars',
        '--disable-extensions',
        '--disable-gpu',
        '--no-first-run',
        '--no-default-browser-check',
        '--lang=zh-CN'
    ]

    # Realistic user-agent strings; one is picked at random for headless contexts.
    # NOTE(review): the last entry is a Firefox UA, while create_stealth_browser
    # launches Chromium - confirm this mismatch is intended.
    REAL_USER_AGENTS = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/121.0'
    ]

    # Default browser-context options (applied to headless contexts).
    DEFAULT_CONTEXT_OPTIONS = {
        'viewport': {'width': 1920, 'height': 1080},
        'locale': 'zh-CN',
        'timezone_id': 'Asia/Shanghai',
        'device_scale_factor': 1,
        'has_touch': False,
        'is_mobile': False
    }
async def create_stealth_browser(
    playwright: Playwright,
    headless: bool = True,
    executable_path: Optional[str] = None,
    custom_args: Optional[List[str]] = None
) -> Browser:
    """Launch a Chromium browser with the standard anti-detection flags.

    Args:
        playwright: Playwright instance used to launch the browser.
        headless: Whether to run the browser in headless mode.
        executable_path: Optional path to a custom browser executable.
        custom_args: Extra launch arguments, appended after the standard ones.

    Returns:
        The launched Browser instance.
    """
    # Standard flags first, caller-supplied flags after them; the shared
    # class-level list itself is never modified.
    merged_args = [*AntiDetectionConfig.STANDARD_BROWSER_ARGS, *(custom_args or ())]

    launch_kwargs = {'headless': headless, 'args': merged_args}
    # Only pass executable_path when one was given.
    if executable_path:
        launch_kwargs['executable_path'] = executable_path

    return await playwright.chromium.launch(**launch_kwargs)
async def create_stealth_context(
    browser: Browser,
    account_file: str,
    headless: bool = True,
    custom_options: Optional[Dict[str, Any]] = None
) -> BrowserContext:
    """Create a browser context with anti-detection options applied.

    Args:
        browser: Browser instance to create the context on.
        account_file: Path to the cookie file; passed as ``storage_state``.
        headless: Whether the session is headless. Only headless sessions get
            the default context options and a random user agent.
        custom_options: Extra context options; applied last, so they override
            anything set before them.

    Returns:
        The configured BrowserContext instance.
    """
    context_kwargs: Dict[str, Any] = {'storage_state': account_file}

    if headless:
        # Headless runs get the fixed defaults plus a randomly chosen
        # user agent from the configured pool.
        import random
        context_kwargs = {
            **context_kwargs,
            **AntiDetectionConfig.DEFAULT_CONTEXT_OPTIONS,
            'user_agent': random.choice(AntiDetectionConfig.REAL_USER_AGENTS),
        }

    # Caller-supplied options win over everything above.
    if custom_options:
        context_kwargs.update(custom_options)

    return await browser.new_context(**context_kwargs)
async def setup_stealth_page(context: BrowserContext, url: str):
    """Create a page on the given context and navigate it to *url*.

    Args:
        context: BrowserContext to create the page in.
        url: Address to visit.

    Returns:
        The new page. It is returned even if navigation raised; the error
        is only printed.
    """
    from utils.base_social_media import set_init_script

    # Apply the stealth init script; the helper returns the context to use.
    prepared_context = await set_init_script(context)
    page = await prepared_context.new_page()

    # Gentler wait strategy: stop at DOMContentLoaded, 30 s timeout.
    navigation_kwargs = {'wait_until': 'domcontentloaded', 'timeout': 30000}
    try:
        await page.goto(url, **navigation_kwargs)
    except Exception as e:
        # Deliberate best-effort: report the problem and still hand back the page.
        print(f"页面导航警告: {e}")

    return page
class SocialMediaUploader:
    """Base class for social-media uploaders with shared anti-detection setup.

    Keeps the browser, context and page of one session on the instance so
    that they can be released together with ``close_session()``.
    """

    def __init__(self, headless: bool = True, executable_path: Optional[str] = None):
        """Store the launch settings; nothing is started here.

        Args:
            headless: Whether sessions run in headless mode.
            executable_path: Optional path to a custom browser executable.
        """
        self.headless = headless
        self.executable_path = executable_path
        self.browser = None
        self.context = None
        self.page = None

    async def create_browser_session(
        self,
        playwright: "Playwright",
        account_file: str,
        target_url: str,
        custom_browser_args: Optional[List[str]] = None,
        custom_context_options: Optional[Dict[str, Any]] = None
    ):
        """Create a complete browser session (browser + context + page).

        Args:
            playwright: Playwright instance.
            account_file: Path to the cookie file.
            target_url: URL the new page navigates to.
            custom_browser_args: Extra browser launch arguments.
            custom_context_options: Extra context options.

        Returns:
            tuple: (browser, context, page)

        Raises:
            Exception: Whatever a setup step raised. Anything created before
                the failure is closed first; if that cleanup itself fails,
                the cleanup error propagates with the original error chained
                as its context.
        """
        try:
            # Browser
            self.browser = await create_stealth_browser(
                playwright=playwright,
                headless=self.headless,
                executable_path=self.executable_path,
                custom_args=custom_browser_args
            )
            # Context
            self.context = await create_stealth_context(
                browser=self.browser,
                account_file=account_file,
                headless=self.headless,
                custom_options=custom_context_options
            )
            # Page
            self.page = await setup_stealth_page(self.context, target_url)
        except Exception:
            # A failure part-way through would otherwise leave a launched
            # browser running; callers such as quick_setup_stealth_browser
            # keep no reference to this object and could never close it.
            await self.close_session()
            raise
        return self.browser, self.context, self.page

    async def close_session(self):
        """Close the context and the browser, then forget all session handles.

        The browser is closed even if closing the context raises. Handles are
        cleared up front, so calling this again afterwards does nothing.
        """
        context, browser = self.context, self.browser
        self.browser = self.context = self.page = None
        try:
            if context:
                await context.close()
        finally:
            if browser:
                await browser.close()
def get_anti_detection_info() -> Dict[str, Any]:
    """Summarise the anti-detection configuration for debugging.

    Returns:
        A dict holding the browser-argument list and its length, the number
        of user agents plus the first one as a sample, and the default
        viewport, locale and timezone.
    """
    config = AntiDetectionConfig
    browser_args = config.STANDARD_BROWSER_ARGS
    user_agents = config.REAL_USER_AGENTS
    context_defaults = config.DEFAULT_CONTEXT_OPTIONS

    info: Dict[str, Any] = {
        'browser_args_count': len(browser_args),
        'browser_args': browser_args,
        'user_agents_count': len(user_agents),
        'sample_user_agent': user_agents[0],
        'default_viewport': context_defaults['viewport'],
        'locale': context_defaults['locale'],
        'timezone': context_defaults['timezone_id'],
    }
    return info
# 便捷函数,用于快速设置
async def quick_setup_stealth_browser(
    playwright: Playwright,
    account_file: str,
    target_url: str,
    headless: bool = True,
    executable_path: Optional[str] = None
):
    """Build a full anti-detection browser environment in one call.

    Args:
        playwright: Playwright instance.
        account_file: Path to the cookie file.
        target_url: URL the new page navigates to.
        headless: Whether to run in headless mode.
        executable_path: Optional path to the browser executable.

    Returns:
        tuple: (browser, context, page)
    """
    session_kwargs = {
        'playwright': playwright,
        'account_file': account_file,
        'target_url': target_url,
    }
    # A throwaway uploader does the work; only the session tuple is returned.
    uploader = SocialMediaUploader(headless=headless, executable_path=executable_path)
    session = await uploader.create_browser_session(**session_kwargs)
    return session
# 调试工具
def print_anti_detection_config():
    """Print the current anti-detection configuration, for debugging."""
    info = get_anti_detection_info()

    # Header and scalar settings, one line each, followed by the heading
    # of the argument list.
    summary_lines = (
        "🛡️ 反检测配置信息:",
        "=" * 50,
        f"浏览器参数数量: {info['browser_args_count']}",
        f"用户代理数量: {info['user_agents_count']}",
        f"默认视口: {info['default_viewport']}",
        f"语言区域: {info['locale']}",
        f"时区: {info['timezone']}",
        "\n📋 浏览器参数:",
    )
    for line in summary_lines:
        print(line)

    # Browser arguments, numbered from 1.
    numbered_args = (f" {i:2d}. {arg}" for i, arg in enumerate(info['browser_args'], 1))
    for line in numbered_args:
        print(line)

    print(f"\n🌐 示例用户代理:\n {info['sample_user_agent']}")
if __name__ == "__main__":
    # Manual test / demo: print the current anti-detection configuration.
    print_anti_detection_config()