新增类人打字方法
This commit is contained in:
parent
43bae145b3
commit
b954c8adcf
@ -441,7 +441,7 @@ class XiaoHongShuVideo(object):
|
|||||||
|
|
||||||
|
|
||||||
class XiaoHongShuImage(object):
|
class XiaoHongShuImage(object):
|
||||||
def __init__(self, title, image_paths, tags, publish_date: datetime, account_file, location=None, content=None, headless=True):
|
def __init__(self, title, image_paths, tags, publish_date: datetime, account_file, location=None, content=None, headless=True, use_enhanced_typing=True):
|
||||||
self.title = title # 图文标题
|
self.title = title # 图文标题
|
||||||
self.image_paths = image_paths if isinstance(image_paths, list) else [image_paths] # 支持单张或多张图片
|
self.image_paths = image_paths if isinstance(image_paths, list) else [image_paths] # 支持单张或多张图片
|
||||||
self.tags = tags
|
self.tags = tags
|
||||||
@ -452,27 +452,35 @@ class XiaoHongShuImage(object):
|
|||||||
self.date_format = '%Y年%m月%d日 %H:%M'
|
self.date_format = '%Y年%m月%d日 %H:%M'
|
||||||
self.local_executable_path = LOCAL_CHROME_PATH
|
self.local_executable_path = LOCAL_CHROME_PATH
|
||||||
self.headless = headless
|
self.headless = headless
|
||||||
|
self.use_enhanced_typing = use_enhanced_typing # 是否使用增强版输入
|
||||||
|
|
||||||
async def set_schedule_time_xiaohongshu(self, page, publish_date):
|
async def set_schedule_time_xiaohongshu(self, page, publish_date):
|
||||||
"""设置定时发布时间"""
|
"""设置定时发布时间"""
|
||||||
print(" [-] 正在设置定时发布时间...")
|
xiaohongshu_logger.info(" [-] 正在设置定时发布时间...")
|
||||||
print(f"publish_date: {publish_date}")
|
|
||||||
|
try:
|
||||||
# 选择包含特定文本内容的 label 元素
|
# 定位并点击定时发布复选框
|
||||||
label_element = page.locator("label:has-text('定时发布')")
|
schedule_checkbox = await page.wait_for_selector('input[type="checkbox"]', timeout=3000)
|
||||||
# 在选中的 label 元素下点击 checkbox
|
await schedule_checkbox.click()
|
||||||
await label_element.click()
|
await asyncio.sleep(random.uniform(0.5, 1.0))
|
||||||
await asyncio.sleep(1)
|
|
||||||
publish_date_hour = publish_date.strftime("%Y-%m-%d %H:%M")
|
# 定位并点击时间输入框
|
||||||
print(f"publish_date_hour: {publish_date_hour}")
|
date_input = await page.wait_for_selector('input[placeholder="选择日期和时间"]', timeout=3000)
|
||||||
|
await date_input.click()
|
||||||
await asyncio.sleep(1)
|
await asyncio.sleep(random.uniform(0.3, 0.5))
|
||||||
await page.locator('.el-input__inner[placeholder="选择日期和时间"]').click()
|
|
||||||
await page.keyboard.press("Control+KeyA")
|
# 输入发布时间
|
||||||
await page.keyboard.type(str(publish_date_hour))
|
publish_date_str = publish_date.strftime("%Y-%m-%d %H:%M")
|
||||||
await page.keyboard.press("Enter")
|
await date_input.fill(publish_date_str)
|
||||||
|
await asyncio.sleep(random.uniform(0.3, 0.5))
|
||||||
await asyncio.sleep(1)
|
await page.keyboard.press("Enter")
|
||||||
|
|
||||||
|
xiaohongshu_logger.success(f" [-] 定时发布时间设置完成: {publish_date_str}")
|
||||||
|
return True
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
xiaohongshu_logger.error(f" [-] 设置定时发布时间失败: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
async def upload_images(self, page):
|
async def upload_images(self, page):
|
||||||
"""上传图片"""
|
"""上传图片"""
|
||||||
@ -480,50 +488,26 @@ class XiaoHongShuImage(object):
|
|||||||
xiaohongshu_logger.info(f'[+] 正在上传图片,共{len(self.image_paths)}张')
|
xiaohongshu_logger.info(f'[+] 正在上传图片,共{len(self.image_paths)}张')
|
||||||
|
|
||||||
# 等待页面加载
|
# 等待页面加载
|
||||||
await asyncio.sleep(3)
|
|
||||||
|
|
||||||
# 查找上传元素(简化选择器,移除详细日志)
|
|
||||||
upload_selectors = [
|
|
||||||
"input[class='upload-input'][type='file'][multiple]",
|
|
||||||
"input[accept='.jpg,.jpeg,.png,.webp']",
|
|
||||||
"input[type='file'][multiple]",
|
|
||||||
"input[type='file']",
|
|
||||||
]
|
|
||||||
|
|
||||||
upload_input = None
|
|
||||||
for selector in upload_selectors:
|
|
||||||
try:
|
|
||||||
upload_input = await page.wait_for_selector(selector, timeout=3000)
|
|
||||||
if upload_input:
|
|
||||||
break
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not upload_input:
|
|
||||||
# 尝试点击上传按钮
|
|
||||||
try:
|
|
||||||
upload_button = await page.wait_for_selector('button:has-text("上传图片")', timeout=3000)
|
|
||||||
if not upload_button:
|
|
||||||
upload_button = await page.wait_for_selector('div:has-text("上传图片")', timeout=3000)
|
|
||||||
|
|
||||||
if upload_button:
|
|
||||||
await upload_button.click()
|
|
||||||
await asyncio.sleep(2)
|
|
||||||
upload_input = await page.wait_for_selector("input[type='file']", timeout=3000)
|
|
||||||
|
|
||||||
if not upload_input:
|
|
||||||
raise Exception("未找到图片上传元素")
|
|
||||||
except Exception as e:
|
|
||||||
raise Exception(f"图片上传失败: {e}")
|
|
||||||
|
|
||||||
# 上传图片(显示文件名而不是完整路径)
|
|
||||||
file_names = [Path(p).name for p in self.image_paths]
|
|
||||||
xiaohongshu_logger.info(f" [-] 上传文件: {', '.join(file_names)}")
|
|
||||||
await upload_input.set_input_files(self.image_paths)
|
|
||||||
|
|
||||||
# 等待上传完成
|
|
||||||
await asyncio.sleep(2)
|
await asyncio.sleep(2)
|
||||||
await self.wait_for_images_upload_complete(page)
|
|
||||||
|
try:
|
||||||
|
# 直接定位上传输入框
|
||||||
|
upload_input = await page.wait_for_selector("input[type='file']", timeout=5000)
|
||||||
|
if not upload_input:
|
||||||
|
raise Exception("未找到图片上传元素")
|
||||||
|
|
||||||
|
# 上传图片
|
||||||
|
file_names = [Path(p).name for p in self.image_paths]
|
||||||
|
xiaohongshu_logger.info(f" [-] 上传文件: {', '.join(file_names)}")
|
||||||
|
await upload_input.set_input_files(self.image_paths)
|
||||||
|
|
||||||
|
# 等待上传完成
|
||||||
|
await asyncio.sleep(2)
|
||||||
|
await self.wait_for_images_upload_complete(page)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
xiaohongshu_logger.error(f"图片上传失败: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
async def wait_for_images_upload_complete(self, page):
|
async def wait_for_images_upload_complete(self, page):
|
||||||
"""等待图片上传完成"""
|
"""等待图片上传完成"""
|
||||||
@ -532,58 +516,39 @@ class XiaoHongShuImage(object):
|
|||||||
|
|
||||||
while wait_count < max_wait_time:
|
while wait_count < max_wait_time:
|
||||||
try:
|
try:
|
||||||
# 简化检查逻辑,移除详细日志
|
|
||||||
# 检查添加按钮
|
|
||||||
add_selectors = [
|
|
||||||
'div.entry:has-text("添加")',
|
|
||||||
'div:has-text("添加")',
|
|
||||||
'[class*="add"]:has-text("添加")'
|
|
||||||
]
|
|
||||||
|
|
||||||
for selector in add_selectors:
|
|
||||||
try:
|
|
||||||
add_button = await page.query_selector(selector)
|
|
||||||
if add_button:
|
|
||||||
xiaohongshu_logger.success(" [-] 图片上传完成")
|
|
||||||
return
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# 检查图片预览
|
# 检查图片预览
|
||||||
try:
|
images = await page.query_selector_all('img')
|
||||||
images = await page.query_selector_all('img')
|
valid_images = [img for img in images if await img.get_attribute('src')]
|
||||||
valid_images = []
|
|
||||||
for img in images:
|
|
||||||
src = await img.get_attribute('src')
|
|
||||||
if src and ('data:image' in src or 'blob:' in src or len(src) > 50):
|
|
||||||
valid_images.append(img)
|
|
||||||
|
|
||||||
if len(valid_images) >= len(self.image_paths):
|
|
||||||
xiaohongshu_logger.success(f" [-] 图片上传完成 ({len(valid_images)}张)")
|
|
||||||
await asyncio.sleep(2)
|
|
||||||
return
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# 检查加载状态
|
if len(valid_images) >= len(self.image_paths):
|
||||||
loading_elements = await page.query_selector_all('[class*="loading"], [class*="uploading"]')
|
xiaohongshu_logger.success(f" [-] 图片上传完成 ({len(valid_images)}张)")
|
||||||
if not loading_elements:
|
# 随机等待一小段时间确保图片完全加载
|
||||||
xiaohongshu_logger.success(" [-] 图片上传完成")
|
await asyncio.sleep(random.uniform(1.5, 2.5))
|
||||||
return
|
return
|
||||||
|
|
||||||
# 减少日志频率:每15秒输出一次进度
|
# 检查是否还在上传
|
||||||
if wait_count % 15 == 0 and wait_count > 0:
|
loading = await page.query_selector('[class*="loading"], [class*="uploading"]')
|
||||||
|
if not loading:
|
||||||
|
# 再次检查图片数量
|
||||||
|
images = await page.query_selector_all('img')
|
||||||
|
if len(images) >= len(self.image_paths):
|
||||||
|
xiaohongshu_logger.success(" [-] 图片上传完成")
|
||||||
|
await asyncio.sleep(random.uniform(1.0, 2.0))
|
||||||
|
return
|
||||||
|
|
||||||
|
# 每10秒输出一次进度
|
||||||
|
if wait_count % 10 == 0:
|
||||||
xiaohongshu_logger.info(f" [-] 等待图片上传... ({wait_count}/{max_wait_time}秒)")
|
xiaohongshu_logger.info(f" [-] 等待图片上传... ({wait_count}/{max_wait_time}秒)")
|
||||||
|
|
||||||
await asyncio.sleep(3)
|
await asyncio.sleep(random.uniform(2.0, 3.0))
|
||||||
wait_count += 3
|
wait_count += 2
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
xiaohongshu_logger.debug(f" [-] 检查上传状态出错: {e}")
|
xiaohongshu_logger.error(f" [-] 检查上传状态出错: {e}")
|
||||||
await asyncio.sleep(3)
|
await asyncio.sleep(random.uniform(1.0, 2.0))
|
||||||
wait_count += 3
|
wait_count += 2
|
||||||
|
|
||||||
xiaohongshu_logger.warning(" [-] 图片上传等待超时,继续流程")
|
raise Exception("图片上传超时")
|
||||||
|
|
||||||
async def locate_content_editor(self, page):
|
async def locate_content_editor(self, page):
|
||||||
"""定位正文编辑区域"""
|
"""定位正文编辑区域"""
|
||||||
@ -615,105 +580,78 @@ class XiaoHongShuImage(object):
|
|||||||
"""填充标题和内容"""
|
"""填充标题和内容"""
|
||||||
xiaohongshu_logger.info(f' [-] 正在填充标题和话题...')
|
xiaohongshu_logger.info(f' [-] 正在填充标题和话题...')
|
||||||
|
|
||||||
# 使用传入的人类化输入包装器(避免重复创建)
|
# 等待页面加载
|
||||||
|
await asyncio.sleep(2)
|
||||||
|
|
||||||
# 填充标题
|
# 填充标题
|
||||||
title_container = page.locator('div.plugin.title-container').locator('input.d-text')
|
title_container = page.locator('div.plugin.title-container').locator('input.d-text')
|
||||||
if await title_container.count():
|
if await title_container.count():
|
||||||
# 使用人类化输入填充标题
|
# 使用人类化输入填充标题
|
||||||
success = await human_typer.type_text_human(
|
await title_container.click()
|
||||||
'div.plugin.title-container input.d-text',
|
await asyncio.sleep(0.5)
|
||||||
self.title[:30],
|
await page.keyboard.press("Control+A")
|
||||||
clear_first=True
|
await page.keyboard.press("Delete")
|
||||||
)
|
await asyncio.sleep(0.3)
|
||||||
|
|
||||||
if not success:
|
# 使用视频上传中的标题输入方式
|
||||||
xiaohongshu_logger.warning("标题人类化输入失败,使用传统方式")
|
for char in self.title[:30]:
|
||||||
await title_container.fill(self.title[:30])
|
await page.keyboard.type(char, delay=random.randint(100, 200))
|
||||||
|
await asyncio.sleep(random.uniform(0.05, 0.15))
|
||||||
|
|
||||||
|
await asyncio.sleep(0.5)
|
||||||
else:
|
else:
|
||||||
# 使用人类化输入的备用方案
|
# 使用备用方案
|
||||||
success = await human_typer.type_text_human(".notranslate", self.title, clear_first=True)
|
titlecontainer = page.locator(".notranslate")
|
||||||
if not success:
|
await titlecontainer.click()
|
||||||
xiaohongshu_logger.warning("标题人类化输入失败,使用传统方式")
|
await asyncio.sleep(0.5)
|
||||||
titlecontainer = page.locator(".notranslate")
|
await page.keyboard.press("Control+A")
|
||||||
await titlecontainer.click()
|
await page.keyboard.press("Delete")
|
||||||
await page.keyboard.press("Backspace")
|
await asyncio.sleep(0.3)
|
||||||
await page.keyboard.press("Control+KeyA")
|
|
||||||
await page.keyboard.press("Delete")
|
for char in self.title:
|
||||||
await page.keyboard.type(self.title)
|
await page.keyboard.type(char, delay=random.randint(100, 200))
|
||||||
await page.keyboard.press("Enter")
|
await asyncio.sleep(random.uniform(0.05, 0.15))
|
||||||
|
|
||||||
|
await asyncio.sleep(0.5)
|
||||||
|
|
||||||
# 定位正文编辑区域
|
# 定位正文编辑区域
|
||||||
content_element, css_selector = await self.locate_content_editor(page)
|
content_element, css_selector = await self.locate_content_editor(page)
|
||||||
|
|
||||||
# 🔧 创建专门用于正文输入的人类化输入包装器
|
|
||||||
from utils.human_typing_wrapper import HumanTypingWrapper
|
|
||||||
|
|
||||||
# 根据正文长度调整输入速度配置
|
|
||||||
content_length = len(self.content) if self.content else len(self.title) + 2
|
|
||||||
|
|
||||||
# 为长文本使用更慢的输入速度,提高真实性
|
|
||||||
if content_length > 100:
|
|
||||||
# 长文本:更慢更谨慎
|
|
||||||
content_config = {
|
|
||||||
'min_delay': 80, # 最小延迟80ms
|
|
||||||
'max_delay': 200, # 最大延迟200ms
|
|
||||||
'pause_probability': 0.15, # 15%概率暂停思考
|
|
||||||
'pause_min': 800, # 暂停最少800ms
|
|
||||||
'pause_max': 2000, # 暂停最多2秒
|
|
||||||
'correction_probability': 0.02, # 2%概率打错字
|
|
||||||
'backspace_probability': 0.01, # 1%概率退格重输
|
|
||||||
}
|
|
||||||
xiaohongshu_logger.info(f" [-] 长文本模式 ({content_length}字符),使用慢速人类化输入")
|
|
||||||
else:
|
|
||||||
# 短文本:相对较快但仍然人类化
|
|
||||||
content_config = {
|
|
||||||
'min_delay': 60, # 最小延迟60ms
|
|
||||||
'max_delay': 150, # 最大延迟150ms
|
|
||||||
'pause_probability': 0.1, # 10%概率暂停
|
|
||||||
'pause_min': 500, # 暂停最少500ms
|
|
||||||
'pause_max': 1200, # 暂停最多1.2秒
|
|
||||||
'correction_probability': 0.01, # 1%概率打错字
|
|
||||||
'backspace_probability': 0.005, # 0.5%概率退格
|
|
||||||
}
|
|
||||||
xiaohongshu_logger.info(f" [-] 短文本模式 ({content_length}字符),使用标准人类化输入")
|
|
||||||
|
|
||||||
# 创建专门的正文输入器
|
|
||||||
content_typer = HumanTypingWrapper(page, content_config)
|
|
||||||
|
|
||||||
# 准备正文内容
|
# 准备正文内容
|
||||||
if self.content:
|
if self.content:
|
||||||
# 如果有自定义正文内容,使用自定义内容
|
|
||||||
content_text = self.content
|
content_text = self.content
|
||||||
xiaohongshu_logger.info(f" [-] 使用自定义正文内容,长度: {len(content_text)} 字符")
|
xiaohongshu_logger.info(f" [-] 使用自定义正文内容,长度: {len(content_text)} 字符")
|
||||||
else:
|
else:
|
||||||
# 如果没有自定义内容,使用标题作为开头
|
|
||||||
content_text = f"{self.title}\n\n"
|
content_text = f"{self.title}\n\n"
|
||||||
xiaohongshu_logger.info(" [-] 使用默认正文内容(标题)")
|
xiaohongshu_logger.info(" [-] 使用默认正文内容(标题)")
|
||||||
|
|
||||||
# 🔧 使用优化的人类化输入正文
|
try:
|
||||||
xiaohongshu_logger.info(f" [-] 开始人类化输入正文内容...")
|
# 使用增强版人类输入模拟器
|
||||||
|
from utils.enhanced_human_typing import EnhancedHumanTypingSimulator
|
||||||
# 对于长文本,分段输入更加真实
|
human_typer = EnhancedHumanTypingSimulator(page)
|
||||||
if content_length > 200:
|
|
||||||
xiaohongshu_logger.info(" [-] 长文本分段输入模式")
|
|
||||||
success = await self._input_long_content_in_segments(page, content_typer, css_selector, content_text)
|
|
||||||
else:
|
|
||||||
# 短文本直接输入
|
|
||||||
success = await content_typer.type_text_human(
|
|
||||||
css_selector,
|
|
||||||
content_text,
|
|
||||||
clear_first=True
|
|
||||||
)
|
|
||||||
|
|
||||||
if not success:
|
|
||||||
xiaohongshu_logger.warning(" [-] 正文人类化输入失败,使用传统方式")
|
|
||||||
await content_element.click()
|
|
||||||
await asyncio.sleep(0.5) # 点击后稍作等待
|
|
||||||
|
|
||||||
# 传统方式也要模拟人类输入速度
|
# 输入正文内容
|
||||||
xiaohongshu_logger.info(" [-] 使用传统方式进行人类化输入...")
|
success = await human_typer.type_text(content_text, css_selector)
|
||||||
await self._fallback_human_typing(page, content_text)
|
|
||||||
|
if not success:
|
||||||
|
xiaohongshu_logger.error(" [-] 增强版输入失败,尝试使用备用方案")
|
||||||
|
# 点击并清空输入区域
|
||||||
|
await content_element.click()
|
||||||
|
await asyncio.sleep(random.uniform(0.3, 0.5))
|
||||||
|
await page.keyboard.press("Control+A")
|
||||||
|
await page.keyboard.press("Delete")
|
||||||
|
await asyncio.sleep(random.uniform(0.2, 0.4))
|
||||||
|
|
||||||
|
# 使用简单的输入方式
|
||||||
|
for char in content_text:
|
||||||
|
await page.keyboard.type(char, delay=random.randint(100, 200))
|
||||||
|
await asyncio.sleep(random.uniform(0.05, 0.1))
|
||||||
|
|
||||||
|
xiaohongshu_logger.success(f" [-] 正文输入完成,共 {len(content_text)} 字符")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
xiaohongshu_logger.error(f" [-] 正文输入失败: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
xiaohongshu_logger.success(f" [-] 正文输入完成,共 {len(content_text)} 字符")
|
xiaohongshu_logger.success(f" [-] 正文输入完成,共 {len(content_text)} 字符")
|
||||||
|
|
||||||
@ -732,50 +670,72 @@ class XiaoHongShuImage(object):
|
|||||||
# 添加两个换行,将标签与正文分开
|
# 添加两个换行,将标签与正文分开
|
||||||
await page.keyboard.press("Enter")
|
await page.keyboard.press("Enter")
|
||||||
await page.keyboard.press("Enter")
|
await page.keyboard.press("Enter")
|
||||||
await asyncio.sleep(0.3)
|
await asyncio.sleep(0.5)
|
||||||
|
|
||||||
# 标签输入(参考视频标签添加方式)
|
# 输入标签
|
||||||
xiaohongshu_logger.info(f" [-] 开始输入标签 ({len(self.tags)}个)...")
|
xiaohongshu_logger.info(f" [-] 开始输入标签 ({len(self.tags)}个)...")
|
||||||
|
|
||||||
# 创建标签输入器
|
# 创建专门用于慢速标签输入的人类化输入包装器
|
||||||
from utils.human_typing_wrapper import HumanTypingWrapper
|
from utils.human_typing_wrapper import HumanTypingWrapper
|
||||||
tag_config = {
|
|
||||||
'min_delay': 400, 'max_delay': 700, 'pause_probability': 0.25,
|
|
||||||
'pause_min': 400, 'pause_max': 1000, 'correction_probability': 0.02,
|
|
||||||
'backspace_probability': 0.01,
|
|
||||||
}
|
|
||||||
tag_typer = HumanTypingWrapper(page, tag_config)
|
|
||||||
|
|
||||||
# 输入标签(简化日志)
|
slow_config = {
|
||||||
|
'min_delay': 500, # 最小延迟500ms(更慢)
|
||||||
|
'max_delay': 800, # 最大延迟800ms
|
||||||
|
'pause_probability': 0.3, # 30%概率暂停
|
||||||
|
'pause_min': 500, # 暂停最少500ms
|
||||||
|
'pause_max': 1200, # 暂停最多1200ms
|
||||||
|
'correction_probability': 0.0, # 禁用错误修正
|
||||||
|
'backspace_probability': 0.0, # 禁用退格重输
|
||||||
|
}
|
||||||
|
|
||||||
|
# 创建专门的慢速输入器
|
||||||
|
slow_typer = HumanTypingWrapper(page, slow_config)
|
||||||
|
|
||||||
|
# 逐个标签输入,每个标签后都有停顿
|
||||||
success = True
|
success = True
|
||||||
for i, tag in enumerate(self.tags):
|
for i, tag in enumerate(self.tags):
|
||||||
# 标签间思考时间
|
tag_text = f"#{tag}"
|
||||||
if i > 0:
|
|
||||||
import random
|
|
||||||
await asyncio.sleep(random.uniform(0.8, 1.5))
|
|
||||||
|
|
||||||
# 输入标签
|
# 输入标签文本(使用慢速配置)
|
||||||
tag_success = await tag_typer.type_text_human(
|
# 先输入#号(需要按Shift+3)
|
||||||
css_selector, f"#{tag}", clear_first=False
|
await page.keyboard.press("Shift")
|
||||||
|
await asyncio.sleep(random.uniform(0.1, 0.2))
|
||||||
|
await page.keyboard.press("Digit3")
|
||||||
|
await page.keyboard.up("Shift")
|
||||||
|
await asyncio.sleep(random.uniform(0.2, 0.4))
|
||||||
|
|
||||||
|
# 输入标签内容
|
||||||
|
tag_success = await slow_typer.type_text_human(
|
||||||
|
css_selector,
|
||||||
|
tag,
|
||||||
|
clear_first=False
|
||||||
)
|
)
|
||||||
|
|
||||||
if not tag_success:
|
if not tag_success:
|
||||||
success = False
|
success = False
|
||||||
break
|
break
|
||||||
|
|
||||||
|
# 输入换行符并添加停顿
|
||||||
|
await page.keyboard.press("Enter")
|
||||||
|
await page.wait_for_timeout(800) # 换行后停顿800ms
|
||||||
|
|
||||||
# 处理标签建议
|
xiaohongshu_logger.info(f"已输入标签: {tag} ({i+1}/{len(self.tags)})")
|
||||||
await self._handle_tag_suggestions_after_input(page, tag)
|
|
||||||
|
|
||||||
# 标签间分隔
|
|
||||||
if i < len(self.tags) - 1:
|
|
||||||
await page.keyboard.type(" ")
|
|
||||||
import random
|
|
||||||
await asyncio.sleep(random.uniform(0.2, 0.5))
|
|
||||||
|
|
||||||
# 备用输入方式
|
|
||||||
if not success:
|
if not success:
|
||||||
xiaohongshu_logger.warning(" [-] 使用备用标签输入方式")
|
xiaohongshu_logger.warning("标签人类化输入失败,使用传统方式")
|
||||||
await self._fallback_tag_input(page, css_selector)
|
await page.click(css_selector)
|
||||||
|
for index, tag in enumerate(self.tags, start=1):
|
||||||
|
# 输入#号(需要按Shift+3)
|
||||||
|
await page.keyboard.press("Shift")
|
||||||
|
await asyncio.sleep(random.uniform(0.1, 0.2))
|
||||||
|
await page.keyboard.press("Digit3")
|
||||||
|
await page.keyboard.up("Shift")
|
||||||
|
await asyncio.sleep(random.uniform(0.2, 0.4))
|
||||||
|
|
||||||
|
for char in tag:
|
||||||
|
await page.keyboard.type(char, delay=500)
|
||||||
|
await page.wait_for_timeout(1000)
|
||||||
|
await page.keyboard.press("Enter")
|
||||||
|
|
||||||
xiaohongshu_logger.success(f' [-] 标签输入完成 ({len(self.tags)}个)')
|
xiaohongshu_logger.success(f' [-] 标签输入完成 ({len(self.tags)}个)')
|
||||||
|
|
||||||
@ -856,9 +816,19 @@ class XiaoHongShuImage(object):
|
|||||||
|
|
||||||
# 段落间添加换行和思考时间
|
# 段落间添加换行和思考时间
|
||||||
if i < len(paragraphs):
|
if i < len(paragraphs):
|
||||||
|
# 先停顿一下,模拟思考下一段内容
|
||||||
|
await asyncio.sleep(random.uniform(2.0, 4.0))
|
||||||
|
|
||||||
|
# 输入第一个换行,短暂停顿
|
||||||
await page.keyboard.press("Enter")
|
await page.keyboard.press("Enter")
|
||||||
|
await asyncio.sleep(random.uniform(0.3, 0.6))
|
||||||
|
|
||||||
|
# 输入第二个换行,再次短暂停顿
|
||||||
await page.keyboard.press("Enter")
|
await page.keyboard.press("Enter")
|
||||||
await asyncio.sleep(random.uniform(1.0, 3.0))
|
await asyncio.sleep(random.uniform(0.5, 1.0))
|
||||||
|
|
||||||
|
# 段落间再次思考
|
||||||
|
await asyncio.sleep(random.uniform(1.5, 3.0))
|
||||||
|
|
||||||
xiaohongshu_logger.success(" [-] 分段输入完成")
|
xiaohongshu_logger.success(" [-] 分段输入完成")
|
||||||
return True
|
return True
|
||||||
@ -1135,64 +1105,24 @@ class XiaoHongShuImage(object):
|
|||||||
xiaohongshu_logger.info(f" [-] 开始设置地理位置: {location}")
|
xiaohongshu_logger.info(f" [-] 开始设置地理位置: {location}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 1. 点击地点输入框
|
# 定位并点击地点选择框
|
||||||
xiaohongshu_logger.info(" [-] 点击地点输入框...")
|
location_input = await page.wait_for_selector('div[class*="d-select"]', timeout=3000)
|
||||||
selectors = [
|
await location_input.click()
|
||||||
'div.d-select--color-text-title--color-bg-fill',
|
await asyncio.sleep(random.uniform(0.3, 0.5))
|
||||||
'div.d-text.d-select-placeholder.d-text-ellipsis.d-text-nowrap',
|
|
||||||
'div[class*="d-select"]'
|
|
||||||
]
|
|
||||||
|
|
||||||
clicked = False
|
# 输入地点名称
|
||||||
for selector in selectors:
|
|
||||||
try:
|
|
||||||
element = await page.wait_for_selector(selector, timeout=3000)
|
|
||||||
await element.click()
|
|
||||||
clicked = True
|
|
||||||
break
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not clicked:
|
|
||||||
xiaohongshu_logger.error(" [-] 未找到地点输入框")
|
|
||||||
return False
|
|
||||||
|
|
||||||
# 2. 输入地点名称
|
|
||||||
xiaohongshu_logger.info(f" [-] 输入地点名称: {location}")
|
|
||||||
await page.keyboard.press("Control+a")
|
|
||||||
await page.keyboard.type(location)
|
await page.keyboard.type(location)
|
||||||
await asyncio.sleep(2) # 等待下拉选项加载
|
await asyncio.sleep(random.uniform(1.0, 1.5)) # 等待下拉列表加载
|
||||||
|
|
||||||
# 3. 选择匹配的地点选项
|
# 选择第一个匹配的选项
|
||||||
xiaohongshu_logger.info(" [-] 查找匹配的地点选项...")
|
option = await page.wait_for_selector(f'div[class*="name"]:has-text("{location}")', timeout=3000)
|
||||||
|
if option:
|
||||||
|
await option.click()
|
||||||
|
xiaohongshu_logger.success(f" [-] 成功选择地点: {location}")
|
||||||
|
return True
|
||||||
|
|
||||||
# 尝试多种选择器找到包含地点名称的选项
|
xiaohongshu_logger.warning(f" [-] 未找到匹配的地点选项: {location}")
|
||||||
option_selectors = [
|
return False
|
||||||
f'//div[contains(@class, "name") and contains(text(), "{location}")]',
|
|
||||||
f'//div[contains(text(), "{location}市")]',
|
|
||||||
f'//div[contains(text(), "{location}")]'
|
|
||||||
]
|
|
||||||
|
|
||||||
selected = False
|
|
||||||
for selector in option_selectors:
|
|
||||||
try:
|
|
||||||
options = await page.query_selector_all(selector)
|
|
||||||
if options:
|
|
||||||
# 选择第一个匹配的选项
|
|
||||||
option = options[0]
|
|
||||||
option_text = await option.inner_text()
|
|
||||||
await option.click()
|
|
||||||
xiaohongshu_logger.success(f" [-] 成功选择地点: {option_text}")
|
|
||||||
selected = True
|
|
||||||
break
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not selected:
|
|
||||||
xiaohongshu_logger.warning(f" [-] 未找到匹配的地点选项: {location}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
xiaohongshu_logger.error(f" [-] 设置地理位置失败: {e}")
|
xiaohongshu_logger.error(f" [-] 设置地理位置失败: {e}")
|
||||||
@ -1200,118 +1130,98 @@ class XiaoHongShuImage(object):
|
|||||||
|
|
||||||
async def upload(self, playwright: Playwright) -> None:
|
async def upload(self, playwright: Playwright) -> None:
|
||||||
"""主要的上传流程"""
|
"""主要的上传流程"""
|
||||||
# 🔧 使用增强的反检测浏览器配置
|
|
||||||
from utils.anti_detection import AntiDetectionConfig
|
|
||||||
import random
|
|
||||||
|
|
||||||
# 反检测浏览器参数
|
|
||||||
browser_args = AntiDetectionConfig.STANDARD_BROWSER_ARGS.copy()
|
|
||||||
|
|
||||||
# 使用 Chromium 浏览器启动一个浏览器实例
|
# 使用 Chromium 浏览器启动一个浏览器实例
|
||||||
if self.local_executable_path:
|
if self.local_executable_path:
|
||||||
browser = await playwright.chromium.launch(
|
browser = await playwright.chromium.launch(
|
||||||
headless=self.headless,
|
headless=self.headless,
|
||||||
executable_path=self.local_executable_path,
|
executable_path=self.local_executable_path
|
||||||
args=browser_args # 🔧 添加反检测参数
|
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
browser = await playwright.chromium.launch(
|
browser = await playwright.chromium.launch(
|
||||||
headless=self.headless,
|
headless=self.headless
|
||||||
args=browser_args # 🔧 添加反检测参数
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# 🔧 创建增强的浏览器上下文
|
|
||||||
context_options = {
|
|
||||||
"storage_state": f"{self.account_file}",
|
|
||||||
"locale": "zh-CN",
|
|
||||||
"timezone_id": "Asia/Shanghai"
|
|
||||||
}
|
|
||||||
|
|
||||||
# 🔧 为无头模式添加完整的反检测设置
|
# 创建一个浏览器上下文,使用基本配置
|
||||||
if self.headless:
|
context = await browser.new_context(
|
||||||
context_options.update({
|
viewport={"width": 1600, "height": 900},
|
||||||
'viewport': {'width': 1920, 'height': 1080}, # 🔧 使用文档建议的分辨率
|
storage_state=f"{self.account_file}"
|
||||||
'device_scale_factor': 1,
|
)
|
||||||
'has_touch': False,
|
|
||||||
'is_mobile': False
|
|
||||||
})
|
|
||||||
|
|
||||||
# 使用随机用户代理
|
|
||||||
user_agent = random.choice(AntiDetectionConfig.REAL_USER_AGENTS)
|
|
||||||
context_options["user_agent"] = user_agent
|
|
||||||
xiaohongshu_logger.info(f" [-] 无头模式设置: 1920x1080")
|
|
||||||
xiaohongshu_logger.info(f" [-] 使用用户代理: {user_agent[:50]}...")
|
|
||||||
else:
|
|
||||||
# 有头模式使用较小的窗口
|
|
||||||
context_options["viewport"] = {"width": 1600, "height": 900}
|
|
||||||
xiaohongshu_logger.info(f" [-] 有头模式设置: 1600x900")
|
|
||||||
|
|
||||||
context = await browser.new_context(**context_options)
|
|
||||||
context = await set_init_script(context)
|
context = await set_init_script(context)
|
||||||
|
|
||||||
# 创建一个新的页面
|
# 创建一个新的页面
|
||||||
page = await context.new_page()
|
page = await context.new_page()
|
||||||
|
|
||||||
# 🔧 创建人类化输入包装器(关键修复)
|
# 🔧 创建人类化输入包装器(关键修复)
|
||||||
human_typer = create_human_typer(page)
|
if self.use_enhanced_typing:
|
||||||
xiaohongshu_logger.info(" [-] 已创建人类化输入包装器")
|
from utils.enhanced_human_typing import EnhancedHumanTypingSimulator
|
||||||
|
human_typer = EnhancedHumanTypingSimulator(page)
|
||||||
# 直接访问小红书图文发布页面
|
xiaohongshu_logger.info(" [-] 已创建增强版人类化输入模拟器")
|
||||||
await page.goto("https://creator.xiaohongshu.com/publish/publish?from=tab_switch&target=image")
|
|
||||||
xiaohongshu_logger.info(f'[+]正在上传图文-------{self.title}')
|
|
||||||
|
|
||||||
# 等待页面加载
|
|
||||||
xiaohongshu_logger.info(f'[-] 正在打开图文发布页面...')
|
|
||||||
await page.wait_for_url("https://creator.xiaohongshu.com/publish/publish*")
|
|
||||||
|
|
||||||
# 上传图片
|
|
||||||
await self.upload_images(page)
|
|
||||||
|
|
||||||
# 填充内容(传递人类化输入包装器)
|
|
||||||
await self.fill_content(page, human_typer)
|
|
||||||
|
|
||||||
# 设置位置(如果有指定地点)
|
|
||||||
if self.location and self.location.strip():
|
|
||||||
xiaohongshu_logger.info(f" [-] 开始设置地理位置: {self.location}")
|
|
||||||
await self.set_location(page, self.location)
|
|
||||||
else:
|
else:
|
||||||
xiaohongshu_logger.info(" [-] 未指定地点或地点为空,跳过位置设置")
|
human_typer = create_human_typer(page)
|
||||||
|
xiaohongshu_logger.info(" [-] 已创建标准人类化输入包装器")
|
||||||
|
|
||||||
# 设置定时发布(如果需要)
|
try:
|
||||||
if self.publish_date != 0:
|
# 直接访问小红书图文发布页面
|
||||||
await self.set_schedule_time_xiaohongshu(page, self.publish_date)
|
await page.goto("https://creator.xiaohongshu.com/publish/publish?from=tab_switch&target=image")
|
||||||
|
xiaohongshu_logger.info(f'[+]正在上传图文-------{self.title}')
|
||||||
|
|
||||||
|
# 等待页面加载
|
||||||
|
xiaohongshu_logger.info(f'[-] 正在打开图文发布页面...')
|
||||||
|
await page.wait_for_url("https://creator.xiaohongshu.com/publish/publish*")
|
||||||
|
await asyncio.sleep(2) # 等待页面完全加载
|
||||||
|
|
||||||
|
# 上传图片
|
||||||
|
await self.upload_images(page)
|
||||||
|
|
||||||
|
# 等待页面稳定
|
||||||
|
await asyncio.sleep(3) # 增加等待时间,确保页面稳定
|
||||||
|
|
||||||
|
# 填充内容(传递人类化输入包装器)
|
||||||
|
await self.fill_content(page, human_typer)
|
||||||
|
|
||||||
|
# 设置位置(如果有指定地点)
|
||||||
|
if self.location and self.location.strip():
|
||||||
|
xiaohongshu_logger.info(f" [-] 开始设置地理位置: {self.location}")
|
||||||
|
await self.set_location(page, self.location)
|
||||||
|
else:
|
||||||
|
xiaohongshu_logger.info(" [-] 未指定地点或地点为空,跳过位置设置")
|
||||||
|
|
||||||
|
# 设置定时发布(如果需要)
|
||||||
|
if self.publish_date != 0:
|
||||||
|
await self.set_schedule_time_xiaohongshu(page, self.publish_date)
|
||||||
|
except Exception as e:
|
||||||
|
xiaohongshu_logger.error(f"页面操作出错: {e}")
|
||||||
|
# 保存页面截图以便调试
|
||||||
|
await page.screenshot(path="error_screenshot.png", full_page=True)
|
||||||
|
raise
|
||||||
|
|
||||||
# 发布图文(增强反检测等待策略)
|
# 发布图文
|
||||||
xiaohongshu_logger.info(" [-] 准备发布图文...")
|
xiaohongshu_logger.info(" [-] 准备发布图文...")
|
||||||
await asyncio.sleep(1) # 发布前等待
|
await asyncio.sleep(random.uniform(0.5, 1.0)) # 发布前等待
|
||||||
|
|
||||||
while True:
|
try:
|
||||||
try:
|
# 定位并点击发布按钮
|
||||||
# 等待并点击发布按钮
|
button_text = "定时发布" if self.publish_date != 0 else "发布"
|
||||||
if self.publish_date != 0:
|
publish_button = await page.wait_for_selector(f'button:has-text("{button_text}")', timeout=3000)
|
||||||
xiaohongshu_logger.info(" [-] 点击定时发布按钮...")
|
|
||||||
await page.locator('button:has-text("定时发布")').click()
|
if not publish_button:
|
||||||
else:
|
raise Exception(f"未找到{button_text}按钮")
|
||||||
xiaohongshu_logger.info(" [-] 点击发布按钮...")
|
|
||||||
await page.locator('button:has-text("发布")').click()
|
# 点击发布按钮
|
||||||
|
await publish_button.click()
|
||||||
# 增加发布后的等待时间
|
await asyncio.sleep(random.uniform(0.5, 1.0))
|
||||||
await asyncio.sleep(1)
|
|
||||||
|
# 等待发布成功
|
||||||
await page.wait_for_url(
|
success_url = "https://creator.xiaohongshu.com/publish/success"
|
||||||
"https://creator.xiaohongshu.com/publish/success?**",
|
await page.wait_for_url(f"{success_url}?**", timeout=5000)
|
||||||
timeout=5000 # 增加超时时间到5秒
|
|
||||||
)
|
xiaohongshu_logger.success(" [-] 图文发布成功")
|
||||||
xiaohongshu_logger.success(" [-]图文发布成功")
|
|
||||||
break
|
except Exception as e:
|
||||||
except Exception as e:
|
xiaohongshu_logger.error(f" [-] 发布失败: {e}")
|
||||||
xiaohongshu_logger.info(" [-] 图文正在发布中...")
|
# 保存错误截图
|
||||||
xiaohongshu_logger.debug(f" [-] 等待详情: {str(e)}")
|
await page.screenshot(path="publish_error.png", full_page=True)
|
||||||
await page.screenshot(full_page=True)
|
raise
|
||||||
# 使用随机等待时间,模拟人类行为
|
|
||||||
import random
|
|
||||||
wait_time = random.uniform(1.0, 2.0) # 1-2秒随机等待
|
|
||||||
await asyncio.sleep(wait_time)
|
|
||||||
|
|
||||||
# 保存cookie并关闭浏览器
|
# 保存cookie并关闭浏览器
|
||||||
await context.storage_state(path=self.account_file)
|
await context.storage_state(path=self.account_file)
|
||||||
|
|||||||
@ -2,64 +2,68 @@ import time
|
|||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import jieba
|
||||||
|
import jieba.posseg as pseg
|
||||||
from typing import Dict, List, Optional
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
|
# 初始化结巴分词的词典
|
||||||
|
jieba.initialize()
|
||||||
|
|
||||||
class EnhancedHumanTypingSimulator:
|
class EnhancedHumanTypingSimulator:
|
||||||
def __init__(self, page=None):
|
def __init__(self, page=None):
|
||||||
# 保留原方案的简单配置
|
|
||||||
self.base_config = {
|
|
||||||
'min_typing_speed': 5,
|
|
||||||
'max_typing_speed': 15,
|
|
||||||
'pause_probability': 0.1,
|
|
||||||
'chunk_input': True,
|
|
||||||
'max_chunk_length': 50
|
|
||||||
}
|
|
||||||
|
|
||||||
# 新增高级特性配置
|
|
||||||
self.advanced_config = {
|
|
||||||
# 人类状态模拟
|
|
||||||
'energy_level': random.uniform(0.7, 1.0),
|
|
||||||
'typing_proficiency': random.uniform(0.6, 0.9),
|
|
||||||
'emotion_state': random.uniform(0.8, 1.0),
|
|
||||||
|
|
||||||
# 错误处理
|
|
||||||
'base_error_rate': random.uniform(0.02, 0.05),
|
|
||||||
'error_correction_speed': random.uniform(0.3, 0.8),
|
|
||||||
|
|
||||||
# 速度控制
|
|
||||||
'speed_variance': random.uniform(0.1, 0.2),
|
|
||||||
'burst_speed_probability': 0.1
|
|
||||||
}
|
|
||||||
|
|
||||||
self.page = page
|
self.page = page
|
||||||
self.typing_session = {
|
# 优化配置管理
|
||||||
'start_time': None,
|
self.config = {
|
||||||
'chars_typed': 0,
|
'char_delay': (80, 150), # 减少基础字符延迟
|
||||||
'last_break_time': time.time()
|
'punct_delay': (150, 250), # 减少标点符号延迟
|
||||||
|
'paragraph_pause': (0.5, 1.0), # 减少段落停顿
|
||||||
|
'natural_pause': 0.08, # 降低自然停顿概率
|
||||||
|
'thought_pause': (0.2, 0.4), # 减少思考停顿时间
|
||||||
|
'word_pause': (0.1, 0.25), # 减少词语间停顿
|
||||||
|
'chunk_pause': (0.2, 0.4), # 减少语义块停顿
|
||||||
|
'char_count_pause': (25, 35), # 增加字符计数范围
|
||||||
|
'char_count_delay': (0.1, 0.3), # 减少字符计数停顿
|
||||||
|
'fatigue_threshold': 300, # 增加疲劳阈值
|
||||||
|
'error_rate_base': 0.01, # 降低基础错误率
|
||||||
|
'error_rate_max': 0.05, # 降低最大错误率
|
||||||
|
'distraction_probability': 0.02 # 降低分心概率
|
||||||
|
}
|
||||||
|
|
||||||
|
# 状态管理
|
||||||
|
self.state = {
|
||||||
|
'fatigue': 0.0, # 疲劳度 (0-1)
|
||||||
|
'attention': 1.0, # 注意力 (0-1)
|
||||||
|
'chars_typed': 0, # 已输入字符数
|
||||||
|
'last_break_time': 0, # 上次休息时间
|
||||||
|
'continuous_typing': 0 # 连续输入时间
|
||||||
}
|
}
|
||||||
|
|
||||||
async def type_text(self, text: str, selector: str = None) -> bool:
|
async def type_text(self, text: str, selector: str = None) -> bool:
|
||||||
"""增强版的文本输入方法"""
|
"""优化的文本输入方法"""
|
||||||
try:
|
try:
|
||||||
if selector:
|
if selector:
|
||||||
# 等待并点击元素
|
|
||||||
await self._prepare_input(selector)
|
await self._prepare_input(selector)
|
||||||
|
|
||||||
# 初始化会话
|
# 简单分段
|
||||||
self.typing_session['start_time'] = time.time()
|
paragraphs = text.split('\n\n')
|
||||||
|
|
||||||
# 智能分段
|
for i, paragraph in enumerate(paragraphs):
|
||||||
chunks = self._smart_split_text(text)
|
if not paragraph.strip():
|
||||||
|
continue
|
||||||
for chunk in chunks:
|
|
||||||
# 获取当前状态
|
# 段落输入
|
||||||
current_state = self._get_current_state()
|
await self._type_paragraph(paragraph)
|
||||||
|
|
||||||
# 输入当前段落
|
# 段落间添加换行和思考时间
|
||||||
await self._type_chunk(chunk, current_state)
|
if i < len(paragraphs) - 1:
|
||||||
|
# 段落结束,停顿思考
|
||||||
# 段落间自然停顿
|
await asyncio.sleep(random.uniform(0.5, 1.0))
|
||||||
await self._natural_pause(current_state)
|
# 输入两个换行
|
||||||
|
await self.page.keyboard.press("Enter")
|
||||||
|
await asyncio.sleep(random.uniform(0.1, 0.2))
|
||||||
|
await self.page.keyboard.press("Enter")
|
||||||
|
# 准备输入下一段
|
||||||
|
await asyncio.sleep(random.uniform(0.8, 1.5))
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@ -67,166 +71,330 @@ class EnhancedHumanTypingSimulator:
|
|||||||
print(f"输入文本时出错: {e}")
|
print(f"输入文本时出错: {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _smart_split_text(self, text: str) -> List[str]:
|
def _split_text_into_chunks(self, text: str) -> list:
|
||||||
"""智能文本分段"""
|
"""使用结巴分词进行智能分词"""
|
||||||
paragraphs = text.split('\n')
|
|
||||||
chunks = []
|
chunks = []
|
||||||
|
|
||||||
for para in paragraphs:
|
# 使用结巴词性标注
|
||||||
if len(para) <= self.base_config['max_chunk_length']:
|
words = pseg.cut(text)
|
||||||
if para.strip():
|
|
||||||
chunks.append(para)
|
current_chunk = ""
|
||||||
|
for word, flag in words:
|
||||||
|
# 处理标点符号
|
||||||
|
if flag == 'x':
|
||||||
|
if current_chunk:
|
||||||
|
chunks.append(current_chunk)
|
||||||
|
chunks.append(word)
|
||||||
|
current_chunk = ""
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 处理空格
|
||||||
|
if word.isspace():
|
||||||
|
if current_chunk:
|
||||||
|
chunks.append(current_chunk)
|
||||||
|
chunks.append(word)
|
||||||
|
current_chunk = ""
|
||||||
continue
|
continue
|
||||||
|
|
||||||
sentences = re.split(r'([。!?,:;])', para)
|
# 处理表情符号和特殊字符
|
||||||
current_chunk = ''
|
if re.match(r'[^\u4e00-\u9fff\w\s]', word):
|
||||||
|
if current_chunk:
|
||||||
|
chunks.append(current_chunk)
|
||||||
|
chunks.append(word)
|
||||||
|
current_chunk = ""
|
||||||
|
continue
|
||||||
|
|
||||||
for sent in sentences:
|
# 根据词性决定是否需要独立成块
|
||||||
if len(current_chunk) + len(sent) < self.base_config['max_chunk_length']:
|
if flag in ['n', 'v', 'a']: # 名词、动词、形容词
|
||||||
current_chunk += sent
|
# 如果当前词较长,可能需要再次切分
|
||||||
else:
|
if len(word) > 3:
|
||||||
if current_chunk.strip():
|
sub_chunks = self._split_long_word(word)
|
||||||
|
if current_chunk:
|
||||||
chunks.append(current_chunk)
|
chunks.append(current_chunk)
|
||||||
current_chunk = sent
|
chunks.extend(sub_chunks)
|
||||||
|
current_chunk = ""
|
||||||
if current_chunk.strip():
|
else:
|
||||||
|
if current_chunk:
|
||||||
|
chunks.append(current_chunk)
|
||||||
|
chunks.append(word)
|
||||||
|
current_chunk = ""
|
||||||
|
else:
|
||||||
|
# 对于其他词性,累积到当前块
|
||||||
|
current_chunk += word
|
||||||
|
# 如果累积的块太长,进行切分
|
||||||
|
if len(current_chunk) >= 3:
|
||||||
|
chunks.append(current_chunk)
|
||||||
|
current_chunk = ""
|
||||||
|
|
||||||
|
# 添加最后剩余的块
|
||||||
|
if current_chunk:
|
||||||
chunks.append(current_chunk)
|
chunks.append(current_chunk)
|
||||||
|
|
||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
def _get_current_state(self) -> Dict:
|
def _split_long_word(self, word: str) -> List[str]:
|
||||||
"""获取当前输入状态"""
|
"""处理长词的切分"""
|
||||||
typing_duration = time.time() - self.typing_session['start_time']
|
result = []
|
||||||
fatigue = min(typing_duration / 300, 0.7)
|
temp = ""
|
||||||
|
for char in word:
|
||||||
self.advanced_config['energy_level'] *= (1 - fatigue * 0.1)
|
temp += char
|
||||||
self.advanced_config['emotion_state'] *= random.uniform(0.98, 1.02)
|
if len(temp) == 2: # 按双字切分
|
||||||
|
result.append(temp)
|
||||||
return {
|
temp = ""
|
||||||
'energy_level': max(0.3, self.advanced_config['energy_level']),
|
if temp: # 处理剩余字符
|
||||||
'emotion_state': max(0.4, min(1.0, self.advanced_config['emotion_state'])),
|
result.append(temp)
|
||||||
'typing_proficiency': self.advanced_config['typing_proficiency'],
|
return result
|
||||||
'current_error_rate': self._calculate_error_rate(fatigue)
|
|
||||||
}
|
|
||||||
|
|
||||||
async def _type_chunk(self, chunk: str, state: Dict):
|
def _update_state(self, chars_typed: int = 1):
|
||||||
"""输入文本块"""
|
"""更新状态"""
|
||||||
for char in chunk:
|
current_time = time.time()
|
||||||
typing_speed = self._calculate_typing_speed(state)
|
|
||||||
|
# 更新连续输入时间
|
||||||
if random.random() < state['current_error_rate']:
|
if current_time - self.state['last_break_time'] > 5: # 如果超过5秒没有长停顿
|
||||||
await self._handle_typing_error(char, state)
|
self.state['continuous_typing'] += chars_typed
|
||||||
|
|
||||||
|
# 更新疲劳度
|
||||||
|
fatigue_increase = chars_typed / self.config['fatigue_threshold']
|
||||||
|
self.state['fatigue'] = min(1.0, self.state['fatigue'] + fatigue_increase)
|
||||||
|
|
||||||
|
# 更新注意力
|
||||||
|
if self.state['continuous_typing'] > 100: # 连续输入超过100个字符
|
||||||
|
self.state['attention'] *= 0.95 # 注意力下降
|
||||||
|
|
||||||
|
# 记录字符数
|
||||||
|
self.state['chars_typed'] += chars_typed
|
||||||
|
|
||||||
|
# 检查是否需要休息
|
||||||
|
if self.state['fatigue'] > 0.7 or self.state['attention'] < 0.5:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _take_break(self):
    """Simulate a rest: halve fatigue, boost attention and restart the typing streak."""
    tracker = self.state
    # Attention recovers by 50% but never exceeds full attention (1.0).
    restored_attention = min(1.0, tracker['attention'] * 1.5)
    tracker.update({
        'fatigue': tracker['fatigue'] * 0.5,
        'attention': restored_attention,
        'continuous_typing': 0,
        # Remember when this break happened.
        'last_break_time': time.time(),
    })
|
||||||
|
|
||||||
|
def _get_current_error_rate(self) -> float:
    """Return the current typo probability, capped at the configured maximum."""
    floor = self.config['error_rate_base']
    ceiling = self.config['error_rate_max']
    # Fatigue moves the rate from the base value towards the maximum.
    fatigue_part = self.state['fatigue'] * (ceiling - floor)
    # Lost attention adds up to 0.05 on top of that.
    attention_part = (1 - self.state['attention']) * 0.05
    uncapped = floor + fatigue_part + attention_part
    return min(ceiling, uncapped)
|
||||||
|
|
||||||
|
async def _simulate_error(self, char: str):
    """Simulate a typo and its correction.

    Types one wrong character, pauses, presses Backspace, pauses again and
    finally types ``char`` through ``self.page.keyboard``.

    Args:
        char: The single character that should end up in the input.
    """
    # CJK characters are swapped for another common CJK character,
    # everything else for a home-row key.
    pool = '的地得了着过去来到和与及' if '\u4e00' <= char <= '\u9fff' else 'asdfjkl;'
    # Exclude the intended character: otherwise the simulated "typo" could be
    # the correct character itself, which is then deleted and retyped.
    candidates = [candidate for candidate in pool if candidate != char]
    wrong_char = random.choice(candidates)

    # Type the wrong character.
    await self.page.keyboard.type(wrong_char)
    await asyncio.sleep(random.uniform(0.2, 0.5))  # time to notice the mistake

    # Delete the wrong character.
    await self.page.keyboard.press('Backspace')
    await asyncio.sleep(random.uniform(0.1, 0.3))  # short pause after deleting

    # Type the correct character.
    await self.page.keyboard.type(char)
|
||||||
|
|
||||||
|
async def _simulate_distraction(self):
    """Simulate a brief lapse of attention, then recover as after a break."""
    # Idle for a random 0.8-1.5 seconds.
    await asyncio.sleep(random.uniform(0.8, 1.5))
    # A distraction also counts as a rest, so apply the same recovery.
    self._take_break()
|
||||||
|
|
||||||
|
async def _type_paragraph(self, paragraph: str):
|
||||||
|
"""优化的段落输入方法"""
|
||||||
|
# 将段落分割成词语块
|
||||||
|
chunks = self._split_text_into_chunks(paragraph)
|
||||||
|
|
||||||
|
# 计算语义块(通常是3-4个词语一组)
|
||||||
|
semantic_chunks = []
|
||||||
|
current_semantic = []
|
||||||
|
word_count = 0
|
||||||
|
|
||||||
|
for chunk in chunks:
|
||||||
|
current_semantic.append(chunk)
|
||||||
|
if chunk in ',。!?、;:': # 遇到标点就是一个语义块的结束
|
||||||
|
semantic_chunks.append(current_semantic)
|
||||||
|
current_semantic = []
|
||||||
|
word_count = 0
|
||||||
else:
|
else:
|
||||||
await self._type_char(char, typing_speed)
|
word_count += 1
|
||||||
|
if word_count >= random.randint(2, 3): # 2-3个词语组成一个语义块
|
||||||
|
semantic_chunks.append(current_semantic)
|
||||||
|
current_semantic = []
|
||||||
|
word_count = 0
|
||||||
|
|
||||||
|
if current_semantic:
|
||||||
|
semantic_chunks.append(current_semantic)
|
||||||
|
|
||||||
|
# 输入每个语义块
|
||||||
|
for semantic_block in semantic_chunks:
|
||||||
|
# 语义块之前可能停顿思考
|
||||||
|
if random.random() < self.config['natural_pause']:
|
||||||
|
await asyncio.sleep(random.uniform(*self.config['thought_pause']))
|
||||||
|
|
||||||
self.typing_session['chars_typed'] += 1
|
# 输入语义块中的每个词语
|
||||||
await self._micro_pause(state)
|
for chunk in semantic_block:
|
||||||
|
# 检查疲劳状态
|
||||||
def _calculate_typing_speed(self, state: Dict) -> float:
|
if self._update_state(len(chunk)):
|
||||||
"""计算实时打字速度"""
|
# 需要短暂休息
|
||||||
base_speed = random.uniform(
|
await asyncio.sleep(random.uniform(0.5, 1.0)) # 减少休息时间
|
||||||
self.base_config['min_typing_speed'],
|
self._take_break()
|
||||||
self.base_config['max_typing_speed']
|
|
||||||
)
|
# 检查是否轻微分心
|
||||||
|
if random.random() < self.config['distraction_probability'] and len(chunk) > 2:
|
||||||
speed = base_speed * (
|
await self._simulate_distraction()
|
||||||
0.7 + state['energy_level'] * 0.3 +
|
|
||||||
state['emotion_state'] * 0.2 +
|
# 词语输入
|
||||||
state['typing_proficiency'] * 0.3
|
for char in chunk:
|
||||||
)
|
# 检查是否出错(只在疲劳时更容易出错)
|
||||||
|
if self.state['fatigue'] > 0.6: # 只有疲劳时才容易出错
|
||||||
speed *= random.uniform(
|
current_error_rate = self._get_current_error_rate()
|
||||||
1 - self.advanced_config['speed_variance'],
|
if random.random() < current_error_rate:
|
||||||
1 + self.advanced_config['speed_variance']
|
await self._simulate_error(char)
|
||||||
)
|
continue
|
||||||
|
|
||||||
return speed
|
# 正常字符输入
|
||||||
|
if char in ',。!?、;:':
|
||||||
def _calculate_error_rate(self, fatigue: float) -> float:
|
# 标点符号输入
|
||||||
"""计算当前错误率"""
|
delay = random.randint(*self.config['punct_delay'])
|
||||||
base_rate = self.advanced_config['base_error_rate']
|
# 疲劳会增加延迟
|
||||||
error_rate = base_rate * (1 + fatigue)
|
delay = int(delay * (1 + self.state['fatigue'] * 0.5))
|
||||||
error_rate *= random.uniform(0.8, 1.2)
|
await self.page.keyboard.type(char, delay=delay)
|
||||||
return min(error_rate, 0.15)
|
# 标点符号后一定停顿
|
||||||
|
await asyncio.sleep(random.uniform(*self.config['word_pause']))
|
||||||
async def _handle_typing_error(self, char: str, state: Dict):
|
else:
|
||||||
"""处理打字错误"""
|
# 普通字符输入
|
||||||
error_types = ['typo', 'double_hit', 'delay']
|
delay = random.randint(*self.config['char_delay'])
|
||||||
error_type = random.choice(error_types)
|
# 疲劳和注意力影响输入速度
|
||||||
|
delay = int(delay * (1 + self.state['fatigue'] * 0.5 - self.state['attention'] * 0.2))
|
||||||
if error_type == 'typo':
|
await self.page.keyboard.type(char, delay=delay)
|
||||||
wrong_char = self._get_similar_char(char)
|
|
||||||
await self._type_char(wrong_char, self._calculate_typing_speed(state))
|
# 更新状态
|
||||||
await asyncio.sleep(random.uniform(0.2, 0.5))
|
self._update_state()
|
||||||
await self._press_key("Backspace")
|
|
||||||
await self._type_char(char, self._calculate_typing_speed(state))
|
# 词语间停顿
|
||||||
|
pause_time = random.uniform(*self.config['word_pause'])
|
||||||
|
# 疲劳会增加停顿时间
|
||||||
|
pause_time *= (1 + self.state['fatigue'] * 0.3)
|
||||||
|
await asyncio.sleep(pause_time)
|
||||||
|
|
||||||
elif error_type == 'double_hit':
|
# 语义块之间的停顿
|
||||||
await self._type_char(char, self._calculate_typing_speed(state))
|
pause_time = random.uniform(*self.config['chunk_pause'])
|
||||||
await self._type_char(char, self._calculate_typing_speed(state))
|
# 疲劳和注意力影响停顿时间
|
||||||
await asyncio.sleep(random.uniform(0.1, 0.3))
|
pause_time *= (1 + self.state['fatigue'] * 0.5 - self.state['attention'] * 0.2)
|
||||||
await self._press_key("Backspace")
|
await asyncio.sleep(pause_time)
|
||||||
|
|
||||||
else: # delay
|
|
||||||
await asyncio.sleep(random.uniform(0.3, 0.8))
|
|
||||||
await self._type_char(char, self._calculate_typing_speed(state))
|
|
||||||
|
|
||||||
async def _natural_pause(self, state: Dict):
    """Sleep for a randomised pause that grows when energy or mood is low.

    Args:
        state: Mapping that provides ``energy_level`` and ``emotion_state``.
    """
    duration = random.uniform(0.5, 1.5)

    low_energy = state['energy_level'] < 0.5
    low_mood = state['emotion_state'] < 0.6
    if low_energy:
        duration = duration * 1.3
    if low_mood:
        duration = duration * 1.2

    # Final +/-20% jitter on the pause length.
    jitter = random.uniform(0.8, 1.2)
    await asyncio.sleep(duration * jitter)
|
|
||||||
|
|
||||||
async def _micro_pause(self, state: Dict):
    """Sleep for a tiny gap between characters, 20% longer when energy is low.

    Args:
        state: Mapping that provides ``energy_level``.
    """
    gap = random.uniform(0.05, 0.15)
    tired = state['energy_level'] < 0.5
    await asyncio.sleep(gap * 1.2 if tired else gap)
|
|
||||||
|
|
||||||
def _get_similar_char(self, char: str) -> str:
    """Pick a stand-in character for ``char`` to use as a simulated typo.

    Characters listed in the table get one of their two alternatives at
    random; any other character is returned unchanged.
    """
    lookalikes = dict((
        ('的', '地得'),
        ('了', '着啦'),
        ('和', '与跟'),
        ('我', '我我'),
        ('是', '市师'),
        ('在', '再在'),
        ('有', '又有'),
        ('都', '都读'),
        ('好', '号毫'),
    ))
    # Unknown characters fall back to a doubled copy, so the choice yields the character itself.
    options = lookalikes[char] if char in lookalikes else char + char
    return random.choice(options)
|
|
||||||
|
|
||||||
async def _prepare_input(self, selector: str):
|
async def _prepare_input(self, selector: str):
|
||||||
"""准备输入"""
|
"""准备输入"""
|
||||||
try:
|
try:
|
||||||
await self.page.wait_for_selector(selector, timeout=5000)
|
element = await self.page.wait_for_selector(selector, timeout=5000)
|
||||||
await self.page.click(selector)
|
await element.click()
|
||||||
await asyncio.sleep(random.uniform(0.3, 0.8))
|
await asyncio.sleep(random.uniform(0.3, 0.8))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"准备输入失败: {e}")
|
print(f"准备输入失败: {e}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
async def _type_char(self, char: str, speed: float):
|
class OptimizedXHSTyping:
|
||||||
"""输入单个字符"""
|
"""优化的小红书输入模拟器"""
|
||||||
try:
|
def __init__(self, page):
|
||||||
delay = 1000 / speed # 转换为毫秒
|
self.page = page
|
||||||
await self.page.keyboard.type(char, delay=delay)
|
self.typing_config = {
|
||||||
except Exception as e:
|
'char_delay': (100, 200), # 基础字符延迟
|
||||||
print(f"输入字符失败: {e}")
|
'punct_delay': (200, 300), # 标点符号延迟
|
||||||
raise
|
'paragraph_pause': (0.5, 1.0), # 段落停顿
|
||||||
|
'natural_pause': 0.05 # 自然停顿概率
|
||||||
|
}
|
||||||
|
|
||||||
async def _press_key(self, key: str):
|
async def type_text(self, text: str):
|
||||||
"""按键操作"""
|
paragraphs = text.split('\n\n')
|
||||||
try:
|
|
||||||
await self.page.keyboard.press(key)
|
for i, para in enumerate(paragraphs):
|
||||||
except Exception as e:
|
# 段落输入
|
||||||
print(f"按键操作失败: {e}")
|
await self._type_paragraph(para)
|
||||||
raise
|
|
||||||
|
# 段落间自然停顿
|
||||||
|
if i < len(paragraphs) - 1:
|
||||||
|
await asyncio.sleep(random.uniform(*self.typing_config['paragraph_pause']))
|
||||||
|
|
||||||
|
async def _type_paragraph(self, paragraph: str):
|
||||||
|
char_count = 0
|
||||||
|
|
||||||
|
for char in paragraph:
|
||||||
|
# 随机自然停顿
|
||||||
|
if random.random() < self.typing_config['natural_pause']:
|
||||||
|
await asyncio.sleep(random.uniform(0.2, 0.5))
|
||||||
|
|
||||||
|
# 字符输入
|
||||||
|
if char in ',。!?、;:':
|
||||||
|
delay = random.randint(*self.typing_config['punct_delay'])
|
||||||
|
else:
|
||||||
|
delay = random.randint(*self.typing_config['char_delay'])
|
||||||
|
|
||||||
|
await self.page.keyboard.type(char, delay=delay)
|
||||||
|
char_count += 1
|
||||||
|
|
||||||
|
# 每20-30个字符后可能停顿
|
||||||
|
if char_count % random.randint(20, 30) == 0:
|
||||||
|
await asyncio.sleep(random.uniform(0.1, 0.3))
|
||||||
|
|
||||||
|
class XHSEnhancedTyping(EnhancedHumanTypingSimulator):
    """Xiaohongshu-specific typing simulator that adds hashtag (tag) input."""

    def __init__(self, page=None):
        super().__init__(page)
        # True only while handle_tag_input() is typing a tag.
        self.tag_mode = False

    async def type_text(self, text: str, selector: str = None) -> bool:
        """Select the speed profile for the current mode, then delegate to the parent.

        Args:
            text: Text to type.
            selector: Optional selector forwarded to the parent implementation.

        Returns:
            Whatever the parent ``type_text`` returns.
        """
        if self.tag_mode:
            # Tags are typed with the slower profile.
            speed_profile = {'min_typing_speed': 5, 'max_typing_speed': 12}
        else:
            # Regular text profile.
            speed_profile = {'min_typing_speed': 8, 'max_typing_speed': 20}

        # This class never creates `base_config` itself; create it on demand so
        # a base class that does not define it cannot raise AttributeError here.
        # NOTE(review): confirm the base class still reads these speed keys.
        if not hasattr(self, 'base_config'):
            self.base_config = {}
        self.base_config.update(speed_profile)

        return await super().type_text(text, selector)

    async def handle_tag_input(self, tag: str):
        """Type ``#`` followed by ``tag`` and confirm it.

        With 70% probability one of the first two ``.suggestion-item``
        elements is clicked when any exist; otherwise Enter is pressed.

        Args:
            tag: Tag text without the leading ``#``.
        """
        self.tag_mode = True
        try:
            # Hold Shift while pressing "3" to produce "#". keyboard.press()
            # would release Shift again before "3" is pressed.
            await self.page.keyboard.down("Shift")
            try:
                await asyncio.sleep(random.uniform(0.1, 0.2))
                await self.page.keyboard.press("3")
            finally:
                # Never leave the modifier stuck down.
                await self.page.keyboard.up("Shift")

            # Type the tag text.
            await self.type_text(tag)

            # Give the suggestion list time to appear.
            await asyncio.sleep(random.uniform(0.3, 0.5))

            # 70% chance of picking a suggestion.
            if random.random() < 0.7:
                try:
                    suggestions = await self.page.query_selector_all('.suggestion-item')
                    if suggestions:
                        await random.choice(suggestions[:2]).click()
                        await asyncio.sleep(random.uniform(0.2, 0.4))
                        return
                except Exception as e:
                    # Best effort: fall back to Enter below. Catching Exception
                    # (not a bare except) lets task cancellation propagate.
                    print(f"选择标签建议失败: {e}")

            # No suggestion was chosen: confirm with Enter.
            await self.page.keyboard.press("Enter")
            await asyncio.sleep(random.uniform(0.2, 0.4))
        finally:
            # Always leave tag mode, even when typing fails.
            self.tag_mode = False
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user