1325 lines
57 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
import asyncio
import os
import random
from datetime import datetime
from pathlib import Path

from playwright.async_api import Playwright, async_playwright, Page

from conf import LOCAL_CHROME_PATH
from utils.base_social_media import set_init_script
from utils.human_typing_wrapper import create_human_typer
from utils.log import xiaohongshu_logger
async def cookie_auth(account_file):
    """Check whether the login state stored in ``account_file`` is still usable.

    A headless Chromium context is created from the stored state and pointed
    at the creator upload page.

    Args:
        account_file: Path of the Playwright storage-state file to load.

    Returns:
        bool: ``True`` when the upload page is reached and no login prompt is
        shown; ``False`` when the URL is not reached within 5 seconds or a
        phone/QR login prompt is present on the page.
    """
    upload_url = "https://creator.xiaohongshu.com/creator-micro/content/upload"
    async with async_playwright() as playwright:
        browser = await playwright.chromium.launch(headless=True)
        try:
            context = await browser.new_context(storage_state=account_file)
            context = await set_init_script(context)
            page = await context.new_page()
            await page.goto(upload_url)
            try:
                await page.wait_for_url(upload_url, timeout=5000)
            except Exception:
                # `except Exception` (not a bare except) so that task
                # cancellation and KeyboardInterrupt still propagate.
                print("[+] 等待5秒 cookie 失效")
                return False
            # The page can load and still show a login prompt; the original
            # comment dates this check to a 2024-06-17 site redesign.
            phone_login = await page.get_by_text('手机号登录').count()
            if phone_login or await page.get_by_text('扫码登录').count():
                print("[+] 等待5秒 cookie 失效")
                return False
            print("[+] cookie 有效")
            return True
        finally:
            # Closing the browser also closes its contexts, on every path.
            await browser.close()
async def xiaohongshu_setup(account_file, handle=False):
    """Make sure a usable cookie file exists at ``account_file``.

    Args:
        account_file: Path of the storage-state (cookie) file.
        handle: When true, a missing or expired cookie triggers the
            interactive login flow; when false the function just reports
            failure.

    Returns:
        bool: ``True`` if the cookie is valid or was regenerated, ``False`` if
        it is missing/expired and ``handle`` is false.
    """
    # The validity check only runs when the file exists (short-circuit).
    if os.path.exists(account_file) and await cookie_auth(account_file):
        return True

    if not handle:
        # TODO: alert message
        return False

    xiaohongshu_logger.info('[+] cookie文件不存在或已失效即将自动打开浏览器请扫码登录登陆后会自动生成cookie文件')
    await xiaohongshu_cookie_gen(account_file)
    return True
async def xiaohongshu_cookie_gen(account_file):
    """Open a headed browser for manual login and save the resulting state.

    Args:
        account_file: Path the Playwright storage state is written to once
            the paused page is resumed.
    """
    async with async_playwright() as playwright:
        # Must run headed so the user can log in by hand.
        browser = await playwright.chromium.launch(headless=False)
        fresh_context = await browser.new_context()
        context = await set_init_script(fresh_context)
        page = await context.new_page()
        await page.goto("https://creator.xiaohongshu.com/")
        # Execution stops here until the user presses "resume" in the
        # Playwright inspector; the storage state is saved afterwards.
        await page.pause()
        await context.storage_state(path=account_file)
class XiaoHongShuVideo(object):
    """Upload and publish one video note through the Xiaohongshu creator web UI.

    The flow is driven with Playwright: upload the file, wait for the page to
    report success, type the title and hashtags, pick a location, optionally
    schedule the post, click publish, and finally persist the refreshed login
    state back to ``account_file``.
    """

    _PUBLISH_URL = "https://creator.xiaohongshu.com/publish/publish?from=homepage&target=video"
    _SUCCESS_URL = "https://creator.xiaohongshu.com/publish/success?**"
    # NOTE: the original author warns against adding `.ql-blank` to this
    # selector because it then only matches in the editor's initial state.
    _TAG_EDITOR_SELECTOR = "#publish-container .editor-content > div > div"

    def __init__(self, title, file_path, tags, publish_date: datetime, account_file, thumbnail_path=None,
                 headless=True, location="青岛市"):
        """Store the publishing parameters.

        Args:
            title: Video title; at most the first 30 characters are typed into
                the dedicated title input.
            file_path: Path of the video file to upload.
            tags: Sequence of hashtag texts (without the leading ``#``).
            publish_date: Scheduled publish time, or ``0`` to publish
                immediately.
            account_file: Path of the Playwright storage-state file.
            thumbnail_path: Optional cover image path (only used by
                ``set_thumbnail``).
            headless: Whether to launch the browser headless.
            location: Place name typed into the location picker. Defaults to
                the value that used to be hard-coded; a falsy value skips the
                location step.
        """
        self.title = title  # video title
        self.file_path = file_path
        self.tags = tags
        self.publish_date = publish_date
        self.account_file = account_file
        self.date_format = '%Y年%m月%d%H:%M'
        self.local_executable_path = LOCAL_CHROME_PATH
        self.thumbnail_path = thumbnail_path
        self.headless = headless
        self.location = location

    @staticmethod
    def _format_schedule_time(publish_date):
        """Return ``publish_date`` formatted the way the date picker is typed into."""
        return publish_date.strftime("%Y-%m-%d %H:%M")

    @staticmethod
    def _location_option_xpath(location):
        """Build the XPath of the dropdown option whose name equals ``location``.

        The literal is wrapped in whichever quote character ``location`` does
        not contain; a value containing both quote kinds is not supported.
        """
        quote = "'" if '"' in location else '"'
        return (
            '//div[contains(@class, "d-popover") and contains(@class, "d-dropdown")]'
            '//div[contains(@class, "d-options-wrapper")]'
            '//div[contains(@class, "d-grid") and contains(@class, "d-options")]'
            f'//div[contains(@class, "name") and text()={quote}{location}{quote}]'
        )

    async def set_schedule_time_xiaohongshu(self, page, publish_date):
        """Enable scheduled publishing and type ``publish_date`` into the picker."""
        print(" [-] 正在设置定时发布时间...")
        print(f"publish_date: {publish_date}")
        # Clicking the label toggles the "scheduled publish" option.
        await page.locator("label:has-text('定时发布')").click()
        await asyncio.sleep(1)
        publish_date_hour = self._format_schedule_time(publish_date)
        print(f"publish_date_hour: {publish_date_hour}")
        await asyncio.sleep(1)
        await page.locator('.el-input__inner[placeholder="选择日期和时间"]').click()
        # Select the existing value so the typed text replaces it.
        await page.keyboard.press("Control+KeyA")
        await page.keyboard.type(str(publish_date_hour))
        await page.keyboard.press("Enter")
        await asyncio.sleep(1)

    async def handle_upload_error(self, page):
        """Re-submit the video file after the page reported an upload error."""
        xiaohongshu_logger.info('视频出错了,重新上传中')
        await page.locator('div.progress-div [class^="upload-btn-input"]').set_input_files(self.file_path)

    async def _launch_browser(self, playwright):
        """Launch Chromium, using the configured local executable when set."""
        if self.local_executable_path:
            return await playwright.chromium.launch(
                headless=self.headless, executable_path=self.local_executable_path)
        return await playwright.chromium.launch(headless=self.headless)

    async def _wait_for_upload_success(self, page):
        """Poll the preview area until a stage element contains "上传成功".

        There is no overall timeout: the loop runs until success is detected.
        """
        while True:
            try:
                upload_input = await page.wait_for_selector('input.upload-input', timeout=3000)
                # The preview block is a following sibling of the file input.
                preview_new = await upload_input.query_selector(
                    'xpath=following-sibling::div[contains(@class, "preview-new")]')
                if preview_new:
                    upload_success = False
                    for stage in await preview_new.query_selector_all('div.stage'):
                        text_content = await page.evaluate('(element) => element.textContent', stage)
                        if '上传成功' in text_content:
                            upload_success = True
                            break
                    if upload_success:
                        xiaohongshu_logger.info("[+] 检测到上传成功标识!")
                        return
                    print(" [-] 未找到上传成功标识,继续等待...")
                else:
                    print(" [-] 未找到预览元素,继续等待...")
                # Sleep on every unsuccessful pass so the loop never spins.
                await asyncio.sleep(1)
            except Exception as e:
                print(f" [-] 检测过程出错: {str(e)},重新尝试...")
                await asyncio.sleep(0.5)

    async def _fill_title(self, page, human_typer):
        """Type the title, preferring human-like typing with a plain fallback."""
        title_input = page.locator('div.plugin.title-container').locator('input.d-text')
        if await title_input.count():
            success = await human_typer.type_text_human(
                'div.plugin.title-container input.d-text',
                self.title[:30],
                clear_first=True
            )
            if not success:
                xiaohongshu_logger.warning("标题人类化输入失败,使用传统方式")
                await title_input.fill(self.title[:30])
            return

        # Alternative page layout: the title lives in a `.notranslate` element.
        success = await human_typer.type_text_human(".notranslate", self.title, clear_first=True)
        if not success:
            xiaohongshu_logger.warning("标题人类化输入失败,使用传统方式")
            await page.locator(".notranslate").click()
            await page.keyboard.press("Backspace")
            await page.keyboard.press("Control+KeyA")
            await page.keyboard.press("Delete")
            await page.keyboard.type(self.title)
            await page.keyboard.press("Enter")

    async def _fill_tags(self, page):
        """Type every hashtag into the editor, one per line.

        Human-like slow typing is tried first. If it fails part-way, the plain
        keyboard fallback continues from the first tag that was not entered
        instead of retyping the ones already on the page.
        """
        from utils.human_typing_wrapper import HumanTypingWrapper

        css_selector = self._TAG_EDITOR_SELECTOR
        slow_config = {
            'min_delay': 500,  # minimum delay between keys (ms)
            'max_delay': 800,  # maximum delay between keys (ms)
            'pause_probability': 0.3,  # 30% chance of a pause
            'pause_min': 500,  # shortest pause (ms)
            'pause_max': 1200,  # longest pause (ms)
            'correction_probability': 0.0,  # no simulated typos
            'backspace_probability': 0.0,  # no simulated backspacing
        }
        slow_typer = HumanTypingWrapper(page, slow_config)

        tags = list(self.tags)
        entered = 0
        for tag in tags:
            tag_success = await slow_typer.type_text_human(css_selector, f"#{tag}", clear_first=False)
            if not tag_success:
                break
            # Enter confirms the tag; then pause before the next one.
            await page.keyboard.press("Enter")
            await page.wait_for_timeout(800)
            entered += 1
            xiaohongshu_logger.info(f"已输入标签: {tag} ({entered}/{len(tags)})")

        if entered < len(tags):
            xiaohongshu_logger.warning("标签人类化输入失败,使用传统方式")
            await page.click(css_selector)
            for tag in tags[entered:]:
                await page.keyboard.type("#")
                for char in tag:
                    await page.keyboard.type(char, delay=500)
                # NOTE(review): the pause is placed after the whole tag (before
                # confirming it); confirm against the original indentation.
                await page.wait_for_timeout(1000)
                await page.keyboard.type("\r")
        xiaohongshu_logger.info(f'总共添加{len(self.tags)}个话题')

    async def _publish(self, page):
        """Click the publish button until the success page is reached.

        The loop has no upper bound; it retries every time the success URL is
        not reached within 3 seconds.
        """
        while True:
            try:
                if self.publish_date != 0:
                    await page.locator('button:has-text("定时发布")').click()
                else:
                    await page.locator('button:has-text("发布")').click()
                # Reaching the success URL means the note was published.
                await page.wait_for_url(self._SUCCESS_URL, timeout=3000)
                xiaohongshu_logger.success(" [-]视频发布成功")
                return
            except Exception:
                # `except Exception` keeps task cancellation working.
                xiaohongshu_logger.info(" [-] 视频正在发布中...")
                await page.screenshot(full_page=True)  # result is discarded
                await asyncio.sleep(0.5)

    async def upload(self, playwright: "Playwright") -> None:
        """Run the whole video publishing flow.

        Args:
            playwright: A started Playwright instance used to launch Chromium.

        The browser is closed even when a step raises; the storage state is
        only written after a successful publish.
        """
        browser = await self._launch_browser(playwright)
        try:
            # The context carries the stored login state.
            context = await browser.new_context(
                viewport={"width": 1600, "height": 900},
                storage_state=f"{self.account_file}"
            )
            context = await set_init_script(context)
            page = await context.new_page()
            human_typer = create_human_typer(page)

            await page.goto(self._PUBLISH_URL)
            xiaohongshu_logger.info(f'[+]正在上传-------{self.title}.mp4')
            xiaohongshu_logger.info(f'[-] 正在打开主页...')
            # Waits (up to the default timeout) until the publish page is open.
            await page.wait_for_url(self._PUBLISH_URL)

            await page.locator("div[class^='upload-content'] input[class='upload-input']").set_input_files(
                self.file_path)
            await self._wait_for_upload_success(page)

            await asyncio.sleep(1)
            xiaohongshu_logger.info(f' [-] 正在填充标题和话题...')
            await self._fill_title(page, human_typer)
            await self._fill_tags(page)

            if self.location:
                await self.set_location(page, self.location)

            if self.publish_date != 0:
                await self.set_schedule_time_xiaohongshu(page, self.publish_date)

            await self._publish(page)

            await context.storage_state(path=self.account_file)  # save cookie
            xiaohongshu_logger.success(' [-]cookie更新完毕')
            await asyncio.sleep(2)  # short delay so the result can be observed
            await context.close()
        finally:
            await browser.close()

    async def set_thumbnail(self, page: "Page", thumbnail_path: str):
        """Upload ``thumbnail_path`` as the vertical cover; no-op when it is falsy."""
        if not thumbnail_path:
            return
        await page.click('text="选择封面"')
        await page.wait_for_selector("div.semi-modal-content:visible")
        await page.click('text="设置竖封面"')
        await page.wait_for_timeout(2000)
        await page.locator(
            "div[class^='semi-upload upload'] >> input.semi-upload-hidden-input").set_input_files(thumbnail_path)
        await page.wait_for_timeout(2000)
        await page.locator("div[class^='extractFooter'] button:visible:has-text('完成')").click()

    async def set_location(self, page: "Page", location: str = "青岛市"):
        """Pick ``location`` in the location dropdown.

        Args:
            page: The publish page.
            location: Place name to type and then select by exact option text.

        Returns:
            bool: ``True`` if the option was clicked, ``False`` if it could not
            be located or clicked.

        Raises:
            Whatever ``page.wait_for_selector`` raises when the location input
            itself never appears (that wait is not guarded).
        """
        print(f"开始设置位置: {location}")
        print("等待地点输入框加载...")
        loc_ele = await page.wait_for_selector('div.d-text.d-select-placeholder.d-text-ellipsis.d-text-nowrap')
        print(f"已定位到地点输入框: {loc_ele}")
        await loc_ele.click()
        print("点击地点输入框完成")

        print(f"等待1秒后输入位置名称: {location}")
        await page.wait_for_timeout(1000)
        await page.keyboard.type(location)
        print(f"位置名称输入完成: {location}")

        print("等待下拉列表加载...")
        dropdown_selector = 'div.d-popover.d-popover-default.d-dropdown.--size-min-width-large'
        await page.wait_for_timeout(3000)
        try:
            await page.wait_for_selector(dropdown_selector, timeout=3000)
            print("下拉列表已加载")
        except Exception:
            # Best effort: the option lookup below may still succeed.
            print("下拉列表未按预期显示,可能结构已变化")

        print("额外等待1秒确保内容渲染完成...")
        await page.wait_for_timeout(1000)

        print("尝试使用更灵活的XPath选择器...")
        option_xpath = self._location_option_xpath(location)
        await page.wait_for_timeout(3000)

        print(f"尝试定位包含'{location}'的选项...")
        try:
            # wait_for_selector raises on timeout instead of returning None,
            # so a single lookup is enough; the former stricter fallback
            # selector matched a subset of this one and could never help.
            location_option = await page.wait_for_selector(option_xpath, timeout=3000)
            print(f"使用灵活选择器定位成功: {location_option}")

            print("滚动到目标选项...")
            await location_option.scroll_into_view_if_needed()
            print("元素已滚动到视图内")

            is_visible = await location_option.is_visible()
            print(f"目标选项是否可见: {is_visible}")

            print("准备点击目标选项...")
            await location_option.click()
            print(f"成功选择位置: {location}")
            return True
        except Exception as e:
            print(f"定位位置失败: {e}")
            # Dump the first few options to help diagnose selector drift.
            print("尝试获取下拉列表中的所有选项...")
            try:
                all_options = await page.query_selector_all(
                    '//div[contains(@class, "d-popover") and contains(@class, "d-dropdown")]'
                    '//div[contains(@class, "d-options-wrapper")]'
                    '//div[contains(@class, "d-grid") and contains(@class, "d-options")]'
                    '/div'
                )
                print(f"找到 {len(all_options)} 个选项")
                for i, option in enumerate(all_options[:3]):
                    option_text = await option.inner_text()
                    print(f"选项 {i+1}: {option_text.strip()[:50]}...")
            except Exception as e:
                print(f"获取选项列表失败: {e}")
            return False

    async def main(self):
        """Start Playwright and run ``upload``."""
        async with async_playwright() as playwright:
            await self.upload(playwright)
class XiaoHongShuImage(object):
    """Upload and publish one image-and-text note through the Xiaohongshu creator web UI.

    The flow is driven with Playwright: upload the images, type the title,
    body and hashtags with human-like timing, optionally set a location and a
    scheduled time, click publish, and persist the refreshed login state.
    """

    _PUBLISH_URL = "https://creator.xiaohongshu.com/publish/publish?from=tab_switch&target=image"
    _SUCCESS_URL = "https://creator.xiaohongshu.com/publish/success?**"
    # Candidate containers for the hashtag suggestion popup, tried in order.
    _SUGGESTION_SELECTORS = (
        'div[class*="suggestion"]',
        'div[class*="dropdown"]',
        'div[class*="popover"]',
        'ul[class*="options"]',
        'div[class*="tag-suggestion"]',
        '[role="listbox"]',
        '[role="menu"]',
    )

    def __init__(self, title, image_paths, tags, publish_date: datetime, account_file, location=None, content=None,
                 headless=True):
        """Store the publishing parameters.

        Args:
            title: Note title; at most the first 30 characters are typed into
                the dedicated title input.
            image_paths: One path or a list of paths of images to upload.
            tags: Sequence of hashtag texts (without the leading ``#``).
            publish_date: Scheduled publish time, or ``0`` to publish
                immediately.
            account_file: Path of the Playwright storage-state file.
            location: Optional place name; blank/None skips the location step.
            content: Optional body text; the title is used when it is empty.
            headless: Whether to launch the browser headless.
        """
        self.title = title  # note title
        # Accept a single path as well as a list of paths.
        self.image_paths = image_paths if isinstance(image_paths, list) else [image_paths]
        self.tags = tags
        self.publish_date = publish_date
        self.account_file = account_file
        self.location = location
        self.content = content
        self.date_format = '%Y年%m月%d%H:%M'
        self.local_executable_path = LOCAL_CHROME_PATH
        self.headless = headless

    # ------------------------------------------------------------------ helpers

    @staticmethod
    async def _first_present(page, selectors, timeout=3000):
        """Return the first element found for ``selectors`` or ``None``.

        Each selector is waited on for ``timeout`` ms; a selector that times
        out (``wait_for_selector`` raises) simply moves on to the next one.
        """
        for selector in selectors:
            try:
                handle = await page.wait_for_selector(selector, timeout=timeout)
            except Exception:
                continue
            if handle:
                return handle
        return None

    @staticmethod
    def _clean_suggestion_text(text):
        """Strip whitespace and one leading ``#`` from a suggestion label."""
        cleaned = (text or "").strip()
        if cleaned.startswith('#'):
            cleaned = cleaned[1:].strip()
        return cleaned

    @classmethod
    def _pick_suggestion(cls, texts, tag):
        """Choose which suggestion label best matches ``tag``.

        Args:
            texts: Raw labels of the suggestion items, in page order.
            tag: The hashtag text that was typed (without ``#``).

        Returns:
            The index of the first exact match; otherwise the index of the
            first label that contains ``tag`` or is contained in it; ``None``
            when nothing matches. Blank labels never match.
        """
        partial = None
        for index, raw in enumerate(texts):
            cleaned = cls._clean_suggestion_text(raw)
            if not cleaned:
                # An empty string is "in" every tag, so it must be skipped.
                continue
            if cleaned == tag:
                return index
            if partial is None and (tag in cleaned or cleaned in tag):
                partial = index
        return partial

    # ------------------------------------------------------------- scheduling

    async def set_schedule_time_xiaohongshu(self, page, publish_date):
        """Enable scheduled publishing and type ``publish_date`` into the picker."""
        print(" [-] 正在设置定时发布时间...")
        print(f"publish_date: {publish_date}")
        # Clicking the label toggles the "scheduled publish" option.
        await page.locator("label:has-text('定时发布')").click()
        await asyncio.sleep(1)
        publish_date_hour = publish_date.strftime("%Y-%m-%d %H:%M")
        print(f"publish_date_hour: {publish_date_hour}")
        await asyncio.sleep(1)
        await page.locator('.el-input__inner[placeholder="选择日期和时间"]').click()
        # Select the existing value so the typed text replaces it.
        await page.keyboard.press("Control+KeyA")
        await page.keyboard.type(str(publish_date_hour))
        await page.keyboard.press("Enter")
        await asyncio.sleep(1)

    # ----------------------------------------------------------- image upload

    async def upload_images(self, page):
        """Feed ``self.image_paths`` to the page's file input and wait for upload.

        Raises:
            Exception: ``图片上传失败: ...`` when no file input can be found,
                even after clicking an "上传图片" button.
        """
        xiaohongshu_logger.info(f'[+] 正在上传图片,共{len(self.image_paths)}')
        await asyncio.sleep(3)  # give the page time to render

        upload_selectors = [
            "input[class='upload-input'][type='file'][multiple]",
            "input[accept='.jpg,.jpeg,.png,.webp']",
            "input[type='file'][multiple]",
            "input[type='file']",
        ]
        upload_input = await self._first_present(page, upload_selectors)

        if not upload_input:
            # No input yet: click an upload button, then look again.
            try:
                upload_button = await self._first_present(
                    page, ('button:has-text("上传图片")', 'div:has-text("上传图片")'))
                if upload_button:
                    await upload_button.click()
                    await asyncio.sleep(2)
                    upload_input = await self._first_present(page, ("input[type='file']",))
                if not upload_input:
                    raise Exception("未找到图片上传元素")
            except Exception as e:
                raise Exception(f"图片上传失败: {e}") from e

        # Log file names only, not full paths.
        file_names = [Path(p).name for p in self.image_paths]
        xiaohongshu_logger.info(f" [-] 上传文件: {', '.join(file_names)}")
        await upload_input.set_input_files(self.image_paths)

        await asyncio.sleep(2)
        await self.wait_for_images_upload_complete(page)

    async def _images_look_uploaded(self, page):
        """Run the three completion heuristics once; return True if any passes."""
        # 1. An "添加" (add more) entry is present.
        add_selectors = [
            'div.entry:has-text("添加")',
            'div:has-text("添加")',
            '[class*="add"]:has-text("添加")'
        ]
        for selector in add_selectors:
            try:
                add_button = await page.query_selector(selector)
            except Exception:
                continue
            if add_button:
                xiaohongshu_logger.success(" [-] 图片上传完成")
                return True

        # 2. At least as many preview-like <img> elements as uploaded files.
        try:
            valid_images = []
            for img in await page.query_selector_all('img'):
                src = await img.get_attribute('src')
                if src and ('data:image' in src or 'blob:' in src or len(src) > 50):
                    valid_images.append(img)
            if len(valid_images) >= len(self.image_paths):
                xiaohongshu_logger.success(f" [-] 图片上传完成 ({len(valid_images)}张)")
                await asyncio.sleep(2)
                return True
        except Exception:
            pass  # best effort: fall through to the loading-indicator check

        # 3. No loading / uploading indicator is left on the page.
        loading_elements = await page.query_selector_all('[class*="loading"], [class*="uploading"]')
        if not loading_elements:
            xiaohongshu_logger.success(" [-] 图片上传完成")
            return True
        return False

    async def wait_for_images_upload_complete(self, page):
        """Poll every 3 seconds (up to 60 seconds) until the upload looks done.

        On timeout a warning is logged and the flow continues.
        """
        max_wait_time = 60
        wait_count = 0
        while wait_count < max_wait_time:
            try:
                if await self._images_look_uploaded(page):
                    return
                # Progress is only logged every 15 seconds.
                if wait_count % 15 == 0 and wait_count > 0:
                    xiaohongshu_logger.info(f" [-] 等待图片上传... ({wait_count}/{max_wait_time}秒)")
            except Exception as e:
                xiaohongshu_logger.debug(f" [-] 检查上传状态出错: {e}")
            await asyncio.sleep(3)
            wait_count += 3
        xiaohongshu_logger.warning(" [-] 图片上传等待超时,继续流程")

    # ---------------------------------------------------------- text content

    async def locate_content_editor(self, page):
        """Find the body editor.

        Returns:
            tuple: ``(element, selector)`` for the first selector that matches.

        Raises:
            Exception: ``无法找到正文输入区域`` when neither selector matches
                within 3 seconds each.
        """
        primary_selector = "div.editor-content"
        backup_selector = "div[contenteditable='true'][role='textbox']"
        xiaohongshu_logger.info(" [-] 查找正文输入区域...")

        try:
            element = await page.wait_for_selector(primary_selector, timeout=3000)
            xiaohongshu_logger.info(f" [-] 使用主选择器成功定位: {primary_selector}")
            return element, primary_selector
        except Exception:
            xiaohongshu_logger.warning(" [-] 主选择器定位失败,尝试备用选择器...")

        try:
            element = await page.wait_for_selector(backup_selector, timeout=3000)
            xiaohongshu_logger.info(f" [-] 使用备用选择器成功定位: {backup_selector}")
            return element, backup_selector
        except Exception:
            xiaohongshu_logger.error(" [-] 所有选择器都无法定位正文区域")
            raise Exception("无法找到正文输入区域")

    async def fill_content(self, page, human_typer):
        """Type the title, the body text and the hashtags.

        Args:
            page: The publish page.
            human_typer: Human-like typing wrapper used for the title.
        """
        xiaohongshu_logger.info(f' [-] 正在填充标题和话题...')
        await self._fill_title(page, human_typer)
        content_element, css_selector = await self.locate_content_editor(page)
        await self._fill_body(page, content_element, css_selector)
        await self._fill_tags(page, content_element, css_selector)

    async def _fill_title(self, page, human_typer):
        """Type the title, preferring human-like typing with a plain fallback."""
        title_input = page.locator('div.plugin.title-container').locator('input.d-text')
        if await title_input.count():
            success = await human_typer.type_text_human(
                'div.plugin.title-container input.d-text',
                self.title[:30],
                clear_first=True
            )
            if not success:
                xiaohongshu_logger.warning("标题人类化输入失败,使用传统方式")
                await title_input.fill(self.title[:30])
            return

        # Alternative page layout: the title lives in a `.notranslate` element.
        success = await human_typer.type_text_human(".notranslate", self.title, clear_first=True)
        if not success:
            xiaohongshu_logger.warning("标题人类化输入失败,使用传统方式")
            await page.locator(".notranslate").click()
            await page.keyboard.press("Backspace")
            await page.keyboard.press("Control+KeyA")
            await page.keyboard.press("Delete")
            await page.keyboard.type(self.title)
            await page.keyboard.press("Enter")

    async def _fill_body(self, page, content_element, css_selector):
        """Type the body text into the editor with length-dependent pacing."""
        from utils.human_typing_wrapper import HumanTypingWrapper

        # Without custom content the body is the title plus two newlines.
        content_length = len(self.content) if self.content else len(self.title) + 2

        if content_length > 100:
            # Long text: slower and more cautious.
            content_config = {
                'min_delay': 80,  # ms
                'max_delay': 200,  # ms
                'pause_probability': 0.15,
                'pause_min': 800,  # ms
                'pause_max': 2000,  # ms
                'correction_probability': 0.02,
                'backspace_probability': 0.01,
            }
            xiaohongshu_logger.info(f" [-] 长文本模式 ({content_length}字符),使用慢速人类化输入")
        else:
            # Short text: faster, still human-like.
            content_config = {
                'min_delay': 60,  # ms
                'max_delay': 150,  # ms
                'pause_probability': 0.1,
                'pause_min': 500,  # ms
                'pause_max': 1200,  # ms
                'correction_probability': 0.01,
                'backspace_probability': 0.005,
            }
            xiaohongshu_logger.info(f" [-] 短文本模式 ({content_length}字符),使用标准人类化输入")
        content_typer = HumanTypingWrapper(page, content_config)

        if self.content:
            content_text = self.content
            xiaohongshu_logger.info(f" [-] 使用自定义正文内容,长度: {len(content_text)} 字符")
        else:
            content_text = f"{self.title}\n\n"
            xiaohongshu_logger.info(" [-] 使用默认正文内容(标题)")

        xiaohongshu_logger.info(f" [-] 开始人类化输入正文内容...")
        if content_length > 200:
            # Texts over 200 characters are typed paragraph by paragraph.
            xiaohongshu_logger.info(" [-] 长文本分段输入模式")
            success = await self._input_long_content_in_segments(page, content_typer, css_selector, content_text)
        else:
            success = await content_typer.type_text_human(css_selector, content_text, clear_first=True)

        if not success:
            xiaohongshu_logger.warning(" [-] 正文人类化输入失败,使用传统方式")
            await content_element.click()
            await asyncio.sleep(0.5)
            # Remove whatever the failed attempt already typed so the body is
            # not duplicated by the retry.
            await page.keyboard.press("Control+KeyA")
            await page.keyboard.press("Delete")
            xiaohongshu_logger.info(" [-] 使用传统方式进行人类化输入...")
            await self._fallback_human_typing(page, content_text)

        xiaohongshu_logger.success(f" [-] 正文输入完成,共 {len(content_text)} 字符")

    async def _fill_tags(self, page, content_element, css_selector):
        """Append the hashtags after the body text.

        Human-like typing is tried first. If it fails part-way, the fallback
        continues from the first tag that was not entered.
        """
        from utils.human_typing_wrapper import HumanTypingWrapper

        xiaohongshu_logger.info(" [-] 开始在正文后面添加标签...")
        # Put the caret at the very end of the body.
        await content_element.click()
        await asyncio.sleep(0.3)
        await page.keyboard.press("End")
        await page.keyboard.press("Control+End")
        await asyncio.sleep(0.3)
        # Two line breaks separate the tags from the body.
        await page.keyboard.press("Enter")
        await page.keyboard.press("Enter")
        await asyncio.sleep(0.3)

        xiaohongshu_logger.info(f" [-] 开始输入标签 ({len(self.tags)}个)...")
        tag_config = {
            'min_delay': 400, 'max_delay': 700, 'pause_probability': 0.25,
            'pause_min': 400, 'pause_max': 1000, 'correction_probability': 0.02,
            'backspace_probability': 0.01,
        }
        tag_typer = HumanTypingWrapper(page, tag_config)

        tags = list(self.tags)
        entered = 0
        for i, tag in enumerate(tags):
            if i > 0:
                # "Thinking" time between tags.
                await asyncio.sleep(random.uniform(0.8, 1.5))
            tag_success = await tag_typer.type_text_human(css_selector, f"#{tag}", clear_first=False)
            if not tag_success:
                break
            await self._handle_tag_suggestions_after_input(page, tag)
            entered += 1
            if i < len(tags) - 1:
                await page.keyboard.type(" ")
                await asyncio.sleep(random.uniform(0.2, 0.5))

        if entered < len(tags):
            xiaohongshu_logger.warning(" [-] 使用备用标签输入方式")
            await self._fallback_tag_input(page, css_selector, start=entered)

        xiaohongshu_logger.success(f' [-] 标签输入完成 ({len(self.tags)}个)')

    async def _handle_tag_suggestions_after_input(self, page: "Page", tag: str) -> None:
        """After typing a tag, pick a matching suggestion or confirm with Enter."""
        try:
            await asyncio.sleep(random.uniform(0.5, 1.0))
            suggestion_found = await self._handle_tag_suggestions(page, tag)
            if not suggestion_found:
                await asyncio.sleep(random.uniform(0.2, 0.5))
                await page.keyboard.press("Enter")
        except Exception as e:
            xiaohongshu_logger.debug(f" [-] 标签建议处理出错: {e}")

    async def _fallback_tag_input(self, page: "Page", css_selector: str, start: int = 0) -> None:
        """Type tags with plain keyboard events, separated by spaces.

        Args:
            page: The publish page.
            css_selector: Selector of the editor to click before typing.
            start: Index of the first tag to type; earlier tags are assumed
                to be on the page already.

        Errors are logged, not raised.
        """
        try:
            await page.click(css_selector)
            await asyncio.sleep(0.5)
            tags = list(self.tags)
            for index, tag in enumerate(tags[start:], start=start + 1):
                await page.keyboard.type("#")
                await asyncio.sleep(random.uniform(0.1, 0.3))
                for char in tag:
                    await page.keyboard.type(char, delay=random.randint(300, 600))
                await asyncio.sleep(random.uniform(0.8, 1.2))
                if index < len(tags):
                    await page.keyboard.type(" ")
                    await asyncio.sleep(random.uniform(0.3, 0.6))
        except Exception as e:
            xiaohongshu_logger.error(f" [-] 备用标签输入失败: {e}")

    async def _input_long_content_in_segments(self, page: "Page", content_typer, css_selector: str,
                                              content_text: str) -> bool:
        """Type a long text paragraph by paragraph.

        Args:
            page: The publish page.
            content_typer: Human-like typing wrapper for the body.
            css_selector: Selector of the body editor.
            content_text: Text to type; paragraphs are split on blank lines.

        Returns:
            bool: ``True`` on success, ``False`` if any paragraph failed or an
            exception occurred.
        """
        try:
            paragraphs = content_text.split('\n\n')
            await content_typer.clear_element(css_selector)
            await asyncio.sleep(0.5)

            for i, paragraph in enumerate(paragraphs, 1):
                if not paragraph.strip():
                    continue
                success = await content_typer.type_text_human(css_selector, paragraph, clear_first=False)
                if not success:
                    xiaohongshu_logger.warning(f" [-] 第 {i} 段落输入失败")
                    return False
                # Blank line plus a pause between paragraphs.
                if i < len(paragraphs):
                    await page.keyboard.press("Enter")
                    await page.keyboard.press("Enter")
                    await asyncio.sleep(random.uniform(1.0, 3.0))

            xiaohongshu_logger.success(" [-] 分段输入完成")
            return True
        except Exception as e:
            xiaohongshu_logger.error(f" [-] 分段输入失败: {e}")
            return False

    async def _fallback_human_typing(self, page: "Page", content_text: str) -> None:
        """Type ``content_text`` character by character with random delays.

        Args:
            page: The publish page (the editor must already have focus).
            content_text: Text to type.
        """
        for char_count, char in enumerate(content_text, 1):
            await page.keyboard.type(char)
            # 50-120 ms between characters.
            await asyncio.sleep(random.randint(50, 120) / 1000)
            # 5% chance of a longer 300-800 ms pause.
            if random.random() < 0.05:
                await asyncio.sleep(random.randint(300, 800) / 1000)
            # Progress is logged every 100 characters.
            if char_count % 100 == 0:
                xiaohongshu_logger.debug(f" [-] 输入进度: {char_count}/{len(content_text)}")

    async def _input_single_tag(self, page: "Page", tag: str, current: int, total: int) -> None:
        """Type one tag and resolve its suggestion popup.

        Args:
            page: The publish page.
            tag: Tag text without the leading ``#``.
            current: 1-based position of this tag.
            total: Total number of tags.

        Errors are logged; Enter is pressed afterwards so the flow can go on.
        """
        tag_text = f"#{tag}"
        xiaohongshu_logger.info(f" [-] 输入标签: {tag_text} ({current}/{total})")
        try:
            # 1. Pause before every tag except the first.
            if current > 1:
                think_time = random.uniform(0.5, 2.0)
                xiaohongshu_logger.debug(f" [-] 思考下一个标签... ({think_time:.1f}秒)")
                await asyncio.sleep(think_time)

            # 2. Character-by-character typing.
            await self._human_like_tag_typing(page, tag_text)

            # 3. Short pause after typing.
            await asyncio.sleep(random.uniform(0.3, 0.8))

            # 4. Give the suggestion popup time to appear.
            suggestion_wait = random.uniform(0.6, 1.2)
            xiaohongshu_logger.debug(f" [-] 等待标签建议... ({suggestion_wait:.1f}秒)")
            await asyncio.sleep(suggestion_wait)

            # 5. Pick a suggestion, or confirm the typed text with Enter.
            suggestion_found = await self._handle_tag_suggestions(page, tag)
            if suggestion_found:
                xiaohongshu_logger.info(f" [-] 选择了匹配的标签建议: {tag}")
            else:
                hesitate_time = random.uniform(0.2, 0.6)
                xiaohongshu_logger.debug(f" [-] 未找到建议,犹豫中... ({hesitate_time:.1f}秒)")
                await asyncio.sleep(hesitate_time)
                xiaohongshu_logger.info(f" [-] 未找到匹配建议,生成新标签: {tag}")
                await page.keyboard.press("Enter")
                await asyncio.sleep(random.uniform(0.2, 0.5))

            # 6. Space separator before the next tag.
            if current < total:
                await asyncio.sleep(random.uniform(0.3, 0.8))
                await page.keyboard.type(" ")
                await asyncio.sleep(random.uniform(0.1, 0.3))

            xiaohongshu_logger.info(f" [-] 标签处理完成: {tag} ({current}/{total})")
        except Exception as e:
            xiaohongshu_logger.error(f" [-] 输入标签 {tag} 时出错: {e}")
            await asyncio.sleep(random.uniform(0.2, 0.5))
            await page.keyboard.press("Enter")
            await asyncio.sleep(random.uniform(0.3, 0.6))

    async def _human_like_tag_typing(self, page: "Page", tag_text: str) -> None:
        """Type ``tag_text`` with per-character delays, rare typos and pauses.

        Args:
            page: The publish page (the editor must already have focus).
            tag_text: Full tag text including the leading ``#``.
        """
        last_index = len(tag_text) - 1
        for i, char in enumerate(tag_text):
            if char == '#':
                delay = random.randint(120, 250)  # '#' is typed a bit slower
            elif char.isalpha():
                delay = random.randint(60, 180)
            else:
                delay = random.randint(80, 200)  # digits, symbols
            await page.keyboard.type(char)
            await asyncio.sleep(delay / 1000)

            # 2% chance: type a wrong letter, then delete it again.
            if random.random() < 0.02 and i < last_index:
                wrong_char = random.choice('abcdefghijklmnopqrstuvwxyz')
                await page.keyboard.type(wrong_char)
                await asyncio.sleep(random.randint(100, 300) / 1000)
                await asyncio.sleep(random.uniform(0.2, 0.5))  # reaction time
                await page.keyboard.press("Backspace")
                await asyncio.sleep(random.randint(80, 150) / 1000)
                xiaohongshu_logger.debug(f" [-] 模拟打字错误并修正")

            # 5% chance: a short pause in the middle of the tag.
            if random.random() < 0.05 and i < last_index:
                pause_time = random.uniform(0.3, 0.8)
                xiaohongshu_logger.debug(f" [-] 输入中思考停顿 ({pause_time:.1f}秒)")
                await asyncio.sleep(pause_time)

    async def _handle_tag_suggestions(self, page: "Page", tag: str) -> bool:
        """Click the suggestion that matches ``tag`` if a popup offers one.

        Args:
            page: The publish page.
            tag: Tag text without the leading ``#``.

        Returns:
            bool: ``True`` if a suggestion was clicked; ``False`` when no
            popup or no match was found, or when clicking failed.
        """
        try:
            xiaohongshu_logger.info(f" [-] 查找标签 '{tag}' 的建议...")

            suggestion_container = None
            for selector in self._SUGGESTION_SELECTORS:
                try:
                    suggestion_container = await page.wait_for_selector(selector, timeout=1000)
                except Exception:
                    continue
                if suggestion_container:
                    xiaohongshu_logger.debug(f" 找到建议容器: {selector}")
                    break
            if not suggestion_container:
                xiaohongshu_logger.debug(" [-] 未找到标签建议容器")
                return False

            # Search inside the container that was actually found, whichever
            # selector matched it.
            suggestion_items = await suggestion_container.query_selector_all('div, li, span')
            xiaohongshu_logger.debug(f" [-] 找到 {len(suggestion_items)} 个建议项")

            item_texts = []
            for item in suggestion_items:
                try:
                    item_texts.append(await item.inner_text() or "")
                except Exception as e:
                    xiaohongshu_logger.debug(f" 处理建议项时出错: {e}")
                    item_texts.append("")  # keeps indexes aligned; never matches

            match_index = self._pick_suggestion(item_texts, tag)
            if match_index is None:
                xiaohongshu_logger.debug(f" [-] 未找到匹配的标签建议")
                return False

            matched_text = self._clean_suggestion_text(item_texts[match_index])
            if matched_text == tag:
                xiaohongshu_logger.info(f" [-] 找到精确匹配: {matched_text}")
            else:
                xiaohongshu_logger.debug(f" 备选匹配: {matched_text}")

            try:
                # Simulated reading time before the click.
                review_time = random.uniform(0.4, 1.0)
                xiaohongshu_logger.debug(f" [-] 查看标签建议... ({review_time:.1f}秒)")
                await asyncio.sleep(review_time)
                # 10% chance of an extra hesitation.
                if random.random() < 0.1:
                    hesitate_time = random.uniform(0.3, 0.7)
                    xiaohongshu_logger.debug(f" [-] 犹豫是否选择建议... ({hesitate_time:.1f}秒)")
                    await asyncio.sleep(hesitate_time)
                await suggestion_items[match_index].click()
                await asyncio.sleep(random.uniform(0.2, 0.5))
                xiaohongshu_logger.info(f" [-] 成功选择标签建议")
                return True
            except Exception as e:
                xiaohongshu_logger.warning(f" [-] 点击标签建议失败: {e}")
                return False
        except Exception as e:
            xiaohongshu_logger.debug(f" [-] 处理标签建议时出错: {e}")
            return False

    # ---------------------------------------------------------------- location

    async def set_location(self, page: "Page", location: str) -> bool:
        """Type ``location`` into the location picker and click a matching option.

        Returns:
            bool: ``True`` if an option was clicked; ``False`` if the input or
            a matching option could not be found, or on any error.
        """
        xiaohongshu_logger.info(f" [-] 开始设置地理位置: {location}")
        try:
            # 1. Open the location input.
            xiaohongshu_logger.info(" [-] 点击地点输入框...")
            input_selectors = [
                'div.d-select--color-text-title--color-bg-fill',
                'div.d-text.d-select-placeholder.d-text-ellipsis.d-text-nowrap',
                'div[class*="d-select"]'
            ]
            for selector in input_selectors:
                try:
                    element = await page.wait_for_selector(selector, timeout=3000)
                    await element.click()
                    break
                except Exception:
                    continue
            else:
                xiaohongshu_logger.error(" [-] 未找到地点输入框")
                return False

            # 2. Replace any existing text with the location name.
            xiaohongshu_logger.info(f" [-] 输入地点名称: {location}")
            await page.keyboard.press("Control+a")
            await page.keyboard.type(location)
            await asyncio.sleep(2)  # let the dropdown options load

            # 3. Click the first option containing the location name,
            #    preferring elements with a "name" class.
            xiaohongshu_logger.info(" [-] 查找匹配的地点选项...")
            option_selectors = [
                f'//div[contains(@class, "name") and contains(text(), "{location}")]',
                f'//div[contains(text(), "{location}")]',
            ]
            for selector in option_selectors:
                try:
                    options = await page.query_selector_all(selector)
                    if options:
                        option = options[0]
                        option_text = await option.inner_text()
                        await option.click()
                        xiaohongshu_logger.success(f" [-] 成功选择地点: {option_text}")
                        return True
                except Exception:
                    continue

            xiaohongshu_logger.warning(f" [-] 未找到匹配的地点选项: {location}")
            return False
        except Exception as e:
            xiaohongshu_logger.error(f" [-] 设置地理位置失败: {e}")
            return False

    # ------------------------------------------------------------------ upload

    def _build_context_options(self, user_agents):
        """Return the ``new_context`` options for the current headless mode."""
        context_options = {
            "storage_state": f"{self.account_file}",
            "locale": "zh-CN",
            "timezone_id": "Asia/Shanghai"
        }
        if self.headless:
            context_options.update({
                'viewport': {'width': 1920, 'height': 1080},
                'device_scale_factor': 1,
                'has_touch': False,
                'is_mobile': False
            })
            # A random user agent is only set in headless mode.
            user_agent = random.choice(user_agents)
            context_options["user_agent"] = user_agent
            xiaohongshu_logger.info(f" [-] 无头模式设置: 1920x1080")
            xiaohongshu_logger.info(f" [-] 使用用户代理: {user_agent[:50]}...")
        else:
            context_options["viewport"] = {"width": 1600, "height": 900}
            xiaohongshu_logger.info(f" [-] 有头模式设置: 1600x900")
        return context_options

    async def _publish(self, page):
        """Click the publish button until the success page is reached.

        The loop has no upper bound; it retries with a random 1-2 second wait
        whenever the success URL is not reached.
        """
        while True:
            try:
                if self.publish_date != 0:
                    xiaohongshu_logger.info(" [-] 点击定时发布按钮...")
                    await page.locator('button:has-text("定时发布")').click()
                else:
                    xiaohongshu_logger.info(" [-] 点击发布按钮...")
                    await page.locator('button:has-text("发布")').click()
                await asyncio.sleep(1)
                await page.wait_for_url(self._SUCCESS_URL, timeout=5000)
                xiaohongshu_logger.success(" [-]图文发布成功")
                return
            except Exception as e:
                xiaohongshu_logger.info(" [-] 图文正在发布中...")
                xiaohongshu_logger.debug(f" [-] 等待详情: {str(e)}")
                await page.screenshot(full_page=True)  # result is discarded
                await asyncio.sleep(random.uniform(1.0, 2.0))

    async def upload(self, playwright: "Playwright") -> None:
        """Run the whole image-note publishing flow.

        Args:
            playwright: A started Playwright instance used to launch Chromium.

        The browser is closed even when a step raises; the storage state is
        only written after a successful publish.
        """
        from utils.anti_detection import AntiDetectionConfig

        launch_options = {
            "headless": self.headless,
            "args": AntiDetectionConfig.STANDARD_BROWSER_ARGS.copy(),
        }
        if self.local_executable_path:
            launch_options["executable_path"] = self.local_executable_path
        browser = await playwright.chromium.launch(**launch_options)
        try:
            context_options = self._build_context_options(AntiDetectionConfig.REAL_USER_AGENTS)
            context = await browser.new_context(**context_options)
            context = await set_init_script(context)
            page = await context.new_page()

            human_typer = create_human_typer(page)
            xiaohongshu_logger.info(" [-] 已创建人类化输入包装器")

            await page.goto(self._PUBLISH_URL)
            xiaohongshu_logger.info(f'[+]正在上传图文-------{self.title}')
            xiaohongshu_logger.info(f'[-] 正在打开图文发布页面...')
            await page.wait_for_url("https://creator.xiaohongshu.com/publish/publish*")

            await self.upload_images(page)
            await self.fill_content(page, human_typer)

            if self.location and self.location.strip():
                xiaohongshu_logger.info(f" [-] 开始设置地理位置: {self.location}")
                await self.set_location(page, self.location)
            else:
                xiaohongshu_logger.info(" [-] 未指定地点或地点为空,跳过位置设置")

            if self.publish_date != 0:
                await self.set_schedule_time_xiaohongshu(page, self.publish_date)

            xiaohongshu_logger.info(" [-] 准备发布图文...")
            await asyncio.sleep(1)
            await self._publish(page)

            await context.storage_state(path=self.account_file)  # save cookie
            xiaohongshu_logger.success(' [-]cookie更新完毕')
            await asyncio.sleep(2)
            await context.close()
        finally:
            await browser.close()

    async def main(self):
        """Start Playwright and run ``upload``."""
        async with async_playwright() as playwright:
            await self.upload(playwright)