628 lines
22 KiB
Python
628 lines
22 KiB
Python
"""
Xiaohongshu (Little Red Book) platform adapter implementation.
"""
|
|
import asyncio
|
|
from typing import Optional, List, Dict, Any
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
|
|
from playwright.async_api import Page
|
|
|
|
from ..base_adapter import BaseAdapter
|
|
from ...core.models import (
|
|
PlatformType,
|
|
AccountInfo,
|
|
Content,
|
|
ImageNote,
|
|
VideoContent,
|
|
PublishResult,
|
|
UploadStatus,
|
|
PublishStatus
|
|
)
|
|
from ...auth.xiaohongshu_auth import XiaoHongShuAuth
|
|
from ...config.platform_config import get_platform_url, get_selectors, get_wait_times, get_platform_config
|
|
from ...utils.browser import browser_manager
|
|
from ...utils.human_behavior import HumanBehaviorSimulator
|
|
from ...utils.logger import get_platform_logger, get_task_logger
|
|
from ...utils.exceptions import (
|
|
UploadFailedError,
|
|
ContentRejectedError,
|
|
ElementNotFoundError,
|
|
TimeoutError,
|
|
ValidationError
|
|
)
|
|
|
|
logger = get_platform_logger("xiaohongshu")
|
|
|
|
|
|
class XiaoHongShuAdapter(BaseAdapter):
|
|
"""小红书适配器"""
|
|
|
|
def __init__(self):
    """Set up the adapter for the Xiaohongshu platform.

    Registers the platform type with the base adapter, then creates the
    authenticator and the human-behaviour simulator and loads the platform
    configuration.
    """
    platform = PlatformType.XIAOHONGSHU
    super().__init__(platform)

    # Collaborators used by the publishing steps.
    self.auth = XiaoHongShuAuth()
    self.human_behavior = HumanBehaviorSimulator()
    self.config = get_platform_config(platform)
|
|
|
|
def get_authenticator(self):
    """Return the authenticator instance held by this adapter."""
    authenticator = self.auth
    return authenticator
|
|
|
|
async def login(self, account_info: AccountInfo, headless: bool = False) -> bool:
    """Log in to Xiaohongshu by delegating to the authenticator.

    Args:
        account_info: Account to log in with.
        headless: Forwarded unchanged to the authenticator.

    Returns:
        Whatever ``self.auth.login`` returns.
    """
    logged_in = await self.auth.login(account_info, headless)
    return logged_in
|
|
|
|
async def check_login_status(self, page: Page) -> bool:
    """Check the Xiaohongshu login state by delegating to the authenticator.

    Args:
        page: Playwright page object handed to the authenticator.

    Returns:
        Whatever ``self.auth.check_login_status`` returns.
    """
    is_logged_in = await self.auth.check_login_status(page)
    return is_logged_in
|
|
|
|
async def publish_content(
    self,
    page: Page,
    content: Content,
    account_info: AccountInfo
) -> PublishResult:
    """Publish content to Xiaohongshu.

    Validates the content, dispatches to the image-note or video-note
    publisher according to the content type, and records the elapsed time
    on the result. Any exception raised along the way (including a failed
    validation or an unsupported content type) is converted into a failed
    publish result instead of propagating.

    Args:
        page: Playwright page object.
        content: Content to publish.
        account_info: Account information.

    Returns:
        The publish result, with ``duration`` set to the elapsed seconds.
    """
    task_logger = get_task_logger(
        f"xhs_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
        self.platform_name,
        account_info.username
    )

    # Inside a coroutine get_running_loop() is the documented way to reach
    # the loop; fetch it once and reuse its monotonic clock for timing.
    loop = asyncio.get_running_loop()
    start_time = loop.time()
    task_logger.start(f"开始发布内容: {content.title}")

    try:
        # Validate before touching the page.
        is_valid, error_msg = await self.validate_content(content)
        if not is_valid:
            raise ValidationError(error_msg)

        # Pick the publishing routine that matches the content type.
        if isinstance(content, ImageNote):
            result = await self._publish_image_note(page, content, account_info, task_logger)
        elif isinstance(content, VideoContent):
            result = await self._publish_video_note(page, content, account_info, task_logger)
        else:
            raise ValidationError(f"不支持的内容类型: {type(content)}")

        # Record how long the whole publish took.
        duration = loop.time() - start_time
        result.duration = duration

        if result.success:
            task_logger.success(f"内容发布成功,耗时: {duration:.2f}秒")
        else:
            task_logger.failure(f"内容发布失败: {result.message}")

        return result

    except Exception as e:
        # Boundary handler: report the failure as a result object so the
        # caller always receives a PublishResult.
        duration = loop.time() - start_time
        error_msg = f"发布过程异常: {str(e)}"
        task_logger.failure(error_msg)

        return self.create_publish_result(
            success=False,
            message=error_msg,
            task_id=task_logger.task_id,
            account=account_info.username,
            error_details={"exception": str(e), "type": type(e).__name__},
            duration=duration
        )
|
|
|
|
async def validate_content(self, content: Content) -> tuple[bool, str]:
    """Validate content against the Xiaohongshu-specific rules.

    Runs the base adapter validation first. An image note must then carry
    at least one image and no more than the configured ``max_images``
    (default 9). Every image, or the video of a video note, must be an
    existing regular file no larger than ``self.config.max_file_size``.

    Args:
        content: Content to validate.

    Returns:
        ``(True, "")`` when the content is valid, otherwise
        ``(False, reason)``.
    """
    # Base validation.
    is_valid, error_msg = await super().validate_content(content)
    if not is_valid:
        return False, error_msg

    # Collect (label, path) pairs so images and video share one file check.
    if isinstance(content, ImageNote):
        if not content.images:
            return False, "图文笔记必须包含图片"

        max_images = self.config.extra_config.get("max_images", 9)
        if len(content.images) > max_images:
            return False, f"图片数量不能超过{max_images}张"

        media_files = [("图片", img_path) for img_path in content.images]
    elif isinstance(content, VideoContent):
        media_files = [("视频", content.video_path)]
    else:
        media_files = []

    for label, media_path in media_files:
        # is_file() rather than exists(): Path("") resolves to "." and any
        # directory satisfies exists(), which would let an empty or wrong
        # path through. A missing (None/empty) path is reported the same way.
        if not media_path or not Path(media_path).is_file():
            return False, f"{label}文件不存在: {media_path}"

        if Path(media_path).stat().st_size > self.config.max_file_size:
            return False, f"{label}文件过大: {media_path}"

    return True, ""
|
|
|
|
async def _publish_image_note(
    self,
    page: Page,
    content: ImageNote,
    account_info: AccountInfo,
    task_logger
) -> PublishResult:
    """Publish an image note.

    Steps: open the image-note page when a URL is configured, upload the
    images, type the title and description, add tags, then press publish.
    Any exception is logged and turned into a failed publish result.

    Args:
        page: Playwright page object.
        content: Image note to publish.
        account_info: Account the note is published under.
        task_logger: Task logger used for progress and failure messages.

    Returns:
        The publish result for this note.
    """
    try:
        task_logger.progress("开始发布图文笔记")

        # Navigate to the image-note publishing page (only when configured).
        target_url = self.config.extra_config.get("image_note_url")
        if target_url:
            reached = await self.auth.safe_goto(page, target_url)
            if not reached:
                raise ElementNotFoundError("无法访问图文笔记发布页面")
            await asyncio.sleep(3)

        # Upload the images.
        task_logger.progress("开始上传图片")
        uploaded = await self._upload_images(page, content.images, task_logger)
        if not uploaded:
            raise UploadFailedError("图片上传失败")

        # Fill in the title.
        task_logger.progress("填写标题")
        note_selectors = get_selectors(PlatformType.XIAOHONGSHU, "publish", "image_note")
        title_field = note_selectors.get("title_input")
        if title_field:
            await self.human_behavior.human_type(page, title_field, content.title)
            await asyncio.sleep(1)

        # Fill in the description.
        task_logger.progress("填写内容描述")
        body_field = note_selectors.get("content_input")
        if body_field:
            await self.human_behavior.human_type(page, body_field, content.description)
            await asyncio.sleep(1)

        # Add tags.
        if content.tags:
            task_logger.progress("添加标签")
            await self._add_tags(page, content.tags, note_selectors.get("tag_input"))

        # Publish the note.
        task_logger.progress("发布笔记")
        published = await self._publish_note(page, task_logger)

        if not published:
            return self.create_publish_result(
                success=False,
                message="图文笔记发布失败",
                task_id=task_logger.task_id,
                account=account_info.username
            )

        return self.create_publish_result(
            success=True,
            message="图文笔记发布成功",
            task_id=task_logger.task_id,
            account=account_info.username
        )

    except Exception as e:
        reason = str(e)
        task_logger.failure(f"图文笔记发布异常: {reason}")
        return self.create_publish_result(
            success=False,
            message=f"图文笔记发布失败: {reason}",
            task_id=task_logger.task_id,
            account=account_info.username,
            error_details={"exception": reason}
        )
|
|
|
|
async def _publish_video_note(
    self,
    page: Page,
    content: VideoContent,
    account_info: AccountInfo,
    task_logger
) -> PublishResult:
    """Publish a video note.

    Steps: open the video-note page when a URL is configured, upload the
    video, type the title and description, add tags, then press publish.
    Any exception is logged and turned into a failed publish result.

    Args:
        page: Playwright page object.
        content: Video content to publish.
        account_info: Account the note is published under.
        task_logger: Task logger used for progress and failure messages.

    Returns:
        The publish result for this note.
    """
    try:
        task_logger.progress("开始发布视频笔记")

        # Navigate to the video-note publishing page (only when configured).
        target_url = self.config.extra_config.get("video_note_url")
        if target_url:
            reached = await self.auth.safe_goto(page, target_url)
            if not reached:
                raise ElementNotFoundError("无法访问视频笔记发布页面")
            await asyncio.sleep(3)

        # Upload the video.
        task_logger.progress("开始上传视频")
        uploaded = await self._upload_video(page, content.video_path, task_logger)
        if not uploaded:
            raise UploadFailedError("视频上传失败")

        # Fill in the title.
        task_logger.progress("填写标题")
        note_selectors = get_selectors(PlatformType.XIAOHONGSHU, "publish", "video_note")
        title_field = note_selectors.get("title_input")
        if title_field:
            await self.human_behavior.human_type(page, title_field, content.title)
            await asyncio.sleep(1)

        # Fill in the description.
        task_logger.progress("填写内容描述")
        body_field = note_selectors.get("content_input")
        if body_field:
            await self.human_behavior.human_type(page, body_field, content.description)
            await asyncio.sleep(1)

        # Add tags.
        if content.tags:
            task_logger.progress("添加标签")
            await self._add_tags(page, content.tags, note_selectors.get("tag_input"))

        # Publish the note.
        task_logger.progress("发布笔记")
        published = await self._publish_note(page, task_logger)

        if not published:
            return self.create_publish_result(
                success=False,
                message="视频笔记发布失败",
                task_id=task_logger.task_id,
                account=account_info.username
            )

        return self.create_publish_result(
            success=True,
            message="视频笔记发布成功",
            task_id=task_logger.task_id,
            account=account_info.username
        )

    except Exception as e:
        reason = str(e)
        task_logger.failure(f"视频笔记发布异常: {reason}")
        return self.create_publish_result(
            success=False,
            message=f"视频笔记发布失败: {reason}",
            task_id=task_logger.task_id,
            account=account_info.username,
            error_details={"exception": reason}
        )
|
|
|
|
async def _upload_images(
    self,
    page: Page,
    image_paths: List[str],
    task_logger
) -> bool:
    """Upload the images of a note through the page's file input.

    Uses the configured ``image_upload`` selector; when none is configured
    a list of common file-input selectors is probed instead. After the
    files are set on the input, waits for the upload to finish.

    Args:
        page: Playwright page object.
        image_paths: Paths of the images to upload.
        task_logger: Task logger used for progress, warning and error messages.

    Returns:
        True when the upload completed; False on timeout or on any error
        (errors are logged, not raised).
    """
    try:
        selectors = get_selectors(PlatformType.XIAOHONGSHU, "publish", "image_note")
        upload_selector = selectors.get("image_upload")

        if not upload_selector:
            # No configured selector: probe common upload inputs.
            fallback_selectors = [
                "input[type='file']",
                "input[accept*='image']",
                "[data-testid='image-upload']",
                ".upload-btn input",
                "[class*='upload'] input"
            ]

            for candidate in fallback_selectors:
                try:
                    # File inputs are usually hidden, and the default
                    # "visible" wait state would time out on them, so only
                    # require the element to be attached to the DOM.
                    element = await page.wait_for_selector(
                        candidate, state="attached", timeout=5000
                    )
                    if element:
                        upload_selector = candidate
                        break
                except Exception:
                    # Probe failed; try the next candidate. `Exception`
                    # (not a bare except) lets task cancellation propagate.
                    continue

            if not upload_selector:
                raise ElementNotFoundError("未找到图片上传元素")

        # Locate the file input element (attached is enough for hidden inputs).
        upload_input = await page.wait_for_selector(
            upload_selector, state="attached", timeout=10000
        )

        if not upload_input:
            raise ElementNotFoundError("图片上传元素不可用")

        # Hand the image files to the input.
        await upload_input.set_input_files(image_paths)
        task_logger.progress(f"已选择 {len(image_paths)} 张图片")

        # Wait for the upload to complete.
        wait_times = get_wait_times(PlatformType.XIAOHONGSHU, "publish")
        upload_timeout = wait_times.get("image_upload", 30)

        task_logger.progress("等待图片上传完成")
        upload_complete = await self._wait_for_image_upload(page, upload_timeout)

        if upload_complete:
            task_logger.progress("图片上传完成")
            return True

        task_logger.warning("图片上传超时")
        return False

    except Exception as e:
        task_logger.error(f"图片上传异常: {e}")
        return False
|
|
|
|
async def _upload_video(
    self,
    page: Page,
    video_path: str,
    task_logger
) -> bool:
    """Upload the video of a note through the page's file input.

    Uses the configured ``video_upload`` selector; when none is configured
    a list of common file-input selectors is probed instead. After the
    file is set on the input, waits for the upload to finish.

    Args:
        page: Playwright page object.
        video_path: Path of the video file to upload.
        task_logger: Task logger used for progress, warning and error messages.

    Returns:
        True when the upload completed; False on timeout or on any error
        (errors are logged, not raised).
    """
    try:
        selectors = get_selectors(PlatformType.XIAOHONGSHU, "publish", "video_note")
        upload_selector = selectors.get("video_upload")

        if not upload_selector:
            # No configured selector: probe common upload inputs.
            fallback_selectors = [
                "input[type='file']",
                "input[accept*='video']",
                "[data-testid='video-upload']",
                ".upload-btn input",
                "[class*='upload'] input"
            ]

            for candidate in fallback_selectors:
                try:
                    # File inputs are usually hidden, and the default
                    # "visible" wait state would time out on them, so only
                    # require the element to be attached to the DOM.
                    element = await page.wait_for_selector(
                        candidate, state="attached", timeout=5000
                    )
                    if element:
                        upload_selector = candidate
                        break
                except Exception:
                    # Probe failed; try the next candidate. `Exception`
                    # (not a bare except) lets task cancellation propagate.
                    continue

            if not upload_selector:
                raise ElementNotFoundError("未找到视频上传元素")

        # Locate the file input element (attached is enough for hidden inputs).
        upload_input = await page.wait_for_selector(
            upload_selector, state="attached", timeout=10000
        )

        if not upload_input:
            raise ElementNotFoundError("视频上传元素不可用")

        # Hand the video file to the input.
        await upload_input.set_input_files(video_path)
        task_logger.progress("已选择视频文件")

        # Wait for the upload to complete.
        wait_times = get_wait_times(PlatformType.XIAOHONGSHU, "publish")
        upload_timeout = wait_times.get("video_upload", 120)

        task_logger.progress("等待视频上传完成")
        upload_complete = await self._wait_for_video_upload(page, upload_timeout)

        if upload_complete:
            task_logger.progress("视频上传完成")
            return True

        task_logger.warning("视频上传超时")
        return False

    except Exception as e:
        task_logger.error(f"视频上传异常: {e}")
        return False
|
|
|
|
async def _wait_for_image_upload(self, page: Page, timeout: int = 30) -> bool:
|
|
"""等待图片上传完成"""
|
|
start_time = asyncio.get_event_loop().time()
|
|
|
|
# 可能的上传完成指示器
|
|
success_indicators = [
|
|
".upload-success",
|
|
"[data-testid='upload-success']",
|
|
".image-uploaded",
|
|
"[class*='success']",
|
|
".upload-complete"
|
|
]
|
|
|
|
while asyncio.get_event_loop().time() - start_time < timeout:
|
|
try:
|
|
# 检查成功指示器
|
|
for indicator in success_indicators:
|
|
try:
|
|
element = await page.wait_for_selector(indicator, timeout=2000)
|
|
if element and await element.is_visible():
|
|
return True
|
|
except:
|
|
continue
|
|
|
|
# 检查是否有错误提示
|
|
error_indicators = [
|
|
".upload-error",
|
|
"[data-testid='upload-error']",
|
|
".error-message",
|
|
"[class*='error']"
|
|
]
|
|
|
|
for indicator in error_indicators:
|
|
try:
|
|
element = await page.wait_for_selector(indicator, timeout=2000)
|
|
if element and await element.is_visible():
|
|
logger.warning(f"检测到上传错误: {indicator}")
|
|
return False
|
|
except:
|
|
continue
|
|
|
|
await asyncio.sleep(2)
|
|
|
|
except Exception:
|
|
await asyncio.sleep(2)
|
|
|
|
return False
|
|
|
|
async def _wait_for_video_upload(self, page: Page, timeout: int = 120) -> bool:
|
|
"""等待视频上传完成"""
|
|
start_time = asyncio.get_event_loop().time()
|
|
|
|
# 可能的上传完成指示器
|
|
success_indicators = [
|
|
".upload-success",
|
|
"[data-testid='upload-success']",
|
|
".video-uploaded",
|
|
"[class*='success']",
|
|
".upload-complete",
|
|
".progress-100"
|
|
]
|
|
|
|
while asyncio.get_event_loop().time() - start_time < timeout:
|
|
try:
|
|
# 检查成功指示器
|
|
for indicator in success_indicators:
|
|
try:
|
|
element = await page.wait_for_selector(indicator, timeout=3000)
|
|
if element and await element.is_visible():
|
|
return True
|
|
except:
|
|
continue
|
|
|
|
# 检查进度条是否达到100%
|
|
progress_selectors = [
|
|
".upload-progress",
|
|
"[data-testid='upload-progress']",
|
|
".progress-bar"
|
|
]
|
|
|
|
for selector in progress_selectors:
|
|
try:
|
|
element = await page.wait_for_selector(selector, timeout=2000)
|
|
if element:
|
|
# 检查进度是否完成
|
|
text = await element.inner_text()
|
|
if "100%" in text or "完成" in text:
|
|
return True
|
|
except:
|
|
continue
|
|
|
|
await asyncio.sleep(3)
|
|
|
|
except Exception:
|
|
await asyncio.sleep(3)
|
|
|
|
return False
|
|
|
|
async def _add_tags(self, page: Page, tags: List[str], tag_selector: Optional[str]):
|
|
"""添加标签"""
|
|
if not tags or not tag_selector:
|
|
return
|
|
|
|
try:
|
|
for tag in tags:
|
|
# 点击标签输入框
|
|
await self.human_behavior.human_click(page, tag_selector)
|
|
await asyncio.sleep(0.5)
|
|
|
|
# 输入标签
|
|
await self.human_behavior.human_type(page, tag_selector, f"#{tag}")
|
|
await asyncio.sleep(0.5)
|
|
|
|
# 按回车确认标签
|
|
await page.keyboard.press("Enter")
|
|
await asyncio.sleep(0.5)
|
|
|
|
except Exception as e:
|
|
logger.warning(f"添加标签失败: {e}")
|
|
|
|
async def _publish_note(self, page: Page, task_logger, note_type: str = "image_note") -> bool:
    """Click the publish button and wait for the publish to be confirmed.

    Uses the configured ``publish_button`` selector of the given selector
    group; when none is configured a list of common publish-button
    selectors is probed instead.

    Args:
        page: Playwright page object.
        task_logger: Task logger used for progress and error messages.
        note_type: Selector group to read ``publish_button`` from. Defaults
            to ``"image_note"``, the group this method always used.
            NOTE(review): the video flow in this file calls this method
            without a note type, so it reads the image-note selectors;
            it should probably pass ``"video_note"``.

    Returns:
        True when a publish-success indicator was detected; False on
        timeout or on any error (errors are logged, not raised).
    """
    try:
        selectors = get_selectors(PlatformType.XIAOHONGSHU, "publish", note_type)
        publish_button_selector = selectors.get("publish_button")

        if not publish_button_selector:
            # No configured selector: probe common publish buttons.
            fallback_selectors = [
                "button[type='submit']",
                "[data-testid='publish-btn']",
                ".publish-btn",
                "button:has-text('发布')",
                "button:has-text('提交')",
                "[class*='publish'] button"
            ]

            for candidate in fallback_selectors:
                try:
                    element = await page.wait_for_selector(candidate, timeout=5000)
                    if element and await element.is_visible():
                        publish_button_selector = candidate
                        break
                except Exception:
                    # Probe failed; try the next candidate. `Exception`
                    # (not a bare except) lets task cancellation propagate.
                    continue

            if not publish_button_selector:
                raise ElementNotFoundError("未找到发布按钮")

        # Click the publish button.
        await self.human_behavior.human_click(page, publish_button_selector)
        task_logger.progress("已点击发布按钮")

        # Wait for the publish to complete.
        wait_times = get_wait_times(PlatformType.XIAOHONGSHU, "publish")
        publish_timeout = wait_times.get("publish_success", 10)

        task_logger.progress("等待发布完成")
        return await self._wait_for_publish_success(page, publish_timeout)

    except Exception as e:
        task_logger.error(f"发布笔记失败: {e}")
        return False
|
|
|
|
async def _wait_for_publish_success(self, page: Page, timeout: int = 10) -> bool:
|
|
"""等待发布成功"""
|
|
start_time = asyncio.get_event_loop().time()
|
|
|
|
# 可能的成功指示器
|
|
success_indicators = [
|
|
".publish-success",
|
|
"[data-testid='publish-success']",
|
|
".success-message",
|
|
"[class*='success']",
|
|
"发布成功",
|
|
"提交成功"
|
|
]
|
|
|
|
while asyncio.get_event_loop().time() - start_time < timeout:
|
|
try:
|
|
# 检查成功指示器
|
|
for indicator in success_indicators:
|
|
if indicator.startswith('.') or indicator.startswith('['):
|
|
# CSS选择器
|
|
try:
|
|
element = await page.wait_for_selector(indicator, timeout=2000)
|
|
if element and await element.is_visible():
|
|
return True
|
|
except:
|
|
continue
|
|
else:
|
|
# 文本内容
|
|
if await page.locator(f"text={indicator}").count() > 0:
|
|
return True
|
|
|
|
await asyncio.sleep(1)
|
|
|
|
except Exception:
|
|
await asyncio.sleep(1)
|
|
|
|
return False |