"""Collect note links from a Xiaohongshu author profile page.

Restores a saved browser session from a cookie file, opens the profile page
in Safari, scrolls to the bottom repeatedly and, after every scroll, writes
all note links seen so far to ``herf_result.json``.
"""

import selenium
from selenium import webdriver
from selenium.common.exceptions import InvalidCookieDomainException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import json
import random
from urllib.parse import urlsplit

# CSS selector matched by grab_herf() and awaited by main().
NOTE_LINK_SELECTOR = "a.cover.mask.ld"

COOKIES_PATH = "/Users/yarrow/Spider/seleniumXH/cookies.json"
OUTPUT_PATH = "herf_result.json"

# The original counter loop (i = 0 .. 100) performed 101 rounds.
SCROLL_ROUNDS = 101

# Alternative targets that were tried before (kept for reference):
# "https://www.xiaohongshu.com/search_result?keyword=%25E6%2597%2585%25E6%25B8%25B8%25E6%2594%25BB%25E7%2595%25A5&source=web_explore_feed"
# "https://www.xiaohongshu.com/user/profile/5b0c19d2e8ac2b4f75c6cad2?xsec_token=ABTQo4AIGKlpW474JBfIZcd8Ln8DQr1Ugt_Yk-w3h1bzs%3D&xsec_source=pc_search"
# "https://www.xiaohongshu.com/user/profile/5ac968dbe8ac2b0a5581b909?xsec_token=YBG4U7EYzyAxyWC5H0HQKip9JT9B9KxrA43gWtkA3mi1A%3D&xsec_source=app_share&xhsshare=WeixinSession&appuid=6326ddf20000000023024438&apptime=1741862947&share_id=5da7602111cb431ca55a0dcfd6b20bc3&share_channel=wechat"
PROFILE_URL = "https://www.xiaohongshu.com/user/profile/663057ef000000001e0060c9?xsec_token=YB1i6fylSYGpHg1gVgG5KZohqjJvDmwR96wwU_AQxdRrI=&xsec_source=app_share&xhsshare=WeixinSession&appuid=6326ddf20000000023024438&apptime=1741863000&share_id=b53d29eb6ce144c4b2d99035e4369d55&share_channel=wechat&wechatWid=cb745eed2e2630a361f9ce99520d9c9a&wechatOrigin=menu"


def grab_herf(driver):
    """Return the ``href`` attribute of every note link on the current page.

    Args:
        driver: Selenium WebDriver whose current page is searched.

    Returns:
        list: One entry per element matching ``a.cover.mask.ld``; an entry is
        whatever ``get_attribute("href")`` returns for that element.
    """
    anchors = driver.find_elements(By.CSS_SELECTOR, NOTE_LINK_SELECTOR)
    return [anchor.get_attribute("href") for anchor in anchors]


def scroll_page(driver):
    """Scroll to the bottom of the page, then pause for a random 0-2 seconds.

    Returns:
        bool: Always ``True``.
    """
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(random.random() * 2)
    return True


def _restore_session(driver, url, cookies_path):
    """Add the cookies stored in *cookies_path* to *driver* for the site of *url*."""
    with open(cookies_path, "r") as cookie_file:
        cookies = json.load(cookie_file)

    # WebDriver only accepts cookies for the domain of the page that is
    # currently loaded, so open the site root before adding any cookie.
    site = urlsplit(url)
    driver.get(f"{site.scheme}://{site.netloc}/")

    for cookie in cookies:
        # Selenium's add_cookie() accepts only 'Strict', 'Lax' or 'None' here;
        # the value stored in the cookie file is overwritten unconditionally.
        cookie['sameSite'] = 'Strict'
        try:
            driver.add_cookie(cookie)
        except InvalidCookieDomainException:
            # A cookie scoped to another host must not abort the whole restore.
            print(f"skipped cookie {cookie.get('name')!r} "
                  f"(domain {cookie.get('domain')!r} does not match {site.netloc})")


def main(url=PROFILE_URL, cookies_path=COOKIES_PATH, output_path=OUTPUT_PATH):
    """Scrape note links from *url* and keep *output_path* up to date.

    Args:
        url: Page to scrape; defaults to the profile used by the original script.
        cookies_path: JSON file holding a list of cookie dicts.
        output_path: JSON file rewritten after every scroll round.
    """
    driver = webdriver.Safari()
    try:
        _restore_session(driver, url, cookies_path)

        # Open the target page.
        driver.get(url)
        driver.maximize_window()
        # Fixed pause for the page to load (an explicit wait on the element
        # with id "search_result" was tried and disabled).
        time.sleep(1.5)

        # Disabled draft: open the filter menu and pick its first entry
        # (noted as "hottest" by the author).
        # driver.find_element(By.CSS_SELECTOR, ".filter").click()
        # WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, ".filter-item")))
        # driver.find_element(By.CSS_SELECTOR, ".filter-item").click()

        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, NOTE_LINK_SELECTOR))
        )

        # Dict keys keep first-seen order and drop repeats: each round sees the
        # links of earlier rounds again, which used to be appended every time.
        seen = {}
        for _ in range(SCROLL_ROUNDS):
            seen.update(dict.fromkeys(href for href in grab_herf(driver) if href))
            scroll_page(driver)
            herf_result = list(seen)
            print(herf_result)
            # Rewrite the file every round so an interrupted run keeps its progress.
            with open(output_path, "w") as f:
                json.dump(herf_result, f)
            time.sleep(random.random())
    finally:
        # Close the browser even when a wait times out or a lookup fails.
        driver.quit()


if __name__ == "__main__":
    main()
"session": false, + "path": "/", + "sameSite": "no_restriction" + }, + { + "domain": ".xiaohongshu.com", + "secure": false, + "hostOnly": false, + "name": "galaxy_creator_session_id", + "expirationDate": 1748359522, + "httpOnly": true, + "storeId": null, + "value": "6j9dY9sRYxLZyJ4kkWstiHYcgXKRc3Ey6xlJ", + "session": false, + "path": "/", + "sameSite": "no_restriction" + }, + { + "domain": ".xiaohongshu.com", + "secure": false, + "hostOnly": false, + "name": "webBuild", + "httpOnly": false, + "storeId": null, + "value": "4.62.3", + "session": true, + "path": "/", + "sameSite": "no_restriction" + }, + { + "domain": ".xiaohongshu.com", + "secure": false, + "hostOnly": false, + "name": "galaxy.creator.beaker.session.id", + "expirationDate": 1748359522, + "httpOnly": true, + "storeId": null, + "value": "1747754722335059314628", + "session": false, + "path": "/", + "sameSite": "no_restriction" + }, + { + "domain": ".xiaohongshu.com", + "secure": false, + "hostOnly": false, + "name": "sec_poison_id", + "expirationDate": 1747755311, + "httpOnly": false, + "storeId": null, + "value": "2b6e92d1-152e-4037-a7bf-ee39288f96ec", + "session": false, + "path": "/", + "sameSite": "no_restriction" + }, + { + "domain": ".xiaohongshu.com", + "secure": true, + "hostOnly": false, + "name": "web_session", + "expirationDate": 1778915347, + "httpOnly": true, + "storeId": null, + "value": "040069b295652fcb5b6efaac133a4b4d33de2b", + "session": false, + "path": "/", + "sameSite": "no_restriction" + }, + { + "domain": ".xiaohongshu.com", + "secure": false, + "hostOnly": false, + "name": "gid", + "expirationDate": 1748359516, + "httpOnly": false, + "storeId": null, + "value": "yjKdd44jJjCKyjKdd44qiJ4yKq3I38Dx71yV8l73j6YFixq8ETUl6l888J2YYjy8Di0qSif2", + "session": false, + "path": "/", + "sameSite": "no_restriction" + }, + { + "domain": ".xiaohongshu.com", + "secure": false, + "hostOnly": false, + "name": "a1", + "expirationDate": 1779290441, + "httpOnly": false, + "storeId": null, + "value": 
"196ee485c98k24abinkkxhljpritrl9fxglkt9o7c30000331929", + "session": false, + "path": "/", + "sameSite": "no_restriction" + }, + { + "domain": ".xiaohongshu.com", + "secure": false, + "hostOnly": false, + "name": "abRequestId", + "expirationDate": 1779290182, + "httpOnly": false, + "storeId": null, + "value": "00cc8c0b-f731-52f6-b667-dacaeca1f7e2", + "session": false, + "path": "/", + "sameSite": "no_restriction" + }, + { + "domain": ".xiaohongshu.com", + "secure": false, + "hostOnly": false, + "name": "access-token-creator.xiaohongshu.com", + "expirationDate": 1748359522, + "httpOnly": true, + "storeId": null, + "value": "customer.creator.AT-68c5175065493727485665796ag6hmazddetdq91", + "session": false, + "path": "/", + "sameSite": "no_restriction" + }, + { + "domain": "creator.xiaohongshu.com", + "secure": false, + "hostOnly": true, + "name": "acw_tc", + "expirationDate": 1747756505, + "httpOnly": true, + "storeId": null, + "value": "0a0d096b17477547051863398e5a52460db5290a4670ce5b26b9e218778601", + "session": false, + "path": "/", + "sameSite": "no_restriction" + }, + { + "domain": ".xiaohongshu.com", + "secure": false, + "hostOnly": false, + "name": "customer-sso-sid", + "expirationDate": 1748359521, + "httpOnly": true, + "storeId": null, + "value": "68c517506549372748883335ojrtogdsaqqck680", + "session": false, + "path": "/", + "sameSite": "no_restriction" + }, + { + "domain": ".xiaohongshu.com", + "secure": false, + "hostOnly": false, + "name": "customerClientId", + "expirationDate": 1748359522, + "httpOnly": true, + "storeId": null, + "value": "554166055186792", + "session": false, + "path": "/", + "sameSite": "no_restriction" + }, + { + "domain": ".xiaohongshu.com", + "secure": false, + "hostOnly": false, + "name": "unread", + "httpOnly": false, + "storeId": null, + "value": "{%22ub%22:%2268221899000000002202fa33%22%2C%22ue%22:%2268240ac4000000002100f82b%22%2C%22uc%22:27}", + "session": true, + "path": "/", + "sameSite": "no_restriction" + }, + { + 
"""Collect note links from a Xiaohongshu search-result page.

Restores a saved browser session from a cookie file, opens the search page in
Safari, scrolls to the bottom repeatedly and, after every scroll, writes all
note links seen so far to ``herf_result.json``.
"""

import selenium
from selenium import webdriver
from selenium.common.exceptions import InvalidCookieDomainException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import json
import random
from urllib.parse import urlsplit

# CSS selector matched by grab_herf() and awaited by main().
NOTE_LINK_SELECTOR = "a.cover.mask.ld"

COOKIES_PATH = "/Users/yarrow/Spider/seleniumXH/cookies.json"
OUTPUT_PATH = "herf_result.json"

# The original counter loop (i = 0 .. 100) performed 101 rounds.
SCROLL_ROUNDS = 101

# Alternative query that was tried before (kept for reference):
# "https://www.xiaohongshu.com/search_result?keyword=%25E5%2591%25A8%25E6%259C%25AB%25E5%258E%25BB%25E5%2593%25AA%25E7%258E%25A9&source=web_search_result_notes"
SEARCH_URL = "https://www.xiaohongshu.com/search_result?keyword=%25E6%2597%2585%25E6%25B8%25B8%25E6%2594%25BB%25E7%2595%25A5&source=web_explore_feed"


def grab_herf(driver):
    """Return the ``href`` attribute of every note link on the current page.

    Args:
        driver: Selenium WebDriver whose current page is searched.

    Returns:
        list: One entry per element matching ``a.cover.mask.ld``; an entry is
        whatever ``get_attribute("href")`` returns for that element.
    """
    anchors = driver.find_elements(By.CSS_SELECTOR, NOTE_LINK_SELECTOR)
    return [anchor.get_attribute("href") for anchor in anchors]


def scroll_page(driver):
    """Scroll to the bottom of the page, then pause for a random 0-2 seconds.

    Returns:
        bool: Always ``True``.
    """
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(random.random() * 2)
    return True


def _restore_session(driver, url, cookies_path):
    """Add the cookies stored in *cookies_path* to *driver* for the site of *url*."""
    with open(cookies_path, "r") as cookie_file:
        cookies = json.load(cookie_file)

    # WebDriver only accepts cookies for the domain of the page that is
    # currently loaded, so open the site root before adding any cookie.
    site = urlsplit(url)
    driver.get(f"{site.scheme}://{site.netloc}/")

    for cookie in cookies:
        # Selenium's add_cookie() accepts only 'Strict', 'Lax' or 'None' here;
        # the value stored in the cookie file is overwritten unconditionally.
        cookie['sameSite'] = 'Strict'
        try:
            driver.add_cookie(cookie)
        except InvalidCookieDomainException:
            # A cookie scoped to another host must not abort the whole restore.
            print(f"skipped cookie {cookie.get('name')!r} "
                  f"(domain {cookie.get('domain')!r} does not match {site.netloc})")


def main(url=SEARCH_URL, cookies_path=COOKIES_PATH, output_path=OUTPUT_PATH):
    """Scrape note links from *url* and keep *output_path* up to date.

    Args:
        url: Page to scrape; defaults to the search used by the original script.
        cookies_path: JSON file holding a list of cookie dicts.
        output_path: JSON file rewritten after every scroll round.
    """
    driver = webdriver.Safari()
    try:
        _restore_session(driver, url, cookies_path)

        # Open the target page.
        driver.get(url)
        driver.maximize_window()
        # Fixed pause for the page to load (an explicit wait on the element
        # with id "search_result" was tried and disabled).
        time.sleep(1.5)

        # Disabled draft: open the filter menu and pick its first entry
        # (noted as "hottest" by the author).
        # driver.find_element(By.CSS_SELECTOR, ".filter").click()
        # WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, ".filter-item")))
        # driver.find_element(By.CSS_SELECTOR, ".filter-item").click()

        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, NOTE_LINK_SELECTOR))
        )

        # Dict keys keep first-seen order and drop repeats: each round sees the
        # links of earlier rounds again, which used to be appended every time.
        seen = {}
        for _ in range(SCROLL_ROUNDS):
            seen.update(dict.fromkeys(href for href in grab_herf(driver) if href))
            scroll_page(driver)
            herf_result = list(seen)
            print(herf_result)
            # Rewrite the file every round so an interrupted run keeps its progress.
            with open(output_path, "w") as f:
                json.dump(herf_result, f)
            time.sleep(random.random())
    finally:
        # Close the browser even when a wait times out or a lookup fails.
        driver.quit()


if __name__ == "__main__":
    main()
"""Log in to the Xiaohongshu creator site by QR code and save the cookies.

The script opens the login page in Safari, stores the QR image shown there as
``qrcode.png``, waits for the URL to change after the user scans the code and
then dumps the browser cookies to ``cookies_xhs.json``.
"""

import requests
import selenium
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import json
import base64

LOGIN_URL = "https://creator.xiaohongshu.com/login"
# Image that is clicked first; presumably switches the form to QR login
# (NOTE(review): generated class name, confirm against the live page).
QR_SWITCH_XPATH = "//img[@class='css-wemwzq']"
# Class of the <img> whose ``src`` holds the QR code.
QR_IMAGE_CLASS = "css-1lhmg90"
# Generous timeout so the user has time to scan the code.
LOGIN_TIMEOUT_SECONDS = 120


def _decode_data_uri(src):
    """Return the decoded payload of a base64 ``data:`` URI.

    Raises:
        ValueError: If *src* is empty or is not a ``data:...;base64,`` URI
            (e.g. the image is served from a regular URL).
    """
    header, comma, payload = (src or "").partition(",")
    if not comma or not header.startswith("data:") or "base64" not in header:
        raise ValueError(f"QR image src is not a base64 data URI: {src!r:.80}")
    return base64.b64decode(payload)


def main():
    """Run the QR login flow.

    Returns:
        bool: ``True`` if the URL change that signals a login was observed,
        ``False`` if the wait timed out. Cookies are saved in both cases,
        as in the original script.
    """
    driver = webdriver.Safari()
    try:
        driver.maximize_window()
        driver.get(LOGIN_URL)
        time.sleep(5)

        switch = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, QR_SWITCH_XPATH))
        )
        switch.click()

        # Wait for the QR image, give it a moment, then read it.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, QR_IMAGE_CLASS))
        )
        time.sleep(3)
        img_element = driver.find_element(By.CLASS_NAME, QR_IMAGE_CLASS)
        img_src = img_element.get_attribute("src")
        print(f"img_src: {img_src}")
        img_data = _decode_data_uri(img_src)
        print(img_data)
        with open("qrcode.png", "wb") as f:
            f.write(img_data)

        # Ask the user to scan the code.
        print("二维码已保存为 qrcode.png,请使用小红书 App 扫描登录")
        print("等待登录中...")

        # Remember the current URL to detect the redirect after login.
        current_url = driver.current_url

        # Method 1: detect a URL change.
        try:
            WebDriverWait(driver, LOGIN_TIMEOUT_SECONDS).until(
                lambda d: d.current_url != current_url
            )
        except TimeoutException:
            # Only a timeout means "no redirect"; Ctrl-C and driver errors now
            # propagate instead of being swallowed by a bare ``except``.
            logged_in = False
            print("URL 没有变化,尝试检测登录状态元素...")
            # Method 2 (disabled draft): wait for an element that only exists
            # after login; the selector must be adapted to the real page.
            # WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.CLASS_NAME, "user-avatar")))
        else:
            logged_in = True
            print("检测到页面跳转,登录成功!")

        # Cookies are saved on a best-effort basis even without a redirect,
        # because a login that keeps the same URL would otherwise be lost.
        cookies = driver.get_cookies()
        with open("cookies_xhs.json", "w") as f:
            json.dump(cookies, f, indent=4)
        print("已保存 cookies 到 cookies_xhs.json 文件")
        return logged_in
    finally:
        # Close the browser on success and on every failure path.
        driver.quit()


if __name__ == "__main__":
    main()
"""Publish an image or video note on the Xiaohongshu creator site via Selenium."""

import selenium
from selenium import webdriver
from selenium.common.exceptions import InvalidCookieDomainException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import json
import base64
import os
from urllib.parse import urlsplit


class xhsPublisher():
    """Drive the creator "publish" page with an already created WebDriver.

    NOTE(review): all XPaths below rely on generated ``data-v-*`` attributes
    and exact class strings copied from the page; they are likely to break
    when the site is rebuilt.
    """

    UPLOAD_INPUT_XPATH = "//input[@data-v-4fabe6c5 and @data-v-7cbccdb2-s and @class='upload-input']"
    TITLE_XPATH = "//input[@class='d-text']"
    EDITOR_XPATH = "//div[@class='ql-editor ql-blank']"
    PUBLISH_BUTTON_XPATH = "//button[@data-v-34b0c0bc and @data-v-9bfbb062 and @data-v-1fed608d-s and @type='button' and @class='d-button d-button-large --size-icon-large --size-text-h6 d-button-with-content --color-static bold --color-bg-fill --color-text-paragraph custom-button red publishBtn' and @data-eaglet-imp='true']"

    def __init__(self, driver, cookies_path):
        """Store the driver, read the cookie file and load the cookies.

        Args:
            driver: Selenium WebDriver to control.
            cookies_path: JSON file holding a list of cookie dicts.
        """
        self.driver = driver
        self.cookies_path = cookies_path
        # Set before load_cookies(), which derives the site origin from it.
        self.WEB_URL = "https://creator.xiaohongshu.com/publish/publish?from=menu"
        with open(self.cookies_path, "r") as cookie_file:
            self.cookies = json.load(cookie_file)
        self.load_cookies()

    def load_cookies(self):
        """Add every cookie in ``self.cookies`` to the browser session.

        Returns:
            bool: Always ``True``.
        """
        # WebDriver only accepts cookies for the domain of the page that is
        # currently loaded, so open the site root before adding any cookie.
        site = urlsplit(self.WEB_URL)
        self.driver.get(f"{site.scheme}://{site.netloc}/")
        for cookie in self.cookies:
            # Selenium's add_cookie() accepts only 'Strict', 'Lax' or 'None';
            # the stored value is overwritten unconditionally.
            cookie['sameSite'] = 'Strict'
            try:
                self.driver.add_cookie(cookie)
            except InvalidCookieDomainException:
                # A cookie scoped to another host must not abort the rest.
                print(f"skipped cookie {cookie.get('name')!r} "
                      f"(domain {cookie.get('domain')!r} does not match {site.netloc})")
        return True

    def open_web(self):
        """Open the publish page. Returns ``True``."""
        self.driver.get(self.WEB_URL)
        return True

    def select_upload_type(self, upload_type):
        """Click the tab/area for ``"image"`` or ``"video"`` uploads.

        Any other *upload_type* is ignored. Returns ``True``.

        NOTE(review): the author marked this method as not finished. The
        image branch matches ``creator-tab active`` while the demo script in
        this file clicks ``creator-tab``; confirm which one is intended.
        """
        if upload_type == "image":
            # Image-and-text note.
            self.driver.find_element(By.XPATH, "//div[@data-v-16fdb080 and @data-v-a964f0b4 and @class='creator-tab active']").click()
        elif upload_type == "video":
            # Video note.
            self.driver.find_element(By.XPATH, "//div[@data-v-7cbccdb2 and @data-v-08fc0cfe and @class='drag-over']").click()
        return True

    def _send_file(self, file_path):
        """Hand the absolute path of *file_path* to the page's file input."""
        absolute_path = os.path.abspath(file_path)
        if not os.path.isfile(absolute_path):
            raise FileNotFoundError(absolute_path)
        # A file <input> is filled by typing the local path into it; typing the
        # base64-encoded file content (as before) does not upload anything.
        self.driver.find_element(By.XPATH, self.UPLOAD_INPUT_XPATH).send_keys(absolute_path)

    def upload_image(self, image_path):
        """Upload the image at *image_path*.

        Returns:
            bool: Always ``True``.

        Raises:
            FileNotFoundError: If *image_path* is not an existing file.
        """
        self._send_file(image_path)
        return True

    def upload_video(self, video_path):
        """Upload the video at *video_path*.

        Returns:
            bool: Always ``True``.

        Raises:
            FileNotFoundError: If *video_path* is not an existing file.
        """
        self._send_file(video_path)
        return True

    def transbase64(self, file_path):
        """Return the content of *file_path* as a base64 text string."""
        with open(file_path, "rb") as source:
            return base64.b64encode(source.read()).decode('utf-8')

    def publish(self, title, content):
        """Type *title* and *content* into the note form. Returns ``True``."""
        self.driver.find_element(By.XPATH, self.TITLE_XPATH).send_keys(title)
        self.driver.find_element(By.XPATH, self.EDITOR_XPATH).send_keys(content)
        return True

    def click_publish(self):
        """Click the publish button. Returns ``True``."""
        self.driver.find_element(By.XPATH, self.PUBLISH_BUTTON_XPATH).click()
        return True

    def publish_article(self, title, content, image_path, video_path):
        """Publish an image note; *video_path* is accepted but not used."""
        self.select_upload_type("image")
        self.upload_image(image_path)
        self.publish(title, content)
        self.click_publish()
        return True

    def publish_video(self, title, content, video_path):
        """Publish a video note."""
        self.select_upload_type("video")
        self.upload_video(video_path)
        self.publish(title, content)
        self.click_publish()
        return True

    def work(self, type, title, content, image_path, video_path):
        """Open the publish page, publish one note and close the browser.

        Args:
            type: ``"article"`` or ``"video"``; anything else is reported as
                "Invalid type".
            title: Note title.
            content: Note body text.
            image_path: Image file used for ``"article"``.
            video_path: Video file used for ``"video"``.

        Returns:
            bool: ``True`` if every step completed, ``False`` if an error was
            caught (the error is printed, not raised).
        """
        succeeded = False
        try:
            self.open_web()
            self.driver.maximize_window()
            # implicitly_wait() takes seconds: element lookups keep polling
            # for up to 500 s before failing; it is not a pause.
            self.driver.implicitly_wait(500)
            if type == "article":
                self.publish_article(title, content, image_path, video_path)
            elif type == "video":
                self.publish_video(title, content, video_path)
            else:
                raise ValueError("Invalid type")
            succeeded = True
        except Exception as e:
            print(e)
        finally:
            # Single quit: the browser used to be closed in the try body and
            # again here, so the second call hit an already closed session.
            self.driver.quit()
        return succeeded


def main():
    """Demo run: publish one image note with hard-coded local paths."""
    driver = webdriver.Safari()
    try:
        # Full-size browser window.
        driver.maximize_window()

        # The constructor opens the site and loads the saved cookies.
        publisher = xhsPublisher(driver, "/Users/yarrow/Spider/seleniumXH/cookies.json")
        publisher.open_web()

        # Fake the geolocation (latitude/longitude). Installed after the page
        # is loaded because a script injected before navigation is discarded
        # with the old document. NOTE(review): still too late if the page has
        # already queried the position while loading.
        driver.execute_script("navigator.geolocation.getCurrentPosition = function(success) {"
                              "success({coords: {latitude: 37.7749, longitude: -122.4194}});"
                              "};")

        # Lookup timeout in seconds (not a pause) for the next two elements.
        driver.implicitly_wait(3000)
        # Switch to the "upload image-text" tab (labelled 上传图文 on the page).
        driver.find_element(By.XPATH, "//div[@data-v-16fdb080 and @data-v-a964f0b4 and @class='creator-tab']").click()
        publisher.upload_image("/Users/yarrow/Spider/cableCar.jpg")
        print("send keys /Users/yarrow/Spider/cableCar.jpg")

        # Shortens the lookup timeout to 10 s; this call itself does not wait.
        driver.implicitly_wait(10)
        print("wait 10 seconds")

        # Title and body. NOTE(review): the original body was a triple-quoted
        # literal whose exact line indentation is not recoverable here.
        publisher.publish("缆车", "缆车 \n\n#周末去哪儿玩\n")
        print("send keys 缆车")
        print("send keys 缆车")

        publisher.click_publish()
        input("press any key to quit")
    finally:
        # Close the browser even if a step above failed.
        driver.quit()


if __name__ == "__main__":
    main()