feat: update xhs sign

This commit is contained in:
程序员阿江(Relakkes)
2025-10-27 19:06:07 +08:00
parent ed6e0bfb5f
commit 3f5925e326
5 changed files with 79 additions and 20 deletions

View File

@@ -17,7 +17,7 @@ SORT_TYPE = "popularity_descending"
# 指定笔记URL列表, 必须要携带xsec_token参数 # 指定笔记URL列表, 必须要携带xsec_token参数
XHS_SPECIFIED_NOTE_URL_LIST = [ XHS_SPECIFIED_NOTE_URL_LIST = [
"https://www.xiaohongshu.com/explore/66fad51c000000001b0224b8?xsec_token=AB3rO-QopW5sgrJ41GwN01WCXh6yWPxjSoFI9D5JIMgKw=&xsec_source=pc_search" "https://www.xiaohongshu.com/explore/68f99f6d0000000007033fcf?xsec_token=ABZEzjuN2fPjKF9EcMsCCxfbt3IBRsFZldGFoCJbdDmXI=&xsec_source=pc_feed"
# ........................ # ........................
] ]

View File

@@ -10,23 +10,24 @@
import asyncio import asyncio
import json import json
import re import time
from typing import Any, Callable, Dict, List, Optional, Union from typing import Any, Callable, Dict, List, Optional, Union
from urllib.parse import urlencode from urllib.parse import urlencode
import httpx import httpx
from playwright.async_api import BrowserContext, Page from playwright.async_api import BrowserContext, Page
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_result from tenacity import retry, stop_after_attempt, wait_fixed
import config import config
from base.base_crawler import AbstractApiClient from base.base_crawler import AbstractApiClient
from tools import utils from tools import utils
from html import unescape
from .exception import DataFetchError, IPBlockError from .exception import DataFetchError, IPBlockError
from .field import SearchNoteType, SearchSortType from .field import SearchNoteType, SearchSortType
from .help import get_search_id, sign from .help import get_search_id, sign
from .extractor import XiaoHongShuExtractor from .extractor import XiaoHongShuExtractor
from .secsign import seccore_signv2_playwright
class XiaoHongShuClient(AbstractApiClient): class XiaoHongShuClient(AbstractApiClient):
@@ -63,15 +64,13 @@ class XiaoHongShuClient(AbstractApiClient):
Returns: Returns:
""" """
encrypt_params = await self.playwright_page.evaluate( x_s = await seccore_signv2_playwright(self.playwright_page, url, data)
"([url, data]) => window._webmsxyw(url,data)", [url, data]
)
local_storage = await self.playwright_page.evaluate("() => window.localStorage") local_storage = await self.playwright_page.evaluate("() => window.localStorage")
signs = sign( signs = sign(
a1=self.cookie_dict.get("a1", ""), a1=self.cookie_dict.get("a1", ""),
b1=local_storage.get("b1", ""), b1=local_storage.get("b1", ""),
x_s=encrypt_params.get("X-s", ""), x_s=x_s,
x_t=str(encrypt_params.get("X-t", "")), x_t=str(int(time.time())),
) )
headers = { headers = {

View File

@@ -282,16 +282,9 @@ class XiaoHongShuCrawler(AbstractCrawler):
async with semaphore: async with semaphore:
try: try:
utils.logger.info(f"[get_note_detail_async_task] Begin get note detail, note_id: {note_id}") utils.logger.info(f"[get_note_detail_async_task] Begin get note detail, note_id: {note_id}")
note_detail = await self.xhs_client.get_note_by_id_from_html(note_id, xsec_source, xsec_token, enable_cookie=True)
try:
note_detail = await self.xhs_client.get_note_by_id(note_id, xsec_source, xsec_token)
except RetryError:
pass
if not note_detail: if not note_detail:
note_detail = await self.xhs_client.get_note_by_id_from_html(note_id, xsec_source, xsec_token, enable_cookie=True) raise Exception(f"[get_note_detail_async_task] Failed to get note detail, Id: {note_id}")
if not note_detail:
raise Exception(f"[get_note_detail_async_task] Failed to get note detail, Id: {note_id}")
note_detail.update({"xsec_token": xsec_token, "xsec_source": xsec_source}) note_detail.update({"xsec_token": xsec_token, "xsec_source": xsec_source})

View File

@@ -27,16 +27,17 @@ def sign(a1="", b1="", x_s="", x_t=""):
"s0": 3, # getPlatformCode "s0": 3, # getPlatformCode
"s1": "", "s1": "",
"x0": "1", # localStorage.getItem("b1b1") "x0": "1", # localStorage.getItem("b1b1")
"x1": "3.7.8-2", # version "x1": "4.2.2", # version
"x2": "Mac OS", "x2": "Mac OS",
"x3": "xhs-pc-web", "x3": "xhs-pc-web",
"x4": "4.27.2", "x4": "4.74.0",
"x5": a1, # cookie of a1 "x5": a1, # cookie of a1
"x6": x_t, "x6": x_t,
"x7": x_s, "x7": x_s,
"x8": b1, # localStorage.getItem("b1") "x8": b1, # localStorage.getItem("b1")
"x9": mrc(x_t + x_s + b1), "x9": mrc(x_t + x_s + b1),
"x10": 154, # getSigCount "x10": 154, # getSigCount
"x11": "normal"
} }
encode_str = encodeUtf8(json.dumps(common, separators=(',', ':'))) encode_str = encodeUtf8(json.dumps(common, separators=(',', ':')))
x_s_common = b64Encode(encode_str) x_s_common = b64Encode(encode_str)

View File

@@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
# 3. 不得进行大规模爬取或对平台造成运营干扰。
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
# 5. 不得用于任何非法或不当的用途。
#
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
import hashlib
import base64
import json
from typing import Any
def _build_c(e: Any, a: Any) -> str:
    """Build the string that is hashed and signed for one request.

    Args:
        e: Leading part of the string; converted with ``str()``.
        a: Payload. A ``dict`` or ``list`` is appended as compact JSON
            (no whitespace after separators, non-ASCII characters kept
            as-is); a ``str`` is appended verbatim; any other type is
            ignored.

    Returns:
        ``str(e)`` followed by the serialized payload, if there is one.
    """
    prefix = str(e)
    if isinstance(a, str):
        return prefix + a
    if isinstance(a, (dict, list)):
        return prefix + json.dumps(a, separators=(",", ":"), ensure_ascii=False)
    # Any other payload type (None, numbers, ...) contributes nothing.
    return prefix
# ---------------------------
# p.Pu = MD5(c) => lowercase hex
# ---------------------------
def _md5_hex(s: str) -> str:
    """Return the lowercase hexadecimal MD5 digest of ``s`` encoded as UTF-8."""
    hasher = hashlib.md5()
    hasher.update(s.encode("utf-8"))
    return hasher.hexdigest()
# ============================================================
# Playwright version (async): takes a `page` (Playwright Page object)
# and calls window.mnsv2 inside it through page.evaluate.
# ============================================================
async def seccore_signv2_playwright(
    page,  # Playwright Page
    e: Any,
    a: Any,
) -> str:
    """Generate a signature token by calling ``window.mnsv2`` in the page.

    The string to sign is ``c = _build_c(e, a)`` and ``d`` is its MD5 hex
    digest; both are handed to ``window.mnsv2(c, d)`` inside the browser
    context. The value returned by the page is wrapped in a small JSON
    envelope, Base64-encoded and prefixed with ``"XYS_"``.

    ``page.evaluate`` accepts a single argument, so the pair is sent as a
    list and destructured on the JavaScript side::

        s = await page.evaluate("([c, d]) => window.mnsv2(c, d)", [c, d])

    Args:
        page: Playwright ``Page`` whose context already exposes
            ``window.mnsv2`` (for example because the target site's
            scripts have been loaded).
        e: First component of the string to sign (see ``_build_c``).
        a: Request payload. It is appended to the string to sign (see
            ``_build_c``) and is also embedded unchanged as ``"x4"`` in the
            envelope, so it must be JSON-serializable.

    Returns:
        ``"XYS_"`` followed by the Base64 text of the compact JSON envelope.

    Raises:
        TypeError: If ``a`` (or the value returned by the page) cannot be
            serialized by ``json.dumps``.
    """
    c = _build_c(e, a)
    d = _md5_hex(c)

    # Destructure in JS: without it `c` would be the whole [c, d] array and
    # `d` would be undefined, because evaluate() forwards exactly one argument.
    s = await page.evaluate("([c, d]) => window.mnsv2(c, d)", [c, d])

    f = {
        "x0": "4.2.6",
        "x1": "xhs-pc-web",
        "x2": "Mac OS",
        "x3": s,
        "x4": a,
    }
    payload = json.dumps(f, separators=(",", ":"), ensure_ascii=False).encode("utf-8")
    # The token is returned only; it is no longer echoed to stdout.
    return "XYS_" + base64.b64encode(payload).decode("ascii")