Mirror of https://github.com/NanmiCoder/MediaCrawler.git
feat: update xhs sign
@@ -17,7 +17,7 @@ SORT_TYPE = "popularity_descending"
 
 # Specified note URL list; each URL must carry the xsec_token parameter
 XHS_SPECIFIED_NOTE_URL_LIST = [
-    "https://www.xiaohongshu.com/explore/66fad51c000000001b0224b8?xsec_token=AB3rO-QopW5sgrJ41GwN01WCXh6yWPxjSoFI9D5JIMgKw=&xsec_source=pc_search"
+    "https://www.xiaohongshu.com/explore/68f99f6d0000000007033fcf?xsec_token=ABZEzjuN2fPjKF9EcMsCCxfbt3IBRsFZldGFoCJbdDmXI=&xsec_source=pc_feed"
     # ........................
 ]
 
@@ -10,23 +10,24 @@
 
 import asyncio
 import json
-import re
+import time
 from typing import Any, Callable, Dict, List, Optional, Union
 from urllib.parse import urlencode
 
 import httpx
 from playwright.async_api import BrowserContext, Page
-from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_result
+from tenacity import retry, stop_after_attempt, wait_fixed
 
 import config
 from base.base_crawler import AbstractApiClient
 from tools import utils
-from html import unescape
 
 from .exception import DataFetchError, IPBlockError
 from .field import SearchNoteType, SearchSortType
 from .help import get_search_id, sign
 from .extractor import XiaoHongShuExtractor
+from .secsign import seccore_signv2_playwright
 
 
 class XiaoHongShuClient(AbstractApiClient):
@@ -63,15 +64,13 @@ class XiaoHongShuClient(AbstractApiClient):
         Returns:
 
         """
-        encrypt_params = await self.playwright_page.evaluate(
-            "([url, data]) => window._webmsxyw(url,data)", [url, data]
-        )
+        x_s = await seccore_signv2_playwright(self.playwright_page, url, data)
         local_storage = await self.playwright_page.evaluate("() => window.localStorage")
         signs = sign(
             a1=self.cookie_dict.get("a1", ""),
             b1=local_storage.get("b1", ""),
-            x_s=encrypt_params.get("X-s", ""),
-            x_t=str(encrypt_params.get("X-t", "")),
+            x_s=x_s,
+            x_t=str(int(time.time())),
         )
 
         headers = {
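For orientation, the hunk above swaps the old window._webmsxyw evaluation for the new seccore_signv2_playwright helper and generates the X-t timestamp from the local clock. Below is a minimal sketch (not the project's code verbatim) of how those pieces compose into the values that the client merges into its request headers; the standalone function name build_signed_headers is hypothetical, and the sketch assumes the package layout shown in this commit (media_platform/xhs/help.py and media_platform/xhs/secsign.py) plus the cookie_dict and Playwright page the client already holds.

import time

from playwright.async_api import Page

from media_platform.xhs.help import sign
from media_platform.xhs.secsign import seccore_signv2_playwright


async def build_signed_headers(page: Page, cookie_dict: dict, url: str, data: dict) -> dict:
    # X-s now comes from the browser-side signer exposed through Playwright
    x_s = await seccore_signv2_playwright(page, url, data)
    # b1 is still read out of the page's localStorage, exactly as in the hunk above
    local_storage = await page.evaluate("() => window.localStorage")
    # X-t is generated locally instead of being returned by the signer
    return sign(
        a1=cookie_dict.get("a1", ""),
        b1=local_storage.get("b1", ""),
        x_s=x_s,
        x_t=str(int(time.time())),
    )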
@@ -282,16 +282,9 @@ class XiaoHongShuCrawler(AbstractCrawler):
             async with semaphore:
                 try:
                     utils.logger.info(f"[get_note_detail_async_task] Begin get note detail, note_id: {note_id}")
-                    try:
-                        note_detail = await self.xhs_client.get_note_by_id(note_id, xsec_source, xsec_token)
-                    except RetryError:
-                        pass
+                    note_detail = await self.xhs_client.get_note_by_id_from_html(note_id, xsec_source, xsec_token, enable_cookie=True)
 
                     if not note_detail:
-                        note_detail = await self.xhs_client.get_note_by_id_from_html(note_id, xsec_source, xsec_token, enable_cookie=True)
-                        if not note_detail:
-                            raise Exception(f"[get_note_detail_async_task] Failed to get note detail, Id: {note_id}")
+                        raise Exception(f"[get_note_detail_async_task] Failed to get note detail, Id: {note_id}")
 
                     note_detail.update({"xsec_token": xsec_token, "xsec_source": xsec_source})
@@ -27,16 +27,17 @@ def sign(a1="", b1="", x_s="", x_t=""):
         "s0": 3, # getPlatformCode
         "s1": "",
         "x0": "1", # localStorage.getItem("b1b1")
-        "x1": "3.7.8-2", # version
+        "x1": "4.2.2", # version
         "x2": "Mac OS",
         "x3": "xhs-pc-web",
-        "x4": "4.27.2",
+        "x4": "4.74.0",
         "x5": a1, # cookie of a1
         "x6": x_t,
         "x7": x_s,
         "x8": b1, # localStorage.getItem("b1")
         "x9": mrc(x_t + x_s + b1),
         "x10": 154, # getSigCount
+        "x11": "normal"
     }
     encode_str = encodeUtf8(json.dumps(common, separators=(',', ':')))
     x_s_common = b64Encode(encode_str)
media_platform/xhs/secsign.py (new file, 66 lines)
@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+# Disclaimer: this code is for learning and research purposes only. Users must observe the following principles:
+# 1. It must not be used for any commercial purpose.
+# 2. Usage must comply with the target platform's terms of service and robots.txt rules.
+# 3. Large-scale crawling or anything that disrupts the platform's operation is not allowed.
+# 4. Request frequency should be kept reasonable to avoid placing an unnecessary burden on the target platform.
+# 5. It must not be used for any illegal or improper purpose.
+#
+# See the LICENSE file in the project root for the detailed license terms.
+# By using this code you agree to the principles above and to all terms in the LICENSE.
+
+import hashlib
+import base64
+import json
+from typing import Any
+
+def _build_c(e: Any, a: Any) -> str:
+    c = str(e)
+    if isinstance(a, (dict, list)):
+        c += json.dumps(a, separators=(",", ":"), ensure_ascii=False)
+    elif isinstance(a, str):
+        c += a
+    # other types are not appended
+    return c
+
+
+# ---------------------------
+# p.Pu = MD5(c) => lowercase hex
+# ---------------------------
+def _md5_hex(s: str) -> str:
+    return hashlib.md5(s.encode("utf-8")).hexdigest()
+
+
+
+# ============================================================
+# Playwright version (async): pass in a page (Page object)
+# Internally uses page.evaluate('window.mnsv2(...)')
+# ============================================================
+async def seccore_signv2_playwright(
+    page,  # Playwright Page
+    e: Any,
+    a: Any,
+) -> str:
+    """
+    Generate the signature by calling window.mnsv2(c, d) through Playwright's page.evaluate.
+    The page context must already provide window.mnsv2 (i.e. the target site's script has been loaded).
+
+    Usage:
+        s = await page.evaluate("(c, d) => window.mnsv2(c, d)", c, d)
+    """
+    c = _build_c(e, a)
+    d = _md5_hex(c)
+
+    # Call window.mnsv2 inside the browser context
+    s = await page.evaluate("(c, d) => window.mnsv2(c, d)", [c, d])
+    f = {
+        "x0": "4.2.6",
+        "x1": "xhs-pc-web",
+        "x2": "Mac OS",
+        "x3": s,
+        "x4": a,
+    }
+    payload = json.dumps(f, separators=(",", ":"), ensure_ascii=False).encode("utf-8")
+    token = "XYS_" + base64.b64encode(payload).decode("ascii")
+    print(token)
+    return token
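A hedged usage sketch for the new seccore_signv2_playwright helper follows. It assumes a Playwright page on which xiaohongshu.com has already loaded its own scripts, so that window.mnsv2 is defined in the page context; the API path and request body below are placeholders for illustration, not values taken from the commit.

import asyncio

from playwright.async_api import async_playwright

from media_platform.xhs.secsign import seccore_signv2_playwright


async def main() -> None:
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        page = await browser.new_page()
        # Load the site so its script defines window.mnsv2 in the page context
        await page.goto("https://www.xiaohongshu.com")
        uri = "/api/sns/web/v1/homefeed"           # placeholder API path
        payload = {"cursor_score": "", "num": 18}  # placeholder request body
        x_s = await seccore_signv2_playwright(page, uri, payload)
        print(x_s)  # a token of the form "XYS_<base64 payload>"
        await browser.close()


asyncio.run(main())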