refactor: 使用 xhshow 替代 playwright 签名方案

感谢 @Cloxl/xhshow 开源项目

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
程序员阿江(Relakkes)
2025-11-10 18:12:45 +08:00
parent 54f23b8d1c
commit b5dab6d1e8
6 changed files with 751 additions and 776 deletions

View File

@@ -314,6 +314,7 @@ Nstbrowser 指纹浏览器 — 多账号运营&自动化管理的最佳解决方
## 📚 参考
- **小红书签名仓库**：[Cloxl 的 xhs 签名仓库](https://github.com/Cloxl/xhshow)
- **小红书客户端**：[ReaJason 的 xhs 仓库](https://github.com/ReaJason/xhs)
- **短信转发**：[SmsForwarder 参考仓库](https://github.com/pppscn/SmsForwarder)
- **内网穿透工具**：[ngrok 官方文档](https://ngrok.com/docs/)

View File

@@ -17,6 +17,7 @@ from urllib.parse import urlencode
import httpx
from playwright.async_api import BrowserContext, Page
from tenacity import retry, stop_after_attempt, wait_fixed
from xhshow import Xhshow
import config
from base.base_crawler import AbstractApiClient
@@ -27,7 +28,6 @@ from .exception import DataFetchError, IPBlockError
from .field import SearchNoteType, SearchSortType
from .help import get_search_id, sign
from .extractor import XiaoHongShuExtractor
from .secsign import seccore_signv2_playwright
class XiaoHongShuClient(AbstractApiClient):
@@ -53,24 +53,51 @@ class XiaoHongShuClient(AbstractApiClient):
self.playwright_page = playwright_page
self.cookie_dict = cookie_dict
self._extractor = XiaoHongShuExtractor()
# 初始化 xhshow 客户端用于签名生成
self._xhshow_client = Xhshow()
async def _pre_headers(self, url: str, data=None) -> Dict:
"""
请求头参数签名
请求头参数签名,使用 xhshow 库生成签名
Args:
url:
data:
url: 完整的 URI(GET 请求包含查询参数)
data: POST 请求的请求体数据
Returns:
"""
x_s = await seccore_signv2_playwright(self.playwright_page, url, data)
local_storage = await self.playwright_page.evaluate("() => window.localStorage")
# 获取 a1 cookie 值
a1_value = self.cookie_dict.get("a1", "")
# 根据请求类型使用不同的签名方法
if data is None:
# GET 请求:从 url 中提取参数
from urllib.parse import urlparse, parse_qs
parsed = urlparse(url)
params = {k: v[0] if len(v) == 1 else v for k, v in parse_qs(parsed.query).items()}
# 使用完整的 URL(包含 host)
full_url = f"{self._host}{url}"
x_s = self._xhshow_client.sign_xs_get(uri=full_url, a1_value=a1_value, params=params)
else:
# POST 请求:使用 data 作为 payload
full_url = f"{self._host}{url}"
x_s = self._xhshow_client.sign_xs_post(uri=full_url, a1_value=a1_value, payload=data)
# 尝试获取 b1 值(从 localStorage),如果获取失败则使用空字符串
b1_value = ""
try:
if self.playwright_page:
local_storage = await self.playwright_page.evaluate("() => window.localStorage")
b1_value = local_storage.get("b1", "")
except Exception as e:
utils.logger.warning(f"[XiaoHongShuClient._pre_headers] Failed to get b1 from localStorage: {e}, using empty string")
# 使用 sign 函数生成其他签名头
signs = sign(
a1=self.cookie_dict.get("a1", ""),
b1=local_storage.get("b1", ""),
a1=a1_value,
b1=b1_value,
x_s=x_s,
x_t=str(int(time.time())),
x_t=str(int(time.time() * 1000)), # x-t 使用毫秒时间戳
)
headers = {

View File

@@ -1,66 +0,0 @@
# -*- coding: utf-8 -*-
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
# 3. 不得进行大规模爬取或对平台造成运营干扰。
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
# 5. 不得用于任何非法或不当的用途。
#
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
import hashlib
import base64
import json
from typing import Any
def _build_c(e: Any, a: Any) -> str:
c = str(e)
if isinstance(a, (dict, list)):
c += json.dumps(a, separators=(",", ":"), ensure_ascii=False)
elif isinstance(a, str):
c += a
# 其它类型不拼
return c
# ---------------------------
# p.Pu = MD5(c) => hex 小写
# ---------------------------
def _md5_hex(s: str) -> str:
return hashlib.md5(s.encode("utf-8")).hexdigest()
# ============================================================
# Playwright 版本(异步):传入 page(Page 对象)
# 内部用 page.evaluate('window.mnsv2(...)')
# ============================================================
async def seccore_signv2_playwright(
    page,  # Playwright Page
    e: Any,
    a: Any,
) -> str:
    """Generate an ``XYS_``-prefixed signature token via ``window.mnsv2``.

    The string to sign is produced by ``_build_c(e, a)`` and its MD5 hex
    digest by ``_md5_hex``; both are handed to ``window.mnsv2`` inside the
    browser page. The page must therefore already expose ``window.mnsv2``
    (for example because the target site's scripts have been loaded).

    Args:
        page: Playwright ``Page`` whose JS context provides ``window.mnsv2``.
        e: Value whose ``str()`` forms the start of the string to sign.
        a: Payload; appended to the string to sign by ``_build_c`` and also
            embedded unchanged as ``x4`` in the token envelope.

    Returns:
        ``"XYS_"`` followed by the Base64 encoding of the compact JSON
        envelope with keys ``x0`` .. ``x4``.
    """
    c = _build_c(e, a)
    d = _md5_hex(c)

    # Playwright's evaluate() forwards exactly ONE argument to the page
    # function, so the pair travels as a list and is destructured on the JS
    # side. A plain "(c, d) => ..." would receive the whole list as `c` and
    # `undefined` as `d`.
    s = await page.evaluate("([c, d]) => window.mnsv2(c, d)", [c, d])

    envelope = {
        "x0": "4.2.6",
        "x1": "xhs-pc-web",
        "x2": "Mac OS",
        "x3": s,
        "x4": a,
    }
    payload = json.dumps(envelope, separators=(",", ":"), ensure_ascii=False).encode("utf-8")
    # The token is returned only; it is intentionally not echoed to stdout.
    return "XYS_" + base64.b64encode(payload).decode("ascii")

View File

@@ -33,6 +33,7 @@ dependencies = [
"typer>=0.12.3",
"uvicorn==0.29.0",
"wordcloud==1.9.3",
"xhshow>=0.1.3",
]
[[tool.uv.index]]

View File

@@ -25,3 +25,4 @@ alembic>=1.16.5
asyncmy>=0.2.10
sqlalchemy>=2.0.43
motor>=3.3.0
xhshow>=0.1.3

1413
uv.lock generated

File diff suppressed because it is too large Load Diff