Mirror of https://github.com/NanmiCoder/MediaCrawler.git (synced 2025-11-25 03:15:17 +08:00)

Compare commits: 54f23b8d1c ... b6caa7a85e (3 commits)

- b6caa7a85e
- 1e3637f238
- b5dab6d1e8
@@ -314,6 +314,7 @@ Nstbrowser fingerprint browser — the best solution for multi-account operation & automated management

## 📚 References

- **Xiaohongshu signature repo**: [Cloxl's xhs signature repo](https://github.com/Cloxl/xhshow)
- **Xiaohongshu client**: [ReaJason's xhs repo](https://github.com/ReaJason/xhs)
- **SMS forwarding**: [SmsForwarder reference repo](https://github.com/pppscn/SmsForwarder)
- **Intranet tunneling tool**: [ngrok official docs](https://ngrok.com/docs/)
@@ -17,16 +17,13 @@ SORT_TYPE = "popularity_descending"

# List of specified note URLs; each URL must carry the xsec_token parameter
XHS_SPECIFIED_NOTE_URL_LIST = [
"https://www.xiaohongshu.com/explore/68f99f6d0000000007033fcf?xsec_token=ABZEzjuN2fPjKF9EcMsCCxfbt3IBRsFZldGFoCJbdDmXI=&xsec_source=pc_feed",
"https://www.xiaohongshu.com/explore/64b95d01000000000c034587?xsec_token=AB0EFqJvINCkj6xOCKCQgfNNh8GdnBC_6XecG4QOddo3Q=&xsec_source=pc_cfeed"
# ........................
]

# List of specified creator URLs (full URLs or plain IDs are supported)
# Supported formats:
# 1. Full creator profile URL (with xsec_token and xsec_source parameters): "https://www.xiaohongshu.com/user/profile/5eb8e1d400000000010075ae?xsec_token=AB1nWBKCo1vE2HEkfoJUOi5B6BE5n7wVrbdpHoWIj5xHw=&xsec_source=pc_feed"
# 2. Plain user_id: "63e36c9a000000002703502b"
# List of specified creator URLs; must carry the xsec_token and xsec_source parameters

XHS_CREATOR_ID_LIST = [
"https://www.xiaohongshu.com/user/profile/5eb8e1d400000000010075ae?xsec_token=AB1nWBKCo1vE2HEkfoJUOi5B6BE5n7wVrbdpHoWIj5xHw=&xsec_source=pc_feed",
"63e36c9a000000002703502b",
"https://www.xiaohongshu.com/user/profile/5f58bd990000000001003753?xsec_token=ABYVg1evluJZZzpMX-VWzchxQ1qSNVW3r-jOEnKqMcgZw=&xsec_source=pc_search"
# ........................
]
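Since XHS_CREATOR_ID_LIST mixes full profile URLs and bare user IDs, here is a small hedged sketch of how an entry in either format could be normalized into its user ID plus xsec parameters. The helper name and return shape are hypothetical and not part of this changeset; MediaCrawler's own parsing may differ.

```python
# Hypothetical helper (not part of this diff) that normalizes a creator entry
# into (user_id, xsec_token, xsec_source), accepting both supported formats.
from urllib.parse import urlparse, parse_qs


def parse_creator_entry(entry: str):
    if not entry.startswith("http"):
        # Format 2: a plain user_id with no token information attached
        return entry, "", ""
    # Format 1: full profile URL ".../user/profile/<user_id>?xsec_token=...&xsec_source=..."
    parsed = urlparse(entry)
    query = parse_qs(parsed.query)
    user_id = parsed.path.rstrip("/").split("/")[-1]
    xsec_token = query.get("xsec_token", [""])[0]
    xsec_source = query.get("xsec_source", ["pc_feed"])[0]  # assumed default channel
    return user_id, xsec_token, xsec_source


# Example:
# parse_creator_entry("63e36c9a000000002703502b") -> ("63e36c9a000000002703502b", "", "")
```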
@@ -17,6 +17,7 @@ from urllib.parse import urlencode

import httpx
from playwright.async_api import BrowserContext, Page
from tenacity import retry, stop_after_attempt, wait_fixed
from xhshow import Xhshow

import config
from base.base_crawler import AbstractApiClient

@@ -27,7 +28,6 @@ from .exception import DataFetchError, IPBlockError

from .field import SearchNoteType, SearchSortType
from .help import get_search_id, sign
from .extractor import XiaoHongShuExtractor
from .secsign import seccore_signv2_playwright


class XiaoHongShuClient(AbstractApiClient):
@@ -53,24 +53,51 @@ class XiaoHongShuClient(AbstractApiClient):

self.playwright_page = playwright_page
self.cookie_dict = cookie_dict
self._extractor = XiaoHongShuExtractor()
# Initialize the xhshow client used for signature generation
self._xhshow_client = Xhshow()

async def _pre_headers(self, url: str, data=None) -> Dict:
"""
Sign the request header parameters
Sign the request header parameters, using the xhshow library to generate the signature
Args:
url:
data:
url: full URI (for GET requests this includes the query parameters)
data: request body of a POST request

Returns:

"""
x_s = await seccore_signv2_playwright(self.playwright_page, url, data)
local_storage = await self.playwright_page.evaluate("() => window.localStorage")
# Get the a1 cookie value
a1_value = self.cookie_dict.get("a1", "")

# Use a different signing method depending on the request type
if data is None:
# GET request: extract the parameters from the url
from urllib.parse import urlparse, parse_qs
parsed = urlparse(url)
params = {k: v[0] if len(v) == 1 else v for k, v in parse_qs(parsed.query).items()}
# Use the full URL (including the host)
full_url = f"{self._host}{url}"
x_s = self._xhshow_client.sign_xs_get(uri=full_url, a1_value=a1_value, params=params)
else:
# POST request: use data as the payload
full_url = f"{self._host}{url}"
x_s = self._xhshow_client.sign_xs_post(uri=full_url, a1_value=a1_value, payload=data)

# Try to read the b1 value from localStorage; fall back to an empty string if that fails
b1_value = ""
try:
if self.playwright_page:
local_storage = await self.playwright_page.evaluate("() => window.localStorage")
b1_value = local_storage.get("b1", "")
except Exception as e:
utils.logger.warning(f"[XiaoHongShuClient._pre_headers] Failed to get b1 from localStorage: {e}, using empty string")

# Use the sign helper to generate the remaining signature headers
signs = sign(
a1=self.cookie_dict.get("a1", ""),
b1=local_storage.get("b1", ""),
a1=a1_value,
b1=b1_value,
x_s=x_s,
x_t=str(int(time.time())),
x_t=str(int(time.time() * 1000)),  # x-t uses a millisecond timestamp
)

headers = {
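Because the removed seccore-based lines are interleaved with the added xhshow-based lines above, here is a minimal, hedged sketch of the new signing path in one piece. `HOST`, `cookie_dict` and `page` are stand-ins for the client's `self._host`, `self.cookie_dict` and `self.playwright_page`; the `sign_xs_get`/`sign_xs_post` calls and the localStorage fallback mirror the lines added in this hunk, not a documented xhshow API beyond what the diff shows.

```python
# A condensed sketch (not part of the diff) of the new xhshow-based signing flow.
from typing import Optional
from urllib.parse import urlparse, parse_qs

from xhshow import Xhshow

HOST = "https://edith.xiaohongshu.com"  # assumed API host; the client keeps this in self._host
xhshow_client = Xhshow()


def build_x_s(cookie_dict: dict, url: str, data: Optional[dict] = None) -> str:
    """Return the x-s signature for a GET (data is None) or POST request."""
    a1_value = cookie_dict.get("a1", "")
    full_url = f"{HOST}{url}"
    if data is None:
        # GET: rebuild the query parameters from the URI and sign them
        parsed = urlparse(url)
        params = {k: v[0] if len(v) == 1 else v for k, v in parse_qs(parsed.query).items()}
        return xhshow_client.sign_xs_get(uri=full_url, a1_value=a1_value, params=params)
    # POST: the JSON request body is signed as the payload
    return xhshow_client.sign_xs_post(uri=full_url, a1_value=a1_value, payload=data)


async def read_b1(page) -> str:
    """Best-effort read of b1 from the page's localStorage; empty string on any failure."""
    try:
        local_storage = await page.evaluate("() => window.localStorage")
        return local_storage.get("b1", "")
    except Exception:
        return ""
```

The hunk also switches `x-t` from a second-resolution to a millisecond timestamp (`str(int(time.time() * 1000))`).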
@@ -115,7 +142,8 @@ class XiaoHongShuClient(AbstractApiClient):

elif data["code"] == self.IP_ERROR_CODE:
raise IPBlockError(self.IP_ERROR_STR)
else:
raise DataFetchError(data.get("msg", None))
err_msg = data.get("msg", None) or f"{response.text}"
raise DataFetchError(err_msg)

async def get(self, uri: str, params=None) -> Dict:
"""
@@ -480,6 +508,8 @@ class XiaoHongShuClient(AbstractApiClient):

creator: str,
cursor: str,
page_size: int = 30,
xsec_token: str = "",
xsec_source: str = "pc_feed",
) -> Dict:
"""
Get a creator's notes

@@ -487,24 +517,22 @@ class XiaoHongShuClient(AbstractApiClient):

creator: creator ID
cursor: ID of the last note on the previous page
page_size: page size
xsec_token: verification token
xsec_source: channel source

Returns:

"""
uri = "/api/sns/web/v1/user_posted"
data = {
"user_id": creator,
"cursor": cursor,
"num": page_size,
"image_formats": "jpg,webp,avif",
}
return await self.get(uri, data)
uri = f"/api/sns/web/v1/user_posted?num={page_size}&cursor={cursor}&user_id={creator}&xsec_token={xsec_token}&xsec_source={xsec_source}"
return await self.get(uri)

async def get_all_notes_by_creator(
self,
user_id: str,
crawl_interval: float = 1.0,
callback: Optional[Callable] = None,
xsec_token: str = "",
xsec_source: str = "pc_feed",
) -> List[Dict]:
"""
Get all posts published by the specified user; this method keeps paging until it has collected all of the user's posts

@@ -512,6 +540,8 @@ class XiaoHongShuClient(AbstractApiClient):

user_id: user ID
crawl_interval: delay between crawl requests (seconds)
callback: callback invoked after each page of results has been crawled
xsec_token: verification token
xsec_source: channel source

Returns:

@@ -520,7 +550,7 @@ class XiaoHongShuClient(AbstractApiClient):

notes_has_more = True
notes_cursor = ""
while notes_has_more and len(result) < config.CRAWLER_MAX_NOTES_COUNT:
notes_res = await self.get_notes_by_creator(user_id, notes_cursor)
notes_res = await self.get_notes_by_creator(user_id, notes_cursor, xsec_token=xsec_token, xsec_source=xsec_source)
if not notes_res:
utils.logger.error(
f"[XiaoHongShuClient.get_notes_by_creator] The current creator may have been banned by xhs, so they cannot access the data."
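For context, a hedged usage sketch of the extended `get_all_notes_by_creator` signature, which now forwards `xsec_token` and `xsec_source` to every `user_posted` page request. The creator ID and token below are placeholders, and `xhs_client` stands for an already-initialized `XiaoHongShuClient`.

```python
# Hypothetical caller; the ID/token values are placeholders taken from a creator profile URL.
async def dump_creator_notes(xhs_client) -> None:
    notes = await xhs_client.get_all_notes_by_creator(
        user_id="63e36c9a000000002703502b",   # placeholder creator ID
        crawl_interval=1.0,
        callback=None,                        # or an async callback invoked per crawled page
        xsec_token="<xsec_token from the profile URL>",
        xsec_source="pc_feed",
    )
    print(f"collected {len(notes)} notes")
```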
@@ -201,6 +201,8 @@ class XiaoHongShuCrawler(AbstractCrawler):

user_id=user_id,
crawl_interval=crawl_interval,
callback=self.fetch_creator_notes_detail,
xsec_token=creator_info.xsec_token,
xsec_source=creator_info.xsec_source,
)

note_ids = []
@@ -279,12 +281,19 @@

Dict: note detail
"""
note_detail = None
utils.logger.info(f"[get_note_detail_async_task] Begin get note detail, note_id: {note_id}")
async with semaphore:
try:
utils.logger.info(f"[get_note_detail_async_task] Begin get note detail, note_id: {note_id}")
note_detail = await self.xhs_client.get_note_by_id_from_html(note_id, xsec_source, xsec_token, enable_cookie=True)
try:
note_detail = await self.xhs_client.get_note_by_id(note_id, xsec_source, xsec_token)
except RetryError:
pass

if not note_detail:
raise Exception(f"[get_note_detail_async_task] Failed to get note detail, Id: {note_id}")
note_detail = await self.xhs_client.get_note_by_id_from_html(note_id, xsec_source, xsec_token, enable_cookie=True)
if not note_detail:
raise Exception(f"[get_note_detail_async_task] Failed to get note detail, Id: {note_id}")

note_detail.update({"xsec_token": xsec_token, "xsec_source": xsec_source})
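Because the removed and added lines are interleaved above, here is a consolidated, hedged sketch of the new control flow: try the signed API endpoint first, swallow tenacity's `RetryError`, then fall back to parsing the note's HTML page. The method names follow the crawler's client; the surrounding class structure and logging are simplified.

```python
# A simplified sketch (not the full crawler code) of the API-first / HTML-fallback flow.
from tenacity import RetryError


async def get_note_detail(xhs_client, note_id: str, xsec_source: str, xsec_token: str, semaphore):
    async with semaphore:
        note_detail = None
        try:
            # 1) Preferred path: the signed web API endpoint
            note_detail = await xhs_client.get_note_by_id(note_id, xsec_source, xsec_token)
        except RetryError:
            # All retries exhausted; fall through to the HTML fallback
            pass

        if not note_detail:
            # 2) Fallback: fetch and parse the note's HTML page (with cookies enabled)
            note_detail = await xhs_client.get_note_by_id_from_html(
                note_id, xsec_source, xsec_token, enable_cookie=True
            )
        if not note_detail:
            raise Exception(f"Failed to get note detail, Id: {note_id}")

        note_detail.update({"xsec_token": xsec_token, "xsec_source": xsec_source})
        return note_detail
```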
@@ -1,66 +0,0 @@

# -*- coding: utf-8 -*-
# Disclaimer: this code is for learning and research purposes only. Users must observe the following principles:
# 1. It must not be used for any commercial purpose.
# 2. Usage must comply with the target platform's terms of service and robots.txt rules.
# 3. Do not perform large-scale crawling or disrupt the platform's operation.
# 4. Keep the request rate reasonable to avoid placing unnecessary load on the target platform.
# 5. It must not be used for any illegal or improper purpose.
#
# See the LICENSE file in the project root for the full license terms.
# By using this code you agree to the principles above and to all terms in the LICENSE.

import hashlib
import base64
import json
from typing import Any

def _build_c(e: Any, a: Any) -> str:
c = str(e)
if isinstance(a, (dict, list)):
c += json.dumps(a, separators=(",", ":"), ensure_ascii=False)
elif isinstance(a, str):
c += a
# other types are not appended
return c


# ---------------------------
# p.Pu = MD5(c) => lowercase hex
# ---------------------------
def _md5_hex(s: str) -> str:
return hashlib.md5(s.encode("utf-8")).hexdigest()


# ============================================================
# Playwright version (async): pass in page (a Page object)
# Internally calls page.evaluate('window.mnsv2(...)')
# ============================================================
async def seccore_signv2_playwright(
page,  # Playwright Page
e: Any,
a: Any,
) -> str:
"""
Use Playwright's page.evaluate to call window.mnsv2(c, d) and generate the signature.
The page context must already contain window.mnsv2 (e.g. the target site's script has been injected).

Usage:
s = await page.evaluate("(c, d) => window.mnsv2(c, d)", c, d)
"""
c = _build_c(e, a)
d = _md5_hex(c)

# Call window.mnsv2 inside the browser context
s = await page.evaluate("(c, d) => window.mnsv2(c, d)", [c, d])
f = {
"x0": "4.2.6",
"x1": "xhs-pc-web",
"x2": "Mac OS",
"x3": s,
"x4": a,
}
payload = json.dumps(f, separators=(",", ":"), ensure_ascii=False).encode("utf-8")
token = "XYS_" + base64.b64encode(payload).decode("ascii")
print(token)
return token
@@ -33,6 +33,7 @@ dependencies = [

"typer>=0.12.3",
"uvicorn==0.29.0",
"wordcloud==1.9.3",
"xhshow>=0.1.3",
]

[[tool.uv.index]]
@@ -25,3 +25,4 @@ alembic>=1.16.5

asyncmy>=0.2.10
sqlalchemy>=2.0.43
motor>=3.3.0
xhshow>=0.1.3