From 6dcfd7e0a5b2f1f9bdf215bd5077e15098ca2040 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=A8=8B=E5=BA=8F=E5=91=98=E9=98=BF=E6=B1=9F=28Relakkes?=
 =?UTF-8?q?=29?= <relakkes@gmail.com>
Date: Mon, 17 Nov 2025 17:11:35 +0800
Subject: [PATCH] refactor: weibo login

---
 config/weibo_config.py         |  2 +-
 media_platform/weibo/client.py | 27 ++++++++++++++++++++++++---
 media_platform/weibo/core.py   | 11 ++++++++---
 tools/crawler_util.py          |  9 +--------
 4 files changed, 34 insertions(+), 15 deletions(-)

diff --git a/config/weibo_config.py b/config/weibo_config.py
index a8224ad..f89ea35 100644
--- a/config/weibo_config.py
+++ b/config/weibo_config.py
@@ -12,7 +12,7 @@
 # 微博平台配置
 
 # 搜索类型，具体的枚举值在media_platform/weibo/field.py中
-WEIBO_SEARCH_TYPE = "popular"
+WEIBO_SEARCH_TYPE = "default"
 
 # 指定微博ID列表
 WEIBO_SPECIFIED_ID_LIST = [
diff --git a/media_platform/weibo/client.py b/media_platform/weibo/client.py
index ec9f289..0f89993 100644
--- a/media_platform/weibo/client.py
+++ b/media_platform/weibo/client.py
@@ -23,6 +23,7 @@ from urllib.parse import parse_qs, unquote, urlencode
 import httpx
 from httpx import Response
 from playwright.async_api import BrowserContext, Page
+from tenacity import retry, stop_after_attempt, wait_fixed
 
 import config
 from tools import utils
@@ -50,6 +51,7 @@ class WeiboClient:
         self.cookie_dict = cookie_dict
         self._image_agent_host = "https://i1.wp.com/"
 
+    @retry(stop=stop_after_attempt(5), wait=wait_fixed(2))
     async def request(self, method, url, **kwargs) -> Union[Response, Dict]:
         enable_return_response = kwargs.pop("return_response", False)
         async with httpx.AsyncClient(proxy=self.proxy) as client:
@@ -58,7 +60,12 @@ class WeiboClient:
         if enable_return_response:
             return response
 
-        data: Dict = response.json()
+        try:
+            data: Dict = response.json()
+        except json.decoder.JSONDecodeError:
+            utils.logger.error(f"[WeiboClient.request] request {method}:{url} err code: {response.status_code} res:{response.text}")
+            raise DataFetchError(f"get response code error: {response.status_code}")
+
         ok_code = data.get("ok")
         if ok_code == 0:  # response error
             utils.logger.error(f"[WeiboClient.request] request {method}:{url} err, res:{data}")
@@ -99,10 +106,24 @@ class WeiboClient:
             ping_flag = False
         return ping_flag
 
-    async def update_cookies(self, browser_context: BrowserContext):
-        cookie_str, cookie_dict = utils.convert_cookies(await browser_context.cookies())
+    async def update_cookies(self, browser_context: BrowserContext, urls: Optional[List[str]] = None):
+        """
+        Update this client's Cookie header and cookie_dict from the browser context
+        :param browser_context: Browser context to read cookies from
+        :param urls: Optional list of URLs to filter cookies (e.g., ["https://m.weibo.cn"])
+                     If non-empty, only cookies for these URLs are retrieved; if None or empty, all cookies are used
+        """
+        if urls:
+            cookies = await browser_context.cookies(urls=urls)
+            utils.logger.info(f"[WeiboClient.update_cookies] Updating cookies for specific URLs: {urls}")
+        else:
+            cookies = await browser_context.cookies()
+            utils.logger.info("[WeiboClient.update_cookies] Updating all cookies")
+
+        cookie_str, cookie_dict = utils.convert_cookies(cookies)
         self.headers["Cookie"] = cookie_str
         self.cookie_dict = cookie_dict
+        utils.logger.info(f"[WeiboClient.update_cookies] Cookie updated successfully, total: {len(cookie_dict)} cookies")
 
     async def get_note_by_keyword(
         self,
diff --git a/media_platform/weibo/core.py b/media_platform/weibo/core.py
index 2b1ac9f..6c05b4e 100644
--- a/media_platform/weibo/core.py
+++ b/media_platform/weibo/core.py
@@ -83,7 +83,8 @@ class WeiboCrawler(AbstractCrawler):
 
 
             self.context_page = await self.browser_context.new_page()
-            await self.context_page.goto(self.mobile_index_url)
+            await self.context_page.goto(self.index_url)
+            await asyncio.sleep(2)
 
             # Create a client to interact with the xiaohongshu website.
             self.wb_client = await self.create_weibo_client(httpx_proxy_format)
@@ -100,8 +101,12 @@ class WeiboCrawler(AbstractCrawler):
                 # 登录成功后重定向到手机端的网站，再更新手机端登录成功的cookie
                 utils.logger.info("[WeiboCrawler.start] redirect weibo mobile homepage and update cookies on mobile platform")
                 await self.context_page.goto(self.mobile_index_url)
-                await asyncio.sleep(2)
-                await self.wb_client.update_cookies(browser_context=self.browser_context)
+                await asyncio.sleep(3)
+                # Fetch only the mobile-site cookies so that PC-site and mobile-site cookies do not get mixed up
+                await self.wb_client.update_cookies(
+                    browser_context=self.browser_context,
+                    urls=[self.mobile_index_url]
+                )
 
             crawler_type_var.set(config.CRAWLER_TYPE)
             if config.CRAWLER_TYPE == "search":
diff --git a/tools/crawler_util.py b/tools/crawler_util.py
index 06cf8c5..13141e8 100644
--- a/tools/crawler_util.py
+++ b/tools/crawler_util.py
@@ -120,14 +120,7 @@ def get_user_agent() -> str:
 
 def get_mobile_user_agent() -> str:
     ua_list = [
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 16_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPad; CPU OS 16_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPhone; CPU iPhone OS 16_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/114.0.5735.99 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (iPad; CPU OS 16_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/114.0.5735.124 Mobile/15E148 Safari/604.1",
-        "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36",
-        "Mozilla/5.0 (Linux; Android 13; SAMSUNG SM-S918B) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/21.0 Chrome/110.0.5481.154 Mobile Safari/537.36",
-        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 OPR/99.0.0.0",
-        "Mozilla/5.0 (Linux; Android 10; JNY-LX1; HMSCore 6.11.0.302) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.88 HuaweiBrowser/13.0.5.303 Mobile Safari/537.36"
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 18_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Mobile/15E148 Safari/604.1"
     ]
     return random.choice(ua_list)