Merge branch 'feature/config-refactor-20251018'

2025-11-25 03:15:17 +08:00 · 2025-10-19 15:32:42 +08:00
parent 56bf5d226f 03e384bbe2
commit 26a261bc09
6 changed files with 20 additions and 12 deletions
--- a/media_platform/bilibili/core.py
+++ b/media_platform/bilibili/core.py
@@ -78,8 +78,9 @@ class BilibiliCrawler(AbstractCrawler):
                # Launch a browser context.
                chromium = playwright.chromium
                self.browser_context = await self.launch_browser(chromium, None, self.user_agent, headless=config.HEADLESS)
-            # stealth.min.js is a js script to prevent the website from detecting the crawler.
-            await self.browser_context.add_init_script(path="libs/stealth.min.js")
+                # stealth.min.js is a js script to prevent the website from detecting the crawler.
+                await self.browser_context.add_init_script(path="libs/stealth.min.js")
+
            self.context_page = await self.browser_context.new_page()
            await self.context_page.goto(self.index_url)

--- a/media_platform/douyin/core.py
+++ b/media_platform/douyin/core.py
@@ -74,8 +74,9 @@ class DouYinCrawler(AbstractCrawler):
                    user_agent=None,
                    headless=config.HEADLESS,
                )
-            # stealth.min.js is a js script to prevent the website from detecting the crawler.
-            await self.browser_context.add_init_script(path="libs/stealth.min.js")
+                # stealth.min.js is a js script to prevent the website from detecting the crawler.
+                await self.browser_context.add_init_script(path="libs/stealth.min.js")
+
            self.context_page = await self.browser_context.new_page()
            await self.context_page.goto(self.index_url)

--- a/media_platform/kuaishou/core.py
+++ b/media_platform/kuaishou/core.py
@@ -78,8 +78,10 @@ class KuaishouCrawler(AbstractCrawler):
                self.browser_context = await self.launch_browser(
                    chromium, None, self.user_agent, headless=config.HEADLESS
                )
-            # stealth.min.js is a js script to prevent the website from detecting the crawler.
-            await self.browser_context.add_init_script(path="libs/stealth.min.js")
+                # stealth.min.js is a js script to prevent the website from detecting the crawler.
+                await self.browser_context.add_init_script(path="libs/stealth.min.js")
+
+
            self.context_page = await self.browser_context.new_page()
            await self.context_page.goto(f"{self.index_url}?isHome=1")

--- a/media_platform/weibo/core.py
+++ b/media_platform/weibo/core.py
@@ -77,8 +77,11 @@ class WeiboCrawler(AbstractCrawler):
                # Launch a browser context.
                chromium = playwright.chromium
                self.browser_context = await self.launch_browser(chromium, None, self.mobile_user_agent, headless=config.HEADLESS)
-            # stealth.min.js is a js script to prevent the website from detecting the crawler.
-            await self.browser_context.add_init_script(path="libs/stealth.min.js")
+
+                # stealth.min.js is a js script to prevent the website from detecting the crawler.
+                await self.browser_context.add_init_script(path="libs/stealth.min.js")
+
+
            self.context_page = await self.browser_context.new_page()
            await self.context_page.goto(self.mobile_index_url)

--- a/media_platform/xhs/core.py
+++ b/media_platform/xhs/core.py
@@ -79,8 +79,9 @@ class XiaoHongShuCrawler(AbstractCrawler):
                    self.user_agent,
                    headless=config.HEADLESS,
                )
-            # stealth.min.js is a js script to prevent the website from detecting the crawler.
-            await self.browser_context.add_init_script(path="libs/stealth.min.js")
+                # stealth.min.js is a js script to prevent the website from detecting the crawler.
+                await self.browser_context.add_init_script(path="libs/stealth.min.js")
+
            self.context_page = await self.browser_context.new_page()
            await self.context_page.goto(self.index_url)

--- a/media_platform/zhihu/core.py
+++ b/media_platform/zhihu/core.py
@@ -86,8 +86,8 @@ class ZhihuCrawler(AbstractCrawler):
                self.browser_context = await self.launch_browser(
                    chromium, None, self.user_agent, headless=config.HEADLESS
                )
-            # stealth.min.js is a js script to prevent the website from detecting the crawler.
-            await self.browser_context.add_init_script(path="libs/stealth.min.js")
+                # stealth.min.js is a js script to prevent the website from detecting the crawler.
+                await self.browser_context.add_init_script(path="libs/stealth.min.js")

            self.context_page = await self.browser_context.new_page()
            await self.context_page.goto(self.index_url, wait_until="domcontentloaded")