Merge branch 'feature/config-refactor-20251018'

This commit is contained in:
程序员阿江(Relakkes)
2025-10-19 15:32:42 +08:00
6 changed files with 20 additions and 12 deletions

View File

@@ -78,8 +78,9 @@ class BilibiliCrawler(AbstractCrawler):
# Launch a browser context.
chromium = playwright.chromium
self.browser_context = await self.launch_browser(chromium, None, self.user_agent, headless=config.HEADLESS)
# stealth.min.js is a js script to prevent the website from detecting the crawler.
await self.browser_context.add_init_script(path="libs/stealth.min.js")
# stealth.min.js is a js script to prevent the website from detecting the crawler.
await self.browser_context.add_init_script(path="libs/stealth.min.js")
self.context_page = await self.browser_context.new_page()
await self.context_page.goto(self.index_url)

View File

@@ -74,8 +74,9 @@ class DouYinCrawler(AbstractCrawler):
user_agent=None,
headless=config.HEADLESS,
)
# stealth.min.js is a js script to prevent the website from detecting the crawler.
await self.browser_context.add_init_script(path="libs/stealth.min.js")
# stealth.min.js is a js script to prevent the website from detecting the crawler.
await self.browser_context.add_init_script(path="libs/stealth.min.js")
self.context_page = await self.browser_context.new_page()
await self.context_page.goto(self.index_url)

View File

@@ -78,8 +78,10 @@ class KuaishouCrawler(AbstractCrawler):
self.browser_context = await self.launch_browser(
chromium, None, self.user_agent, headless=config.HEADLESS
)
# stealth.min.js is a js script to prevent the website from detecting the crawler.
await self.browser_context.add_init_script(path="libs/stealth.min.js")
# stealth.min.js is a js script to prevent the website from detecting the crawler.
await self.browser_context.add_init_script(path="libs/stealth.min.js")
self.context_page = await self.browser_context.new_page()
await self.context_page.goto(f"{self.index_url}?isHome=1")

View File

@@ -77,8 +77,11 @@ class WeiboCrawler(AbstractCrawler):
# Launch a browser context.
chromium = playwright.chromium
self.browser_context = await self.launch_browser(chromium, None, self.mobile_user_agent, headless=config.HEADLESS)
# stealth.min.js is a js script to prevent the website from detecting the crawler.
await self.browser_context.add_init_script(path="libs/stealth.min.js")
# stealth.min.js is a js script to prevent the website from detecting the crawler.
await self.browser_context.add_init_script(path="libs/stealth.min.js")
self.context_page = await self.browser_context.new_page()
await self.context_page.goto(self.mobile_index_url)

View File

@@ -79,8 +79,9 @@ class XiaoHongShuCrawler(AbstractCrawler):
self.user_agent,
headless=config.HEADLESS,
)
# stealth.min.js is a js script to prevent the website from detecting the crawler.
await self.browser_context.add_init_script(path="libs/stealth.min.js")
# stealth.min.js is a js script to prevent the website from detecting the crawler.
await self.browser_context.add_init_script(path="libs/stealth.min.js")
self.context_page = await self.browser_context.new_page()
await self.context_page.goto(self.index_url)

View File

@@ -86,8 +86,8 @@ class ZhihuCrawler(AbstractCrawler):
self.browser_context = await self.launch_browser(
chromium, None, self.user_agent, headless=config.HEADLESS
)
# stealth.min.js is a js script to prevent the website from detecting the crawler.
await self.browser_context.add_init_script(path="libs/stealth.min.js")
# stealth.min.js is a js script to prevent the website from detecting the crawler.
await self.browser_context.add_init_script(path="libs/stealth.min.js")
self.context_page = await self.browser_context.new_page()
await self.context_page.goto(self.index_url, wait_until="domcontentloaded")