mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2025-11-25 03:15:17 +08:00
Merge branch 'feature/config-refactor-20251018'
This commit is contained in:
@@ -78,8 +78,9 @@ class BilibiliCrawler(AbstractCrawler):
|
||||
# Launch a browser context.
|
||||
chromium = playwright.chromium
|
||||
self.browser_context = await self.launch_browser(chromium, None, self.user_agent, headless=config.HEADLESS)
|
||||
# stealth.min.js is a js script to prevent the website from detecting the crawler.
|
||||
await self.browser_context.add_init_script(path="libs/stealth.min.js")
|
||||
# stealth.min.js is a js script to prevent the website from detecting the crawler.
|
||||
await self.browser_context.add_init_script(path="libs/stealth.min.js")
|
||||
|
||||
self.context_page = await self.browser_context.new_page()
|
||||
await self.context_page.goto(self.index_url)
|
||||
|
||||
|
||||
@@ -74,8 +74,9 @@ class DouYinCrawler(AbstractCrawler):
|
||||
user_agent=None,
|
||||
headless=config.HEADLESS,
|
||||
)
|
||||
# stealth.min.js is a js script to prevent the website from detecting the crawler.
|
||||
await self.browser_context.add_init_script(path="libs/stealth.min.js")
|
||||
# stealth.min.js is a js script to prevent the website from detecting the crawler.
|
||||
await self.browser_context.add_init_script(path="libs/stealth.min.js")
|
||||
|
||||
self.context_page = await self.browser_context.new_page()
|
||||
await self.context_page.goto(self.index_url)
|
||||
|
||||
|
||||
@@ -78,8 +78,10 @@ class KuaishouCrawler(AbstractCrawler):
|
||||
self.browser_context = await self.launch_browser(
|
||||
chromium, None, self.user_agent, headless=config.HEADLESS
|
||||
)
|
||||
# stealth.min.js is a js script to prevent the website from detecting the crawler.
|
||||
await self.browser_context.add_init_script(path="libs/stealth.min.js")
|
||||
# stealth.min.js is a js script to prevent the website from detecting the crawler.
|
||||
await self.browser_context.add_init_script(path="libs/stealth.min.js")
|
||||
|
||||
|
||||
self.context_page = await self.browser_context.new_page()
|
||||
await self.context_page.goto(f"{self.index_url}?isHome=1")
|
||||
|
||||
|
||||
@@ -77,8 +77,11 @@ class WeiboCrawler(AbstractCrawler):
|
||||
# Launch a browser context.
|
||||
chromium = playwright.chromium
|
||||
self.browser_context = await self.launch_browser(chromium, None, self.mobile_user_agent, headless=config.HEADLESS)
|
||||
# stealth.min.js is a js script to prevent the website from detecting the crawler.
|
||||
await self.browser_context.add_init_script(path="libs/stealth.min.js")
|
||||
|
||||
# stealth.min.js is a js script to prevent the website from detecting the crawler.
|
||||
await self.browser_context.add_init_script(path="libs/stealth.min.js")
|
||||
|
||||
|
||||
self.context_page = await self.browser_context.new_page()
|
||||
await self.context_page.goto(self.mobile_index_url)
|
||||
|
||||
|
||||
@@ -79,8 +79,9 @@ class XiaoHongShuCrawler(AbstractCrawler):
|
||||
self.user_agent,
|
||||
headless=config.HEADLESS,
|
||||
)
|
||||
# stealth.min.js is a js script to prevent the website from detecting the crawler.
|
||||
await self.browser_context.add_init_script(path="libs/stealth.min.js")
|
||||
# stealth.min.js is a js script to prevent the website from detecting the crawler.
|
||||
await self.browser_context.add_init_script(path="libs/stealth.min.js")
|
||||
|
||||
self.context_page = await self.browser_context.new_page()
|
||||
await self.context_page.goto(self.index_url)
|
||||
|
||||
|
||||
@@ -86,8 +86,8 @@ class ZhihuCrawler(AbstractCrawler):
|
||||
self.browser_context = await self.launch_browser(
|
||||
chromium, None, self.user_agent, headless=config.HEADLESS
|
||||
)
|
||||
# stealth.min.js is a js script to prevent the website from detecting the crawler.
|
||||
await self.browser_context.add_init_script(path="libs/stealth.min.js")
|
||||
# stealth.min.js is a js script to prevent the website from detecting the crawler.
|
||||
await self.browser_context.add_init_script(path="libs/stealth.min.js")
|
||||
|
||||
self.context_page = await self.browser_context.new_page()
|
||||
await self.context_page.goto(self.index_url, wait_until="domcontentloaded")
|
||||
|
||||
Reference in New Issue
Block a user