diff --git a/media_platform/bilibili/core.py b/media_platform/bilibili/core.py
index fe3ed61..babcc10 100644
--- a/media_platform/bilibili/core.py
+++ b/media_platform/bilibili/core.py
@@ -63,7 +63,7 @@ class BilibiliCrawler(AbstractCrawler):
                 config.IP_PROXY_POOL_COUNT, enable_validate_ip=True
             )
             ip_proxy_info: IpInfoModel = await ip_proxy_pool.get_proxy()
-            playwright_proxy_format, httpx_proxy_format = self.format_proxy_info(
+            playwright_proxy_format, httpx_proxy_format = utils.format_proxy_info(
                 ip_proxy_info
             )
 
@@ -547,25 +547,6 @@ class BilibiliCrawler(AbstractCrawler):
         )
         return bilibili_client_obj
 
-    @staticmethod
-    def format_proxy_info(
-        ip_proxy_info: IpInfoModel,
-    ) -> Tuple[Optional[Dict], Optional[Dict]]:
-        """
-        format proxy info for playwright and httpx
-        :param ip_proxy_info: ip proxy info
-        :return: playwright proxy, httpx proxy
-        """
-        playwright_proxy = {
-            "server": f"{ip_proxy_info.protocol}{ip_proxy_info.ip}:{ip_proxy_info.port}",
-            "username": ip_proxy_info.user,
-            "password": ip_proxy_info.password,
-        }
-        httpx_proxy = {
-            f"{ip_proxy_info.protocol}": f"http://{ip_proxy_info.user}:{ip_proxy_info.password}@{ip_proxy_info.ip}:{ip_proxy_info.port}"
-        }
-        return playwright_proxy, httpx_proxy
-
     async def launch_browser(
         self,
         chromium: BrowserType,
diff --git a/media_platform/douyin/core.py b/media_platform/douyin/core.py
index 7f2956f..7b27046 100644
--- a/media_platform/douyin/core.py
+++ b/media_platform/douyin/core.py
@@ -54,7 +54,7 @@ class DouYinCrawler(AbstractCrawler):
                 config.IP_PROXY_POOL_COUNT, enable_validate_ip=True
             )
             ip_proxy_info: IpInfoModel = await ip_proxy_pool.get_proxy()
-            playwright_proxy_format, httpx_proxy_format = self.format_proxy_info(
+            playwright_proxy_format, httpx_proxy_format = utils.format_proxy_info(
                 ip_proxy_info
             )
 
@@ -276,21 +276,6 @@ class DouYinCrawler(AbstractCrawler):
             if aweme_item is not None:
                 await douyin_store.update_douyin_aweme(aweme_item)
 
-    @staticmethod
-    def format_proxy_info(
-        ip_proxy_info: IpInfoModel,
-    ) -> Tuple[Optional[Dict], Optional[Dict]]:
-        """format proxy info for playwright and httpx"""
-        playwright_proxy = {
-            "server": f"{ip_proxy_info.protocol}{ip_proxy_info.ip}:{ip_proxy_info.port}",
-            "username": ip_proxy_info.user,
-            "password": ip_proxy_info.password,
-        }
-        httpx_proxy = {
-            f"{ip_proxy_info.protocol}": f"http://{ip_proxy_info.user}:{ip_proxy_info.password}@{ip_proxy_info.ip}:{ip_proxy_info.port}"
-        }
-        return playwright_proxy, httpx_proxy
-
     async def create_douyin_client(self, httpx_proxy: Optional[str]) -> DOUYINClient:
         """Create douyin client"""
         cookie_str, cookie_dict = utils.convert_cookies(await self.browser_context.cookies())  # type: ignore
diff --git a/media_platform/kuaishou/core.py b/media_platform/kuaishou/core.py
index 93e02ac..0debef5 100644
--- a/media_platform/kuaishou/core.py
+++ b/media_platform/kuaishou/core.py
@@ -55,7 +55,7 @@ class KuaishouCrawler(AbstractCrawler):
                 config.IP_PROXY_POOL_COUNT, enable_validate_ip=True
             )
             ip_proxy_info: IpInfoModel = await ip_proxy_pool.get_proxy()
-            playwright_proxy_format, httpx_proxy_format = self.format_proxy_info(
+            playwright_proxy_format, httpx_proxy_format = utils.format_proxy_info(
                 ip_proxy_info
             )
 
@@ -259,21 +259,6 @@ class KuaishouCrawler(AbstractCrawler):
                 browser_context=self.browser_context
             )
 
-    @staticmethod
-    def format_proxy_info(
-        ip_proxy_info: IpInfoModel,
-    ) -> Tuple[Optional[Dict], Optional[Dict]]:
-        """format proxy info for playwright and httpx"""
-        playwright_proxy = {
-            "server": f"{ip_proxy_info.protocol}{ip_proxy_info.ip}:{ip_proxy_info.port}",
-            "username": ip_proxy_info.user,
-            "password": ip_proxy_info.password,
-        }
-        httpx_proxy = {
-            f"{ip_proxy_info.protocol}": f"http://{ip_proxy_info.user}:{ip_proxy_info.password}@{ip_proxy_info.ip}:{ip_proxy_info.port}"
-        }
-        return playwright_proxy, httpx_proxy
-
     async def create_ks_client(self, httpx_proxy: Optional[str]) -> KuaiShouClient:
         """Create ks client"""
         utils.logger.info(
diff --git a/media_platform/tieba/core.py b/media_platform/tieba/core.py
index 9997524..8635104 100644
--- a/media_platform/tieba/core.py
+++ b/media_platform/tieba/core.py
@@ -30,7 +30,6 @@ from proxy.proxy_ip_pool import IpInfoModel, create_ip_pool
 from store import tieba as tieba_store
 from tools import utils
 from tools.cdp_browser import CDPBrowserManager
-from tools.crawler_util import format_proxy_info
 from var import crawler_type_var, source_keyword_var
 
 from .client import BaiduTieBaClient
@@ -66,7 +65,7 @@
                 config.IP_PROXY_POOL_COUNT, enable_validate_ip=True
             )
             ip_proxy_info: IpInfoModel = await ip_proxy_pool.get_proxy()
-            _, httpx_proxy_format = format_proxy_info(ip_proxy_info)
+            _, httpx_proxy_format = utils.format_proxy_info(ip_proxy_info)
             utils.logger.info(
                 f"[BaiduTieBaCrawler.start] Init default ip proxy, value: {httpx_proxy_format}"
             )
diff --git a/media_platform/weibo/core.py b/media_platform/weibo/core.py
index 6d229eb..7d622f0 100644
--- a/media_platform/weibo/core.py
+++ b/media_platform/weibo/core.py
@@ -64,7 +64,7 @@ class WeiboCrawler(AbstractCrawler):
                 config.IP_PROXY_POOL_COUNT, enable_validate_ip=True
             )
             ip_proxy_info: IpInfoModel = await ip_proxy_pool.get_proxy()
-            playwright_proxy_format, httpx_proxy_format = self.format_proxy_info(
+            playwright_proxy_format, httpx_proxy_format = utils.format_proxy_info(
                 ip_proxy_info
             )
 
@@ -367,21 +367,6 @@ class WeiboCrawler(AbstractCrawler):
         )
         return weibo_client_obj
 
-    @staticmethod
-    def format_proxy_info(
-        ip_proxy_info: IpInfoModel,
-    ) -> Tuple[Optional[Dict], Optional[Dict]]:
-        """format proxy info for playwright and httpx"""
-        playwright_proxy = {
-            "server": f"{ip_proxy_info.protocol}{ip_proxy_info.ip}:{ip_proxy_info.port}",
-            "username": ip_proxy_info.user,
-            "password": ip_proxy_info.password,
-        }
-        httpx_proxy = {
-            f"{ip_proxy_info.protocol}": f"http://{ip_proxy_info.user}:{ip_proxy_info.password}@{ip_proxy_info.ip}:{ip_proxy_info.port}"
-        }
-        return playwright_proxy, httpx_proxy
-
     async def launch_browser(
         self,
         chromium: BrowserType,
diff --git a/media_platform/xhs/core.py b/media_platform/xhs/core.py
index 4eb563e..728e9ab 100644
--- a/media_platform/xhs/core.py
+++ b/media_platform/xhs/core.py
@@ -61,7 +61,7 @@ class XiaoHongShuCrawler(AbstractCrawler):
                 config.IP_PROXY_POOL_COUNT, enable_validate_ip=True
             )
             ip_proxy_info: IpInfoModel = await ip_proxy_pool.get_proxy()
-            playwright_proxy_format, httpx_proxy_format = self.format_proxy_info(
+            playwright_proxy_format, httpx_proxy_format = utils.format_proxy_info(
                 ip_proxy_info
             )
 
@@ -378,21 +378,6 @@ class XiaoHongShuCrawler(AbstractCrawler):
                 max_count=CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES,
             )
 
-    @staticmethod
-    def format_proxy_info(
-        ip_proxy_info: IpInfoModel,
-    ) -> Tuple[Optional[Dict], Optional[Dict]]:
-        """format proxy info for playwright and httpx"""
-        playwright_proxy = {
-            "server": f"{ip_proxy_info.protocol}{ip_proxy_info.ip}:{ip_proxy_info.port}",
-            "username": ip_proxy_info.user,
-            "password": ip_proxy_info.password,
-        }
-        httpx_proxy = {
-            f"{ip_proxy_info.protocol}": f"http://{ip_proxy_info.user}:{ip_proxy_info.password}@{ip_proxy_info.ip}:{ip_proxy_info.port}"
-        }
-        return playwright_proxy, httpx_proxy
-
     async def create_xhs_client(self, httpx_proxy: Optional[str]) -> XiaoHongShuClient:
         """Create xhs client"""
         utils.logger.info(
diff --git a/media_platform/zhihu/core.py b/media_platform/zhihu/core.py
index 2d92215..bd533bc 100644
--- a/media_platform/zhihu/core.py
+++ b/media_platform/zhihu/core.py
@@ -65,7 +65,7 @@ class ZhihuCrawler(AbstractCrawler):
                 config.IP_PROXY_POOL_COUNT, enable_validate_ip=True
             )
             ip_proxy_info: IpInfoModel = await ip_proxy_pool.get_proxy()
-            playwright_proxy_format, httpx_proxy_format = self.format_proxy_info(
+            playwright_proxy_format, httpx_proxy_format = utils.format_proxy_info(
                 ip_proxy_info
             )
 
@@ -351,21 +351,6 @@ class ZhihuCrawler(AbstractCrawler):
         await self.batch_get_content_comments(need_get_comment_notes)
 
-    @staticmethod
-    def format_proxy_info(
-        ip_proxy_info: IpInfoModel,
-    ) -> Tuple[Optional[Dict], Optional[Dict]]:
-        """format proxy info for playwright and httpx"""
-        playwright_proxy = {
-            "server": f"{ip_proxy_info.protocol}{ip_proxy_info.ip}:{ip_proxy_info.port}",
-            "username": ip_proxy_info.user,
-            "password": ip_proxy_info.password,
-        }
-        httpx_proxy = {
-            f"{ip_proxy_info.protocol}": f"http://{ip_proxy_info.user}:{ip_proxy_info.password}@{ip_proxy_info.ip}:{ip_proxy_info.port}"
-        }
-        return playwright_proxy, httpx_proxy
-
     async def create_zhihu_client(self, httpx_proxy: Optional[str]) -> ZhiHuClient:
         """Create zhihu client"""
         utils.logger.info(
diff --git a/tools/crawler_util.py b/tools/crawler_util.py
index 2e3e1a4..c152b08 100644
--- a/tools/crawler_util.py
+++ b/tools/crawler_util.py
@@ -27,6 +27,8 @@
 import httpx
 from PIL import Image, ImageDraw
 from playwright.async_api import Cookie, Page
 
+from proxy.proxy_ip_pool import IpInfoModel
+
 from . import utils
 
@@ -171,7 +173,7 @@ def match_interact_info_count(count_str: str) -> int:
     return 0
 
 
-def format_proxy_info(ip_proxy_info) -> Tuple[Optional[Dict], Optional[Dict]]:
+def format_proxy_info(ip_proxy_info: IpInfoModel) -> Tuple[Optional[Dict], Optional[Dict]]:
     """format proxy info for playwright and httpx"""
     playwright_proxy = {
         "server": f"{ip_proxy_info.protocol}{ip_proxy_info.ip}:{ip_proxy_info.port}",