Mirror of https://github.com/NanmiCoder/MediaCrawler.git — synced 2025-11-25 11:29:27 +08:00
Merge pull request #687 from 2513502304/main
Add crawling of short videos and post images for the Douyin platform, and standardize the media-storage logic of the four platforms that currently crawl media: Douyin, Bilibili, Xiaohongshu, and Weibo
@@ -8,7 +8,6 @@
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。


from abc import ABC, abstractmethod
from typing import Dict, Optional

@@ -16,6 +15,7 @@ from playwright.async_api import BrowserContext, BrowserType, Playwright


class AbstractCrawler(ABC):

@abstractmethod
async def start(self):
"""
@@ -31,8 +31,7 @@ class AbstractCrawler(ABC):
pass

@abstractmethod
- async def launch_browser(self, chromium: BrowserType, playwright_proxy: Optional[Dict], user_agent: Optional[str],
- headless: bool = True) -> BrowserContext:
+ async def launch_browser(self, chromium: BrowserType, playwright_proxy: Optional[Dict], user_agent: Optional[str], headless: bool = True) -> BrowserContext:
"""
launch browser
:param chromium: chromium browser
@@ -43,8 +42,7 @@ class AbstractCrawler(ABC):
"""
pass

- async def launch_browser_with_cdp(self, playwright: Playwright, playwright_proxy: Optional[Dict],
- user_agent: Optional[str], headless: bool = True) -> BrowserContext:
+ async def launch_browser_with_cdp(self, playwright: Playwright, playwright_proxy: Optional[Dict], user_agent: Optional[str], headless: bool = True) -> BrowserContext:
"""
使用CDP模式启动浏览器(可选实现)
:param playwright: playwright实例
@@ -58,6 +56,7 @@ class AbstractCrawler(ABC):


class AbstractLogin(ABC):

@abstractmethod
async def begin(self):
pass
@@ -76,6 +75,7 @@ class AbstractLogin(ABC):


class AbstractStore(ABC):

@abstractmethod
async def store_content(self, content_item: Dict):
pass
@@ -99,7 +99,16 @@ class AbstractStoreImage(ABC):
pass


+ class AbstractStoreVideo(ABC):
+ # TODO: support all platform
+ # only weibo is supported
+ # @abstractmethod
+ async def store_video(self, video_content_item: Dict):
+ pass


class AbstractApiClient(ABC):

@abstractmethod
async def request(self, method, url, **kwargs):
pass
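The AbstractStoreVideo hook added above only declares store_video and leaves @abstractmethod commented out. As a rough illustration (not the implementation shipped in this PR), a platform store could satisfy it by dumping the downloaded bytes to disk; the directory layout and the dict keys below are assumptions:

    import pathlib
    from typing import Dict

    from base.base_crawler import AbstractStoreVideo


    class LocalVideoStore(AbstractStoreVideo):
        """Illustrative store_video implementation: write raw bytes to a local folder."""

        def __init__(self, base_dir: str = "data/videos"):
            self.base_dir = pathlib.Path(base_dir)
            self.base_dir.mkdir(parents=True, exist_ok=True)

        async def store_video(self, video_content_item: Dict):
            # Assumed keys: a post/video id and the downloaded bytes.
            target = self.base_dir / f"{video_content_item['video_id']}.mp4"
            target.write_bytes(video_content_item["video_content"])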
----------
@@ -8,7 +8,6 @@
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。


# 基础配置
PLATFORM = "xhs" # 平台,xhs | dy | ks | bili | wb | tieba | zhihu
KEYWORDS = "编程副业,编程兼职" # 关键词搜索配置,以英文逗号分隔
@@ -77,8 +76,8 @@ CRAWLER_MAX_NOTES_COUNT = 200
# 并发爬虫数量控制
MAX_CONCURRENCY_NUM = 1

- # 是否开启爬图片模式, 默认不开启爬图片
- ENABLE_GET_IMAGES = False
+ # 是否开启爬媒体模式(包含图片或视频资源),默认不开启爬媒体
+ ENABLE_GET_MEIDAS = False

# 是否开启爬评论模式, 默认开启爬评论
ENABLE_GET_COMMENTS = True
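ENABLE_GET_IMAGES becomes ENABLE_GET_MEIDAS (spelling kept as in the repository) and now gates video as well as image downloads. A hypothetical sketch of how platform code might consult the switch; maybe_store_video is a placeholder, while get_aweme_media is the Douyin helper this PR adds:

    import config


    async def maybe_store_video(client, video_url: str):
        # Every image/video download is gated on the renamed switch.
        if not config.ENABLE_GET_MEIDAS or not video_url:
            return None
        # Douyin exposes get_aweme_media (added in this PR); Bilibili uses get_video_media.
        return await client.get_aweme_media(video_url)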
----------
@@ -8,7 +8,6 @@
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。


# -*- coding: utf-8 -*-
# @Author : relakkes@gmail.com
# @Time : 2023/12/2 18:44
@@ -32,9 +31,10 @@ from .help import BilibiliSign


class BilibiliClient(AbstractApiClient):

def __init__(
self,
- timeout=10,
+ timeout=60, # 若开启爬取媒体选项,b 站的长视频需要更久的超时时间
proxies=None,
*,
headers: Dict[str, str],
@@ -50,10 +50,7 @@ class BilibiliClient(AbstractApiClient):

async def request(self, method, url, **kwargs) -> Any:
async with httpx.AsyncClient(proxies=self.proxies) as client:
- response = await client.request(
- method, url, timeout=self.timeout,
- **kwargs
- )
+ response = await client.request(method, url, timeout=self.timeout, **kwargs)
try:
data: Dict = response.json()
except json.JSONDecodeError:
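request now issues the call in a single line through httpx.AsyncClient, and the constructor default timeout rises from 10 s to 60 s so that long Bilibili videos can finish downloading when media crawling is enabled. A standalone sketch of the same pattern (the function name and defaults are illustrative):

    import httpx


    async def fetch_json(method: str, url: str, *, timeout: float = 60, proxies=None, **kwargs) -> dict:
        # One short-lived client per call, mirroring BilibiliClient.request;
        # the generous timeout covers large media responses.
        async with httpx.AsyncClient(proxies=proxies) as client:
            response = await client.request(method, url, timeout=timeout, **kwargs)
        return response.json()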
@@ -111,8 +108,7 @@ class BilibiliClient(AbstractApiClient):
async def post(self, uri: str, data: dict) -> Dict:
data = await self.pre_request_data(data)
json_str = json.dumps(data, separators=(',', ':'), ensure_ascii=False)
- return await self.request(method="POST", url=f"{self._host}{uri}",
- data=json_str, headers=self.headers)
+ return await self.request(method="POST", url=f"{self._host}{uri}", data=json_str, headers=self.headers)

async def pong(self) -> bool:
"""get a note to check if login state is ok"""
@@ -122,12 +118,10 @@ class BilibiliClient(AbstractApiClient):
check_login_uri = "/x/web-interface/nav"
response = await self.get(check_login_uri)
if response.get("isLogin"):
- utils.logger.info(
- "[BilibiliClient.pong] Use cache login state get web interface successfull!")
+ utils.logger.info("[BilibiliClient.pong] Use cache login state get web interface successfull!")
ping_flag = True
except Exception as e:
- utils.logger.error(
- f"[BilibiliClient.pong] Pong bilibili failed: {e}, and try to login again...")
+ utils.logger.error(f"[BilibiliClient.pong] Pong bilibili failed: {e}, and try to login again...")
ping_flag = False
return ping_flag

@@ -136,10 +130,15 @@ class BilibiliClient(AbstractApiClient):
self.headers["Cookie"] = cookie_str
self.cookie_dict = cookie_dict

- async def search_video_by_keyword(self, keyword: str, page: int = 1, page_size: int = 20,
+ async def search_video_by_keyword(
+ self,
+ keyword: str,
+ page: int = 1,
+ page_size: int = 20,
order: SearchOrderType = SearchOrderType.DEFAULT,
- pubtime_begin_s: int = 0, pubtime_end_s: int = 0) -> Dict:
+ pubtime_begin_s: int = 0,
+ pubtime_end_s: int = 0,
+ ) -> Dict:
"""
KuaiShou web search api
:param keyword: 搜索关键词
@@ -210,10 +209,11 @@ class BilibiliClient(AbstractApiClient):
else:
return response.content

- async def get_video_comments(self,
+ async def get_video_comments(
+ self,
video_id: str,
order_mode: CommentOrderType = CommentOrderType.DEFAULT,
- next: int = 0
+ next: int = 0,
) -> Dict:
"""get video comments
:param video_id: 视频 ID
@@ -222,18 +222,17 @@ class BilibiliClient(AbstractApiClient):
:return:
"""
uri = "/x/v2/reply/wbi/main"
- post_data = {
- "oid": video_id,
- "mode": order_mode.value,
- "type": 1,
- "ps": 20,
- "next": next
- }
+ post_data = {"oid": video_id, "mode": order_mode.value, "type": 1, "ps": 20, "next": next}
return await self.get(uri, post_data)

- async def get_video_all_comments(self, video_id: str, crawl_interval: float = 1.0, is_fetch_sub_comments=False,
+ async def get_video_all_comments(
+ self,
+ video_id: str,
+ crawl_interval: float = 1.0,
+ is_fetch_sub_comments=False,
callback: Optional[Callable] = None,
- max_count: int = 10,):
+ max_count: int = 10,
+ ):
"""
get video all comments include sub comments
:param video_id:
@@ -256,15 +255,11 @@ class BilibiliClient(AbstractApiClient):
break # Success
except DataFetchError as e:
if attempt < max_retries - 1:
- delay = 5 * (2 ** attempt) + random.uniform(0, 1)
- utils.logger.warning(
- f"[BilibiliClient.get_video_all_comments] Retrying video_id {video_id} in {delay:.2f}s... (Attempt {attempt + 1}/{max_retries})"
- )
+ delay = 5 * (2**attempt) + random.uniform(0, 1)
+ utils.logger.warning(f"[BilibiliClient.get_video_all_comments] Retrying video_id {video_id} in {delay:.2f}s... (Attempt {attempt + 1}/{max_retries})")
await asyncio.sleep(delay)
else:
- utils.logger.error(
- f"[BilibiliClient.get_video_all_comments] Max retries reached for video_id: {video_id}. Skipping comments. Error: {e}"
- )
+ utils.logger.error(f"[BilibiliClient.get_video_all_comments] Max retries reached for video_id: {video_id}. Skipping comments. Error: {e}")
is_end = True
break
if not comments_res:
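The retry block above keeps its exponential backoff with jitter: attempt n waits 5·2^n seconds plus up to one extra second before trying again. A self-contained sketch of that schedule (fetch and max_retries are placeholders):

    import asyncio
    import random


    async def fetch_with_backoff(fetch, max_retries: int = 3):
        for attempt in range(max_retries):
            try:
                return await fetch()
            except Exception:
                if attempt == max_retries - 1:
                    raise
                # 5s, 10s, 20s, ... plus up to 1s of jitter, as in the diff above.
                delay = 5 * (2 ** attempt) + random.uniform(0, 1)
                await asyncio.sleep(delay)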
@@ -292,10 +287,7 @@ class BilibiliClient(AbstractApiClient):
for comment in comment_list:
comment_id = comment['rpid']
if (comment.get("rcount", 0) > 0):
- {
- await self.get_video_all_level_two_comments(
- video_id, comment_id, CommentOrderType.DEFAULT, 10, crawl_interval, callback)
- }
+ {await self.get_video_all_level_two_comments(video_id, comment_id, CommentOrderType.DEFAULT, 10, crawl_interval, callback)}
if len(result) + len(comment_list) > max_count:
comment_list = comment_list[:max_count - len(result)]
if callback: # 如果有回调函数,就执行回调函数
@@ -306,7 +298,8 @@ class BilibiliClient(AbstractApiClient):
continue
return result

- async def get_video_all_level_two_comments(self,
+ async def get_video_all_level_two_comments(
+ self,
video_id: str,
level_one_comment_id: int,
order_mode: CommentOrderType,
@@ -327,8 +320,7 @@ class BilibiliClient(AbstractApiClient):

pn = 1
while True:
- result = await self.get_video_level_two_comments(
- video_id, level_one_comment_id, pn, ps, order_mode)
+ result = await self.get_video_level_two_comments(video_id, level_one_comment_id, pn, ps, order_mode)
comment_list: List[Dict] = result.get("replies", [])
if callback: # 如果有回调函数,就执行回调函数
await callback(video_id, comment_list)
@@ -338,7 +330,8 @@ class BilibiliClient(AbstractApiClient):

pn += 1

- async def get_video_level_two_comments(self,
+ async def get_video_level_two_comments(
+ self,
video_id: str,
level_one_comment_id: int,
pn: int,
@@ -393,7 +386,8 @@ class BilibiliClient(AbstractApiClient):
}
return await self.get(uri, post_data)

- async def get_creator_fans(self,
+ async def get_creator_fans(
+ self,
creator_id: int,
pn: int,
ps: int = 24,
@@ -411,11 +405,11 @@ class BilibiliClient(AbstractApiClient):
"pn": pn,
"ps": ps,
"gaia_source": "main_web",

}
return await self.get(uri, post_data)

- async def get_creator_followings(self,
+ async def get_creator_followings(
+ self,
creator_id: int,
pn: int,
ps: int = 24,
@@ -452,9 +446,13 @@ class BilibiliClient(AbstractApiClient):

return await self.get(uri, post_data)

- async def get_creator_all_fans(self, creator_info: Dict, crawl_interval: float = 1.0,
+ async def get_creator_all_fans(
+ self,
+ creator_info: Dict,
+ crawl_interval: float = 1.0,
callback: Optional[Callable] = None,
- max_count: int = 100) -> List:
+ max_count: int = 100,
+ ) -> List:
"""
get creator all fans
:param creator_info:
@@ -482,9 +480,13 @@ class BilibiliClient(AbstractApiClient):
result.extend(fans_list)
return result

- async def get_creator_all_followings(self, creator_info: Dict, crawl_interval: float = 1.0,
+ async def get_creator_all_followings(
+ self,
+ creator_info: Dict,
+ crawl_interval: float = 1.0,
callback: Optional[Callable] = None,
- max_count: int = 100) -> List:
+ max_count: int = 100,
+ ) -> List:
"""
get creator all followings
:param creator_info:
@@ -512,9 +514,13 @@ class BilibiliClient(AbstractApiClient):
result.extend(followings_list)
return result

- async def get_creator_all_dynamics(self, creator_info: Dict, crawl_interval: float = 1.0,
+ async def get_creator_all_dynamics(
+ self,
+ creator_info: Dict,
+ crawl_interval: float = 1.0,
callback: Optional[Callable] = None,
- max_count: int = 20) -> List:
+ max_count: int = 20,
+ ) -> List:
"""
get creator all followings
:param creator_info:
----------
@@ -8,7 +8,6 @@
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。


# -*- coding: utf-8 -*-
# @Author : relakkes@gmail.com
# @Time : 2023/12/2 18:44
@@ -59,13 +58,9 @@ class BilibiliCrawler(AbstractCrawler):
async def start(self):
playwright_proxy_format, httpx_proxy_format = None, None
if config.ENABLE_IP_PROXY:
- ip_proxy_pool = await create_ip_pool(
- config.IP_PROXY_POOL_COUNT, enable_validate_ip=True
- )
+ ip_proxy_pool = await create_ip_pool(config.IP_PROXY_POOL_COUNT, enable_validate_ip=True)
ip_proxy_info: IpInfoModel = await ip_proxy_pool.get_proxy()
- playwright_proxy_format, httpx_proxy_format = utils.format_proxy_info(
- ip_proxy_info
- )
+ playwright_proxy_format, httpx_proxy_format = utils.format_proxy_info(ip_proxy_info)

async with async_playwright() as playwright:
# 根据配置选择启动模式
@@ -81,9 +76,7 @@ class BilibiliCrawler(AbstractCrawler):
utils.logger.info("[BilibiliCrawler] 使用标准模式启动浏览器")
# Launch a browser context.
chromium = playwright.chromium
- self.browser_context = await self.launch_browser(
- chromium, None, self.user_agent, headless=config.HEADLESS
- )
+ self.browser_context = await self.launch_browser(chromium, None, self.user_agent, headless=config.HEADLESS)
# stealth.min.js is a js script to prevent the website from detecting the crawler.
await self.browser_context.add_init_script(path="libs/stealth.min.js")
self.context_page = await self.browser_context.new_page()
@@ -100,9 +93,7 @@ class BilibiliCrawler(AbstractCrawler):
cookie_str=config.COOKIES,
)
await login_obj.begin()
- await self.bili_client.update_cookies(
- browser_context=self.browser_context
- )
+ await self.bili_client.update_cookies(browser_context=self.browser_context)

crawler_type_var.set(config.CRAWLER_TYPE)
if config.CRAWLER_TYPE == "search":
@@ -136,7 +127,8 @@ class BilibiliCrawler(AbstractCrawler):

@staticmethod
async def get_pubtime_datetime(
- start: str = config.START_DAY, end: str = config.END_DAY
+ start: str = config.START_DAY,
+ end: str = config.END_DAY,
) -> Tuple[str, str]:
"""
获取 bilibili 作品发布日期起始时间戳 pubtime_begin_s 与发布日期结束时间戳 pubtime_end_s
@@ -158,17 +150,11 @@ class BilibiliCrawler(AbstractCrawler):
start_day: datetime = datetime.strptime(start, "%Y-%m-%d")
end_day: datetime = datetime.strptime(end, "%Y-%m-%d")
if start_day > end_day:
- raise ValueError(
- "Wrong time range, please check your start and end argument, to ensure that the start cannot exceed end"
- )
+ raise ValueError("Wrong time range, please check your start and end argument, to ensure that the start cannot exceed end")
elif start_day == end_day: # 搜索同一天的内容
- end_day = (
- start_day + timedelta(days=1) - timedelta(seconds=1)
- ) # 则将 end_day 设置为 start_day + 1 day - 1 second
+ end_day = (start_day + timedelta(days=1) - timedelta(seconds=1)) # 则将 end_day 设置为 start_day + 1 day - 1 second
else: # 搜索 start 至 end
- end_day = (
- end_day + timedelta(days=1) - timedelta(seconds=1)
- ) # 则将 end_day 设置为 end_day + 1 day - 1 second
+ end_day = (end_day + timedelta(days=1) - timedelta(seconds=1)) # 则将 end_day 设置为 end_day + 1 day - 1 second
# 将其重新转换为时间戳
return str(int(start_day.timestamp())), str(int(end_day.timestamp()))

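get_pubtime_datetime turns a start/end day into an inclusive pair of Unix-timestamp strings; the end bound is always pushed to the last second of its day, which for a single-day search equals start + 1 day − 1 second. A condensed standalone sketch of the same arithmetic (both branches collapse into one expression):

    from datetime import datetime, timedelta
    from typing import Tuple


    def pubtime_range(start: str, end: str) -> Tuple[str, str]:
        start_day = datetime.strptime(start, "%Y-%m-%d")
        end_day = datetime.strptime(end, "%Y-%m-%d")
        if start_day > end_day:
            raise ValueError("start must not exceed end")
        # Make the end bound inclusive: the last second of the end day.
        end_day = end_day + timedelta(days=1) - timedelta(seconds=1)
        return str(int(start_day.timestamp())), str(int(end_day.timestamp()))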
@@ -177,32 +163,22 @@ class BilibiliCrawler(AbstractCrawler):
search bilibili video with keywords in normal mode
:return:
"""
- utils.logger.info(
- "[BilibiliCrawler.search_by_keywords] Begin search bilibli keywords"
- )
+ utils.logger.info("[BilibiliCrawler.search_by_keywords] Begin search bilibli keywords")
bili_limit_count = 20 # bilibili limit page fixed value
if config.CRAWLER_MAX_NOTES_COUNT < bili_limit_count:
config.CRAWLER_MAX_NOTES_COUNT = bili_limit_count
start_page = config.START_PAGE # start page number
for keyword in config.KEYWORDS.split(","):
source_keyword_var.set(keyword)
- utils.logger.info(
- f"[BilibiliCrawler.search_by_keywords] Current search keyword: {keyword}"
- )
+ utils.logger.info(f"[BilibiliCrawler.search_by_keywords] Current search keyword: {keyword}")
page = 1
- while (
- page - start_page + 1
- ) * bili_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
+ while (page - start_page + 1) * bili_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
if page < start_page:
- utils.logger.info(
- f"[BilibiliCrawler.search_by_keywords] Skip page: {page}"
- )
+ utils.logger.info(f"[BilibiliCrawler.search_by_keywords] Skip page: {page}")
page += 1
continue

- utils.logger.info(
- f"[BilibiliCrawler.search_by_keywords] search bilibili keyword: {keyword}, page: {page}"
- )
+ utils.logger.info(f"[BilibiliCrawler.search_by_keywords] search bilibili keyword: {keyword}, page: {page}")
video_id_list: List[str] = []
videos_res = await self.bili_client.search_video_by_keyword(
keyword=keyword,
@@ -215,24 +191,15 @@ class BilibiliCrawler(AbstractCrawler):
video_list: List[Dict] = videos_res.get("result")

if not video_list:
- utils.logger.info(
- f"[BilibiliCrawler.search_by_keywords] No more videos for '{keyword}', moving to next keyword."
- )
+ utils.logger.info(f"[BilibiliCrawler.search_by_keywords] No more videos for '{keyword}', moving to next keyword.")
break

semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
task_list = []
try:
- task_list = [
- self.get_video_info_task(
- aid=video_item.get("aid"), bvid="", semaphore=semaphore
- )
- for video_item in video_list
- ]
+ task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list]
except Exception as e:
- utils.logger.warning(
- f"[BilibiliCrawler.search_by_keywords] error in the task list. The video for this page will not be included. {e}"
- )
+ utils.logger.warning(f"[BilibiliCrawler.search_by_keywords] error in the task list. The video for this page will not be included. {e}")
video_items = await asyncio.gather(*task_list)
for video_item in video_items:
if video_item:
@@ -248,74 +215,40 @@ class BilibiliCrawler(AbstractCrawler):
Search bilibili video with keywords in a given time range.
:param daily_limit: if True, strictly limit the number of notes per day and total.
"""
- utils.logger.info(
- f"[BilibiliCrawler.search_by_keywords_in_time_range] Begin search with daily_limit={daily_limit}"
- )
+ utils.logger.info(f"[BilibiliCrawler.search_by_keywords_in_time_range] Begin search with daily_limit={daily_limit}")
bili_limit_count = 20
start_page = config.START_PAGE

for keyword in config.KEYWORDS.split(","):
source_keyword_var.set(keyword)
- utils.logger.info(
- f"[BilibiliCrawler.search_by_keywords_in_time_range] Current search keyword: {keyword}"
- )
+ utils.logger.info(f"[BilibiliCrawler.search_by_keywords_in_time_range] Current search keyword: {keyword}")
total_notes_crawled_for_keyword = 0

- for day in pd.date_range(
- start=config.START_DAY, end=config.END_DAY, freq="D"
- ):
- if (
- daily_limit
- and total_notes_crawled_for_keyword
- >= config.CRAWLER_MAX_NOTES_COUNT
- ):
- utils.logger.info(
- f"[BilibiliCrawler.search] Reached CRAWLER_MAX_NOTES_COUNT limit for keyword '{keyword}', skipping remaining days."
- )
+ for day in pd.date_range(start=config.START_DAY, end=config.END_DAY, freq="D"):
+ if (daily_limit and total_notes_crawled_for_keyword >= config.CRAWLER_MAX_NOTES_COUNT):
+ utils.logger.info(f"[BilibiliCrawler.search] Reached CRAWLER_MAX_NOTES_COUNT limit for keyword '{keyword}', skipping remaining days.")
break

- if (
- not daily_limit
- and total_notes_crawled_for_keyword
- >= config.CRAWLER_MAX_NOTES_COUNT
- ):
- utils.logger.info(
- f"[BilibiliCrawler.search] Reached CRAWLER_MAX_NOTES_COUNT limit for keyword '{keyword}', skipping remaining days."
- )
+ if (not daily_limit and total_notes_crawled_for_keyword >= config.CRAWLER_MAX_NOTES_COUNT):
+ utils.logger.info(f"[BilibiliCrawler.search] Reached CRAWLER_MAX_NOTES_COUNT limit for keyword '{keyword}', skipping remaining days.")
break

- pubtime_begin_s, pubtime_end_s = await self.get_pubtime_datetime(
- start=day.strftime("%Y-%m-%d"), end=day.strftime("%Y-%m-%d")
- )
+ pubtime_begin_s, pubtime_end_s = await self.get_pubtime_datetime(start=day.strftime("%Y-%m-%d"), end=day.strftime("%Y-%m-%d"))
page = 1
notes_count_this_day = 0

while True:
if notes_count_this_day >= config.MAX_NOTES_PER_DAY:
- utils.logger.info(
- f"[BilibiliCrawler.search] Reached MAX_NOTES_PER_DAY limit for {day.ctime()}."
- )
+ utils.logger.info(f"[BilibiliCrawler.search] Reached MAX_NOTES_PER_DAY limit for {day.ctime()}.")
break
- if (
- daily_limit
- and total_notes_crawled_for_keyword
- >= config.CRAWLER_MAX_NOTES_COUNT
- ):
- utils.logger.info(
- f"[BilibiliCrawler.search] Reached CRAWLER_MAX_NOTES_COUNT limit for keyword '{keyword}'."
- )
+ if (daily_limit and total_notes_crawled_for_keyword >= config.CRAWLER_MAX_NOTES_COUNT):
+ utils.logger.info(f"[BilibiliCrawler.search] Reached CRAWLER_MAX_NOTES_COUNT limit for keyword '{keyword}'.")
break
- if (
- not daily_limit
- and total_notes_crawled_for_keyword
- >= config.CRAWLER_MAX_NOTES_COUNT
- ):
+ if (not daily_limit and total_notes_crawled_for_keyword >= config.CRAWLER_MAX_NOTES_COUNT):
break

try:
- utils.logger.info(
- f"[BilibiliCrawler.search] search bilibili keyword: {keyword}, date: {day.ctime()}, page: {page}"
- )
+ utils.logger.info(f"[BilibiliCrawler.search] search bilibili keyword: {keyword}, date: {day.ctime()}, page: {page}")
video_id_list: List[str] = []
videos_res = await self.bili_client.search_video_by_keyword(
keyword=keyword,
@@ -328,33 +261,18 @@ class BilibiliCrawler(AbstractCrawler):
video_list: List[Dict] = videos_res.get("result")

if not video_list:
- utils.logger.info(
- f"[BilibiliCrawler.search] No more videos for '{keyword}' on {day.ctime()}, moving to next day."
- )
+ utils.logger.info(f"[BilibiliCrawler.search] No more videos for '{keyword}' on {day.ctime()}, moving to next day.")
break

semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
- task_list = [
- self.get_video_info_task(
- aid=video_item.get("aid"), bvid="", semaphore=semaphore
- )
- for video_item in video_list
- ]
+ task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list]
video_items = await asyncio.gather(*task_list)

for video_item in video_items:
if video_item:
- if (
- daily_limit
- and total_notes_crawled_for_keyword
- >= config.CRAWLER_MAX_NOTES_COUNT
- ):
+ if (daily_limit and total_notes_crawled_for_keyword >= config.CRAWLER_MAX_NOTES_COUNT):
break
- if (
- not daily_limit
- and total_notes_crawled_for_keyword
- >= config.CRAWLER_MAX_NOTES_COUNT
- ):
+ if (not daily_limit and total_notes_crawled_for_keyword >= config.CRAWLER_MAX_NOTES_COUNT):
break
if notes_count_this_day >= config.MAX_NOTES_PER_DAY:
break
@@ -369,9 +287,7 @@ class BilibiliCrawler(AbstractCrawler):
await self.batch_get_video_comments(video_id_list)

except Exception as e:
- utils.logger.error(
- f"[BilibiliCrawler.search] Error searching on {day.ctime()}: {e}"
- )
+ utils.logger.error(f"[BilibiliCrawler.search] Error searching on {day.ctime()}: {e}")
break

async def batch_get_video_comments(self, video_id_list: List[str]):
@@ -381,20 +297,14 @@ class BilibiliCrawler(AbstractCrawler):
:return:
"""
if not config.ENABLE_GET_COMMENTS:
- utils.logger.info(
- f"[BilibiliCrawler.batch_get_note_comments] Crawling comment mode is not enabled"
- )
+ utils.logger.info(f"[BilibiliCrawler.batch_get_note_comments] Crawling comment mode is not enabled")
return

- utils.logger.info(
- f"[BilibiliCrawler.batch_get_video_comments] video ids:{video_id_list}"
- )
+ utils.logger.info(f"[BilibiliCrawler.batch_get_video_comments] video ids:{video_id_list}")
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
task_list: List[Task] = []
for video_id in video_id_list:
- task = asyncio.create_task(
- self.get_comments(video_id, semaphore), name=video_id
- )
+ task = asyncio.create_task(self.get_comments(video_id, semaphore), name=video_id)
task_list.append(task)
await asyncio.gather(*task_list)

@@ -407,9 +317,7 @@ class BilibiliCrawler(AbstractCrawler):
"""
async with semaphore:
try:
- utils.logger.info(
- f"[BilibiliCrawler.get_comments] begin get video_id: {video_id} comments ..."
- )
+ utils.logger.info(f"[BilibiliCrawler.get_comments] begin get video_id: {video_id} comments ...")
await asyncio.sleep(random.uniform(0.5, 1.5))
await self.bili_client.get_video_all_comments(
video_id=video_id,
@@ -420,13 +328,9 @@ class BilibiliCrawler(AbstractCrawler):
)

except DataFetchError as ex:
- utils.logger.error(
- f"[BilibiliCrawler.get_comments] get video_id: {video_id} comment error: {ex}"
- )
+ utils.logger.error(f"[BilibiliCrawler.get_comments] get video_id: {video_id} comment error: {ex}")
except Exception as e:
- utils.logger.error(
- f"[BilibiliCrawler.get_comments] may be been blocked, err:{e}"
- )
+ utils.logger.error(f"[BilibiliCrawler.get_comments] may be been blocked, err:{e}")
# Propagate the exception to be caught by the main loop
raise

@@ -452,10 +356,7 @@ class BilibiliCrawler(AbstractCrawler):
:return:
"""
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
- task_list = [
- self.get_video_info_task(aid=0, bvid=video_id, semaphore=semaphore)
- for video_id in bvids_list
- ]
+ task_list = [self.get_video_info_task(aid=0, bvid=video_id, semaphore=semaphore) for video_id in bvids_list]
video_details = await asyncio.gather(*task_list)
video_aids_list = []
for video_detail in video_details:
@@ -469,9 +370,7 @@ class BilibiliCrawler(AbstractCrawler):
await self.get_bilibili_video(video_detail, semaphore)
await self.batch_get_video_comments(video_aids_list)

- async def get_video_info_task(
- self, aid: int, bvid: str, semaphore: asyncio.Semaphore
- ) -> Optional[Dict]:
+ async def get_video_info_task(self, aid: int, bvid: str, semaphore: asyncio.Semaphore) -> Optional[Dict]:
"""
Get video detail task
:param aid:
@@ -484,19 +383,13 @@ class BilibiliCrawler(AbstractCrawler):
result = await self.bili_client.get_video_info(aid=aid, bvid=bvid)
return result
except DataFetchError as ex:
- utils.logger.error(
- f"[BilibiliCrawler.get_video_info_task] Get video detail error: {ex}"
- )
+ utils.logger.error(f"[BilibiliCrawler.get_video_info_task] Get video detail error: {ex}")
return None
except KeyError as ex:
- utils.logger.error(
- f"[BilibiliCrawler.get_video_info_task] have not fund note detail video_id:{bvid}, err: {ex}"
- )
+ utils.logger.error(f"[BilibiliCrawler.get_video_info_task] have not fund note detail video_id:{bvid}, err: {ex}")
return None

- async def get_video_play_url_task(
- self, aid: int, cid: int, semaphore: asyncio.Semaphore
- ) -> Union[Dict, None]:
+ async def get_video_play_url_task(self, aid: int, cid: int, semaphore: asyncio.Semaphore) -> Union[Dict, None]:
"""
Get video play url
:param aid:
@@ -509,30 +402,20 @@ class BilibiliCrawler(AbstractCrawler):
result = await self.bili_client.get_video_play_url(aid=aid, cid=cid)
return result
except DataFetchError as ex:
- utils.logger.error(
- f"[BilibiliCrawler.get_video_play_url_task] Get video play url error: {ex}"
- )
+ utils.logger.error(f"[BilibiliCrawler.get_video_play_url_task] Get video play url error: {ex}")
return None
except KeyError as ex:
- utils.logger.error(
- f"[BilibiliCrawler.get_video_play_url_task] have not fund play url from :{aid}|{cid}, err: {ex}"
- )
+ utils.logger.error(f"[BilibiliCrawler.get_video_play_url_task] have not fund play url from :{aid}|{cid}, err: {ex}")
return None

- async def create_bilibili_client(
- self, httpx_proxy: Optional[str]
- ) -> BilibiliClient:
+ async def create_bilibili_client(self, httpx_proxy: Optional[str]) -> BilibiliClient:
"""
create bilibili client
:param httpx_proxy: httpx proxy
:return: bilibili client
"""
- utils.logger.info(
- "[BilibiliCrawler.create_bilibili_client] Begin create bilibili API client ..."
- )
- cookie_str, cookie_dict = utils.convert_cookies(
- await self.browser_context.cookies()
- )
+ utils.logger.info("[BilibiliCrawler.create_bilibili_client] Begin create bilibili API client ...")
+ cookie_str, cookie_dict = utils.convert_cookies(await self.browser_context.cookies())
bilibili_client_obj = BilibiliClient(
proxies=httpx_proxy,
headers={
@@ -562,30 +445,27 @@ class BilibiliCrawler(AbstractCrawler):
:param headless: headless mode
:return: browser context
"""
- utils.logger.info(
- "[BilibiliCrawler.launch_browser] Begin create browser context ..."
- )
+ utils.logger.info("[BilibiliCrawler.launch_browser] Begin create browser context ...")
if config.SAVE_LOGIN_STATE:
# feat issue #14
# we will save login state to avoid login every time
- user_data_dir = os.path.join(
- os.getcwd(), "browser_data", config.USER_DATA_DIR % config.PLATFORM
- ) # type: ignore
+ user_data_dir = os.path.join(os.getcwd(), "browser_data", config.USER_DATA_DIR % config.PLATFORM) # type: ignore
browser_context = await chromium.launch_persistent_context(
user_data_dir=user_data_dir,
accept_downloads=True,
headless=headless,
proxy=playwright_proxy, # type: ignore
- viewport={"width": 1920, "height": 1080},
+ viewport={
+ "width": 1920,
+ "height": 1080
+ },
user_agent=user_agent,
)
return browser_context
else:
# type: ignore
browser = await chromium.launch(headless=headless, proxy=playwright_proxy)
- browser_context = await browser.new_context(
- viewport={"width": 1920, "height": 1080}, user_agent=user_agent
- )
+ browser_context = await browser.new_context(viewport={"width": 1920, "height": 1080}, user_agent=user_agent)
return browser_context

async def launch_browser_with_cdp(
@@ -614,14 +494,10 @@ class BilibiliCrawler(AbstractCrawler):
return browser_context

except Exception as e:
- utils.logger.error(
- f"[BilibiliCrawler] CDP模式启动失败,回退到标准模式: {e}"
- )
+ utils.logger.error(f"[BilibiliCrawler] CDP模式启动失败,回退到标准模式: {e}")
# 回退到标准模式
chromium = playwright.chromium
- return await self.launch_browser(
- chromium, playwright_proxy, user_agent, headless
- )
+ return await self.launch_browser(chromium, playwright_proxy, user_agent, headless)

async def close(self):
"""Close browser context"""
@@ -634,13 +510,9 @@ class BilibiliCrawler(AbstractCrawler):
await self.browser_context.close()
utils.logger.info("[BilibiliCrawler.close] Browser context closed ...")
except TargetClosedError:
- utils.logger.warning(
- "[BilibiliCrawler.close] Browser context was already closed."
- )
+ utils.logger.warning("[BilibiliCrawler.close] Browser context was already closed.")
except Exception as e:
- utils.logger.error(
- f"[BilibiliCrawler.close] An error occurred during close: {e}"
- )
+ utils.logger.error(f"[BilibiliCrawler.close] An error occurred during close: {e}")

async def get_bilibili_video(self, video_item: Dict, semaphore: asyncio.Semaphore):
"""
@@ -649,19 +521,15 @@ class BilibiliCrawler(AbstractCrawler):
:param semaphore:
:return:
"""
- if not config.ENABLE_GET_IMAGES:
- utils.logger.info(
- f"[BilibiliCrawler.get_bilibili_video] Crawling image mode is not enabled"
- )
+ if not config.ENABLE_GET_MEIDAS:
+ utils.logger.info(f"[BilibiliCrawler.get_bilibili_video] Crawling image mode is not enabled")
return
video_item_view: Dict = video_item.get("View")
aid = video_item_view.get("aid")
cid = video_item_view.get("cid")
result = await self.get_video_play_url_task(aid, cid, semaphore)
if result is None:
- utils.logger.info(
- "[BilibiliCrawler.get_bilibili_video] get video play url failed"
- )
+ utils.logger.info("[BilibiliCrawler.get_bilibili_video] get video play url failed")
return
durl_list = result.get("durl")
max_size = -1
@@ -672,9 +540,7 @@ class BilibiliCrawler(AbstractCrawler):
max_size = size
video_url = durl.get("url")
if video_url == "":
- utils.logger.info(
- "[BilibiliCrawler.get_bilibili_video] get video url failed"
- )
+ utils.logger.info("[BilibiliCrawler.get_bilibili_video] get video url failed")
return

content = await self.bili_client.get_video_media(video_url)
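When ENABLE_GET_MEIDAS is on, get_bilibili_video walks the durl list, keeps the largest segment, and fetches it with get_video_media. A condensed sketch of that selection step (the helper name is illustrative):

    from typing import Dict, List, Optional


    def pick_largest_durl(durl_list: List[Dict]) -> Optional[str]:
        # Same selection as the loop above: keep the URL of the biggest segment.
        max_size, video_url = -1, ""
        for durl in durl_list or []:
            size = durl.get("size", 0)
            if size > max_size:
                max_size, video_url = size, durl.get("url", "")
        return video_url or None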
@@ -687,25 +553,17 @@ class BilibiliCrawler(AbstractCrawler):
"""
creator_id_list: get details for creator from creator_id_list
"""
- utils.logger.info(
- f"[BilibiliCrawler.get_creator_details] Crawling the detalis of creator"
- )
- utils.logger.info(
- f"[BilibiliCrawler.get_creator_details] creator ids:{creator_id_list}"
- )
+ utils.logger.info(f"[BilibiliCrawler.get_creator_details] Crawling the detalis of creator")
+ utils.logger.info(f"[BilibiliCrawler.get_creator_details] creator ids:{creator_id_list}")

semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
task_list: List[Task] = []
try:
for creator_id in creator_id_list:
- task = asyncio.create_task(
- self.get_creator_details(creator_id, semaphore), name=creator_id
- )
+ task = asyncio.create_task(self.get_creator_details(creator_id, semaphore), name=creator_id)
task_list.append(task)
except Exception as e:
- utils.logger.warning(
- f"[BilibiliCrawler.get_all_creator_details] error in the task list. The creator will not be included. {e}"
- )
+ utils.logger.warning(f"[BilibiliCrawler.get_all_creator_details] error in the task list. The creator will not be included. {e}")

await asyncio.gather(*task_list)

@@ -717,9 +575,7 @@ class BilibiliCrawler(AbstractCrawler):
:return:
"""
async with semaphore:
- creator_unhandled_info: Dict = await self.bili_client.get_creator_info(
- creator_id
- )
+ creator_unhandled_info: Dict = await self.bili_client.get_creator_info(creator_id)
creator_info: Dict = {
"id": creator_id,
"name": creator_unhandled_info.get("name"),
@@ -740,9 +596,7 @@ class BilibiliCrawler(AbstractCrawler):
creator_id = creator_info["id"]
async with semaphore:
try:
- utils.logger.info(
- f"[BilibiliCrawler.get_fans] begin get creator_id: {creator_id} fans ..."
- )
+ utils.logger.info(f"[BilibiliCrawler.get_fans] begin get creator_id: {creator_id} fans ...")
await self.bili_client.get_creator_all_fans(
creator_info=creator_info,
crawl_interval=random.random(),
@@ -751,13 +605,9 @@ class BilibiliCrawler(AbstractCrawler):
)

except DataFetchError as ex:
- utils.logger.error(
- f"[BilibiliCrawler.get_fans] get creator_id: {creator_id} fans error: {ex}"
- )
+ utils.logger.error(f"[BilibiliCrawler.get_fans] get creator_id: {creator_id} fans error: {ex}")
except Exception as e:
- utils.logger.error(
- f"[BilibiliCrawler.get_fans] may be been blocked, err:{e}"
- )
+ utils.logger.error(f"[BilibiliCrawler.get_fans] may be been blocked, err:{e}")

async def get_followings(self, creator_info: Dict, semaphore: asyncio.Semaphore):
"""
@@ -769,9 +619,7 @@ class BilibiliCrawler(AbstractCrawler):
creator_id = creator_info["id"]
async with semaphore:
try:
- utils.logger.info(
- f"[BilibiliCrawler.get_followings] begin get creator_id: {creator_id} followings ..."
- )
+ utils.logger.info(f"[BilibiliCrawler.get_followings] begin get creator_id: {creator_id} followings ...")
await self.bili_client.get_creator_all_followings(
creator_info=creator_info,
crawl_interval=random.random(),
@@ -780,13 +628,9 @@ class BilibiliCrawler(AbstractCrawler):
)

except DataFetchError as ex:
- utils.logger.error(
- f"[BilibiliCrawler.get_followings] get creator_id: {creator_id} followings error: {ex}"
- )
+ utils.logger.error(f"[BilibiliCrawler.get_followings] get creator_id: {creator_id} followings error: {ex}")
except Exception as e:
- utils.logger.error(
- f"[BilibiliCrawler.get_followings] may be been blocked, err:{e}"
- )
+ utils.logger.error(f"[BilibiliCrawler.get_followings] may be been blocked, err:{e}")

async def get_dynamics(self, creator_info: Dict, semaphore: asyncio.Semaphore):
"""
@@ -798,9 +642,7 @@ class BilibiliCrawler(AbstractCrawler):
creator_id = creator_info["id"]
async with semaphore:
try:
- utils.logger.info(
- f"[BilibiliCrawler.get_dynamics] begin get creator_id: {creator_id} dynamics ..."
- )
+ utils.logger.info(f"[BilibiliCrawler.get_dynamics] begin get creator_id: {creator_id} dynamics ...")
await self.bili_client.get_creator_all_dynamics(
creator_info=creator_info,
crawl_interval=random.random(),
@@ -809,10 +651,6 @@ class BilibiliCrawler(AbstractCrawler):
)

except DataFetchError as ex:
- utils.logger.error(
- f"[BilibiliCrawler.get_dynamics] get creator_id: {creator_id} dynamics error: {ex}"
- )
+ utils.logger.error(f"[BilibiliCrawler.get_dynamics] get creator_id: {creator_id} dynamics error: {ex}")
except Exception as e:
- utils.logger.error(
- f"[BilibiliCrawler.get_dynamics] may be been blocked, err:{e}"
- )
+ utils.logger.error(f"[BilibiliCrawler.get_dynamics] may be been blocked, err:{e}")
----------
@@ -8,14 +8,13 @@
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。


import asyncio
import copy
import json
import urllib.parse
- from typing import Any, Callable, Dict, Optional
+ from typing import Any, Callable, Dict, Union, Optional

- import requests
+ import httpx
from playwright.async_api import BrowserContext

from base.base_crawler import AbstractApiClient
@@ -27,15 +26,16 @@ from .field import *
from .help import *


- class DOUYINClient(AbstractApiClient):
+ class DouYinClient(AbstractApiClient):

def __init__(
self,
- timeout=30,
+ timeout=30, # 若开启爬取媒体选项,抖音的短视频需要更久的超时时间
proxies=None,
*,
headers: Dict,
playwright_page: Optional[Page],
- cookie_dict: Dict
+ cookie_dict: Dict,
):
self.proxies = proxies
self.timeout = timeout
@@ -45,8 +45,11 @@ class DOUYINClient(AbstractApiClient):
self.cookie_dict = cookie_dict

async def __process_req_params(
- self, uri: str, params: Optional[Dict] = None, headers: Optional[Dict] = None,
- request_method="GET"
+ self,
+ uri: str,
+ params: Optional[Dict] = None,
+ headers: Optional[Dict] = None,
+ request_method="GET",
):

if not params:
@@ -93,10 +96,8 @@ class DOUYINClient(AbstractApiClient):

async def request(self, method, url, **kwargs):
response = None
- if method == "GET":
- response = requests.request(method, url, **kwargs)
- elif method == "POST":
- response = requests.request(method, url, **kwargs)
+ async with httpx.AsyncClient(proxies=self.proxies) as client:
+ response = await client.request(method, url, timeout=self.timeout, **kwargs)
try:
if response.text == "" or response.text == "blocked":
utils.logger.error(f"request params incrr, response.text: {response.text}")
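Swapping the synchronous requests calls for an awaited httpx.AsyncClient request means DouYinClient.request no longer blocks the event loop while a download is in flight, so requests can genuinely overlap. A minimal illustrative sketch of that effect (the helper name and URLs are placeholders):

    import asyncio

    import httpx


    async def fetch_many(urls):
        # One shared AsyncClient; the awaited requests run concurrently instead of
        # serially blocking the loop the way requests.request() did.
        async with httpx.AsyncClient() as client:
            responses = await asyncio.gather(*(client.get(u) for u in urls))
        return [r.content for r in responses]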
@@ -138,7 +139,7 @@ class DOUYINClient(AbstractApiClient):
|
|||||||
search_channel: SearchChannelType = SearchChannelType.GENERAL,
|
search_channel: SearchChannelType = SearchChannelType.GENERAL,
|
||||||
sort_type: SearchSortType = SearchSortType.GENERAL,
|
sort_type: SearchSortType = SearchSortType.GENERAL,
|
||||||
publish_time: PublishTimeType = PublishTimeType.UNLIMITED,
|
publish_time: PublishTimeType = PublishTimeType.UNLIMITED,
|
||||||
search_id: str = ""
|
search_id: str = "",
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
DouYin Web Search API
|
DouYin Web Search API
|
||||||
@@ -165,10 +166,7 @@ class DOUYINClient(AbstractApiClient):
             'search_id': search_id,
         }
         if sort_type.value != SearchSortType.GENERAL.value or publish_time.value != PublishTimeType.UNLIMITED.value:
-            query_params["filter_selected"] = json.dumps({
-                "sort_type": str(sort_type.value),
-                "publish_time": str(publish_time.value)
-            })
+            query_params["filter_selected"] = json.dumps({"sort_type": str(sort_type.value), "publish_time": str(publish_time.value)})
             query_params["is_filter_search"] = 1
             query_params["search_source"] = "tab_search"
         referer_url = f"https://www.douyin.com/search/{keyword}?aid=f594bbd9-a0e2-4651-9319-ebe3cb6298c1&type=general"
@@ -182,9 +180,7 @@ class DOUYINClient(AbstractApiClient):
         :param aweme_id:
         :return:
         """
-        params = {
-            "aweme_id": aweme_id
-        }
+        params = {"aweme_id": aweme_id}
         headers = copy.copy(self.headers)
         del headers["Origin"]
         res = await self.get("/aweme/v1/web/aweme/detail/", params, headers)
@@ -195,12 +191,7 @@ class DOUYINClient(AbstractApiClient):
 
         """
         uri = "/aweme/v1/web/comment/list/"
-        params = {
-            "aweme_id": aweme_id,
-            "cursor": cursor,
-            "count": 20,
-            "item_type": 0
-        }
+        params = {"aweme_id": aweme_id, "cursor": cursor, "count": 20, "item_type": 0}
         keywords = request_keyword_var.get()
         referer_url = "https://www.douyin.com/search/" + keywords + '?aid=3a3cec5a-9e27-4040-b6aa-ef548c2c1138&publish_time=0&sort_type=0&source=search_history&type=general'
         headers = copy.copy(self.headers)
@@ -315,9 +306,17 @@ class DOUYINClient(AbstractApiClient):
             posts_has_more = aweme_post_res.get("has_more", 0)
             max_cursor = aweme_post_res.get("max_cursor")
             aweme_list = aweme_post_res.get("aweme_list") if aweme_post_res.get("aweme_list") else []
-            utils.logger.info(
-                f"[DOUYINClient.get_all_user_aweme_posts] got sec_user_id:{sec_user_id} video len : {len(aweme_list)}")
+            utils.logger.info(f"[DouYinCrawler.get_all_user_aweme_posts] get sec_user_id:{sec_user_id} video len : {len(aweme_list)}")
             if callback:
                 await callback(aweme_list)
             result.extend(aweme_list)
         return result
+
+    async def get_aweme_media(self, url: str) -> Union[bytes, None]:
+        async with httpx.AsyncClient(proxies=self.proxies) as client:
+            response = await client.request("GET", url, timeout=self.timeout, follow_redirects=True)
+            if not response.reason_phrase == "OK":
+                utils.logger.error(f"[DouYinCrawler.get_aweme_media] request {url} err, res:{response.text}")
+                return None
+            else:
+                return response.content
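The new get_aweme_media helper added above returns the raw bytes of a media URL and follows redirects, or None when the response is not a plain 200 OK. A hedged usage sketch of the same contract outside the crawler (the class below is a stand-in, not the real DouYinClient, which also carries signed headers and cookies):

import asyncio
from pathlib import Path
from typing import Optional, Union

import httpx


class MediaDownloader:
    # Stand-in with the same contract as DouYinClient.get_aweme_media.
    def __init__(self, proxies: Optional[str] = None, timeout: int = 30):
        self.proxies = proxies
        self.timeout = timeout

    async def get_aweme_media(self, url: str) -> Union[bytes, None]:
        async with httpx.AsyncClient(proxies=self.proxies) as client:
            response = await client.request("GET", url, timeout=self.timeout, follow_redirects=True)
            if response.reason_phrase != "OK":
                return None  # the caller decides whether to log or skip
            return response.content


async def main() -> None:
    content = await MediaDownloader().get_aweme_media("https://example.com/video.mp4")  # placeholder URL
    if content is not None:
        Path("0.mp4").write_bytes(content)


if __name__ == "__main__":
    asyncio.run(main())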
@@ -8,7 +8,6 @@
 # 详细许可条款请参阅项目根目录下的LICENSE文件。
 # 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
-
 
 import asyncio
 import os
 import random
@@ -31,7 +30,7 @@ from tools import utils
 from tools.cdp_browser import CDPBrowserManager
 from var import crawler_type_var, source_keyword_var
 
-from .client import DOUYINClient
+from .client import DouYinClient
 from .exception import DataFetchError
 from .field import PublishTimeType
 from .login import DouYinLogin
@@ -39,7 +38,7 @@ from .login import DouYinLogin
 
 class DouYinCrawler(AbstractCrawler):
     context_page: Page
-    dy_client: DOUYINClient
+    dy_client: DouYinClient
     browser_context: BrowserContext
     cdp_manager: Optional[CDPBrowserManager]
 
@@ -50,13 +49,9 @@ class DouYinCrawler(AbstractCrawler):
|
|||||||
async def start(self) -> None:
|
async def start(self) -> None:
|
||||||
playwright_proxy_format, httpx_proxy_format = None, None
|
playwright_proxy_format, httpx_proxy_format = None, None
|
||||||
if config.ENABLE_IP_PROXY:
|
if config.ENABLE_IP_PROXY:
|
||||||
ip_proxy_pool = await create_ip_pool(
|
ip_proxy_pool = await create_ip_pool(config.IP_PROXY_POOL_COUNT, enable_validate_ip=True)
|
||||||
config.IP_PROXY_POOL_COUNT, enable_validate_ip=True
|
|
||||||
)
|
|
||||||
ip_proxy_info: IpInfoModel = await ip_proxy_pool.get_proxy()
|
ip_proxy_info: IpInfoModel = await ip_proxy_pool.get_proxy()
|
||||||
playwright_proxy_format, httpx_proxy_format = utils.format_proxy_info(
|
playwright_proxy_format, httpx_proxy_format = utils.format_proxy_info(ip_proxy_info)
|
||||||
ip_proxy_info
|
|
||||||
)
|
|
||||||
|
|
||||||
async with async_playwright() as playwright:
|
async with async_playwright() as playwright:
|
||||||
# 根据配置选择启动模式
|
# 根据配置选择启动模式
|
||||||
@@ -93,9 +88,7 @@ class DouYinCrawler(AbstractCrawler):
|
|||||||
cookie_str=config.COOKIES,
|
cookie_str=config.COOKIES,
|
||||||
)
|
)
|
||||||
await login_obj.begin()
|
await login_obj.begin()
|
||||||
await self.dy_client.update_cookies(
|
await self.dy_client.update_cookies(browser_context=self.browser_context)
|
||||||
browser_context=self.browser_context
|
|
||||||
)
|
|
||||||
crawler_type_var.set(config.CRAWLER_TYPE)
|
crawler_type_var.set(config.CRAWLER_TYPE)
|
||||||
if config.CRAWLER_TYPE == "search":
|
if config.CRAWLER_TYPE == "search":
|
||||||
# Search for notes and retrieve their comment information.
|
# Search for notes and retrieve their comment information.
|
||||||
@@ -121,17 +114,13 @@ class DouYinCrawler(AbstractCrawler):
|
|||||||
aweme_list: List[str] = []
|
aweme_list: List[str] = []
|
||||||
page = 0
|
page = 0
|
||||||
dy_search_id = ""
|
dy_search_id = ""
|
||||||
while (
|
while (page - start_page + 1) * dy_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
|
||||||
page - start_page + 1
|
|
||||||
) * dy_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
|
|
||||||
if page < start_page:
|
if page < start_page:
|
||||||
utils.logger.info(f"[DouYinCrawler.search] Skip {page}")
|
utils.logger.info(f"[DouYinCrawler.search] Skip {page}")
|
||||||
page += 1
|
page += 1
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
utils.logger.info(
|
utils.logger.info(f"[DouYinCrawler.search] search douyin keyword: {keyword}, page: {page}")
|
||||||
f"[DouYinCrawler.search] search douyin keyword: {keyword}, page: {page}"
|
|
||||||
)
|
|
||||||
posts_res = await self.dy_client.search_info_by_keyword(
|
posts_res = await self.dy_client.search_info_by_keyword(
|
||||||
keyword=keyword,
|
keyword=keyword,
|
||||||
offset=page * dy_limit_count - dy_limit_count,
|
offset=page * dy_limit_count - dy_limit_count,
|
||||||
@@ -139,67 +128,49 @@ class DouYinCrawler(AbstractCrawler):
|
|||||||
search_id=dy_search_id,
|
search_id=dy_search_id,
|
||||||
)
|
)
|
||||||
if posts_res.get("data") is None or posts_res.get("data") == []:
|
if posts_res.get("data") is None or posts_res.get("data") == []:
|
||||||
utils.logger.info(
|
utils.logger.info(f"[DouYinCrawler.search] search douyin keyword: {keyword}, page: {page} is empty,{posts_res.get('data')}`")
|
||||||
f"[DouYinCrawler.search] search douyin keyword: {keyword}, page: {page} is empty,{posts_res.get('data')}`"
|
|
||||||
)
|
|
||||||
break
|
break
|
||||||
except DataFetchError:
|
except DataFetchError:
|
||||||
utils.logger.error(
|
utils.logger.error(f"[DouYinCrawler.search] search douyin keyword: {keyword} failed")
|
||||||
f"[DouYinCrawler.search] search douyin keyword: {keyword} failed"
|
|
||||||
)
|
|
||||||
break
|
break
|
||||||
|
|
||||||
page += 1
|
page += 1
|
||||||
if "data" not in posts_res:
|
if "data" not in posts_res:
|
||||||
utils.logger.error(
|
utils.logger.error(f"[DouYinCrawler.search] search douyin keyword: {keyword} failed,账号也许被风控了。")
|
||||||
f"[DouYinCrawler.search] search douyin keyword: {keyword} failed,账号也许被风控了。"
|
|
||||||
)
|
|
||||||
break
|
break
|
||||||
dy_search_id = posts_res.get("extra", {}).get("logid", "")
|
dy_search_id = posts_res.get("extra", {}).get("logid", "")
|
||||||
for post_item in posts_res.get("data"):
|
for post_item in posts_res.get("data"):
|
||||||
try:
|
try:
|
||||||
aweme_info: Dict = (
|
aweme_info: Dict = (post_item.get("aweme_info") or post_item.get("aweme_mix_info", {}).get("mix_items")[0])
|
||||||
post_item.get("aweme_info")
|
|
||||||
or post_item.get("aweme_mix_info", {}).get("mix_items")[0]
|
|
||||||
)
|
|
||||||
except TypeError:
|
except TypeError:
|
||||||
continue
|
continue
|
||||||
aweme_list.append(aweme_info.get("aweme_id", ""))
|
aweme_list.append(aweme_info.get("aweme_id", ""))
|
||||||
await douyin_store.update_douyin_aweme(aweme_item=aweme_info)
|
await douyin_store.update_douyin_aweme(aweme_item=aweme_info)
|
||||||
utils.logger.info(
|
await self.get_aweme_media(aweme_item=aweme_info)
|
||||||
f"[DouYinCrawler.search] keyword:{keyword}, aweme_list:{aweme_list}"
|
utils.logger.info(f"[DouYinCrawler.search] keyword:{keyword}, aweme_list:{aweme_list}")
|
||||||
)
|
|
||||||
await self.batch_get_note_comments(aweme_list)
|
await self.batch_get_note_comments(aweme_list)
|
||||||
|
|
||||||
async def get_specified_awemes(self):
|
async def get_specified_awemes(self):
|
||||||
"""Get the information and comments of the specified post"""
|
"""Get the information and comments of the specified post"""
|
||||||
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
|
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
|
||||||
task_list = [
|
task_list = [self.get_aweme_detail(aweme_id=aweme_id, semaphore=semaphore) for aweme_id in config.DY_SPECIFIED_ID_LIST]
|
||||||
self.get_aweme_detail(aweme_id=aweme_id, semaphore=semaphore)
|
|
||||||
for aweme_id in config.DY_SPECIFIED_ID_LIST
|
|
||||||
]
|
|
||||||
aweme_details = await asyncio.gather(*task_list)
|
aweme_details = await asyncio.gather(*task_list)
|
||||||
for aweme_detail in aweme_details:
|
for aweme_detail in aweme_details:
|
||||||
if aweme_detail is not None:
|
if aweme_detail is not None:
|
||||||
await douyin_store.update_douyin_aweme(aweme_detail)
|
await douyin_store.update_douyin_aweme(aweme_item=aweme_detail)
|
||||||
|
await self.get_aweme_media(aweme_item=aweme_detail)
|
||||||
await self.batch_get_note_comments(config.DY_SPECIFIED_ID_LIST)
|
await self.batch_get_note_comments(config.DY_SPECIFIED_ID_LIST)
|
||||||
|
|
||||||
async def get_aweme_detail(
|
async def get_aweme_detail(self, aweme_id: str, semaphore: asyncio.Semaphore) -> Any:
|
||||||
self, aweme_id: str, semaphore: asyncio.Semaphore
|
|
||||||
) -> Any:
|
|
||||||
"""Get note detail"""
|
"""Get note detail"""
|
||||||
async with semaphore:
|
async with semaphore:
|
||||||
try:
|
try:
|
||||||
return await self.dy_client.get_video_by_id(aweme_id)
|
return await self.dy_client.get_video_by_id(aweme_id)
|
||||||
except DataFetchError as ex:
|
except DataFetchError as ex:
|
||||||
utils.logger.error(
|
utils.logger.error(f"[DouYinCrawler.get_aweme_detail] Get aweme detail error: {ex}")
|
||||||
f"[DouYinCrawler.get_aweme_detail] Get aweme detail error: {ex}"
|
|
||||||
)
|
|
||||||
return None
|
return None
|
||||||
except KeyError as ex:
|
except KeyError as ex:
|
||||||
utils.logger.error(
|
utils.logger.error(f"[DouYinCrawler.get_aweme_detail] have not fund note detail aweme_id:{aweme_id}, err: {ex}")
|
||||||
f"[DouYinCrawler.get_aweme_detail] have not fund note detail aweme_id:{aweme_id}, err: {ex}"
|
|
||||||
)
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def batch_get_note_comments(self, aweme_list: List[str]) -> None:
|
async def batch_get_note_comments(self, aweme_list: List[str]) -> None:
|
||||||
@@ -207,17 +178,13 @@ class DouYinCrawler(AbstractCrawler):
|
|||||||
Batch get note comments
|
Batch get note comments
|
||||||
"""
|
"""
|
||||||
if not config.ENABLE_GET_COMMENTS:
|
if not config.ENABLE_GET_COMMENTS:
|
||||||
utils.logger.info(
|
utils.logger.info(f"[DouYinCrawler.batch_get_note_comments] Crawling comment mode is not enabled")
|
||||||
f"[DouYinCrawler.batch_get_note_comments] Crawling comment mode is not enabled"
|
|
||||||
)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
task_list: List[Task] = []
|
task_list: List[Task] = []
|
||||||
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
|
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
|
||||||
for aweme_id in aweme_list:
|
for aweme_id in aweme_list:
|
||||||
task = asyncio.create_task(
|
task = asyncio.create_task(self.get_comments(aweme_id, semaphore), name=aweme_id)
|
||||||
self.get_comments(aweme_id, semaphore), name=aweme_id
|
|
||||||
)
|
|
||||||
task_list.append(task)
|
task_list.append(task)
|
||||||
if len(task_list) > 0:
|
if len(task_list) > 0:
|
||||||
await asyncio.wait(task_list)
|
await asyncio.wait(task_list)
|
||||||
@@ -233,30 +200,22 @@ class DouYinCrawler(AbstractCrawler):
|
|||||||
callback=douyin_store.batch_update_dy_aweme_comments,
|
callback=douyin_store.batch_update_dy_aweme_comments,
|
||||||
max_count=config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES,
|
max_count=config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES,
|
||||||
)
|
)
|
||||||
utils.logger.info(
|
utils.logger.info(f"[DouYinCrawler.get_comments] aweme_id: {aweme_id} comments have all been obtained and filtered ...")
|
||||||
f"[DouYinCrawler.get_comments] aweme_id: {aweme_id} comments have all been obtained and filtered ..."
|
|
||||||
)
|
|
||||||
except DataFetchError as e:
|
except DataFetchError as e:
|
||||||
utils.logger.error(
|
utils.logger.error(f"[DouYinCrawler.get_comments] aweme_id: {aweme_id} get comments failed, error: {e}")
|
||||||
f"[DouYinCrawler.get_comments] aweme_id: {aweme_id} get comments failed, error: {e}"
|
|
||||||
)
|
|
||||||
|
|
||||||
async def get_creators_and_videos(self) -> None:
|
async def get_creators_and_videos(self) -> None:
|
||||||
"""
|
"""
|
||||||
Get the information and videos of the specified creator
|
Get the information and videos of the specified creator
|
||||||
"""
|
"""
|
||||||
utils.logger.info(
|
utils.logger.info("[DouYinCrawler.get_creators_and_videos] Begin get douyin creators")
|
||||||
"[DouYinCrawler.get_creators_and_videos] Begin get douyin creators"
|
|
||||||
)
|
|
||||||
for user_id in config.DY_CREATOR_ID_LIST:
|
for user_id in config.DY_CREATOR_ID_LIST:
|
||||||
creator_info: Dict = await self.dy_client.get_user_info(user_id)
|
creator_info: Dict = await self.dy_client.get_user_info(user_id)
|
||||||
if creator_info:
|
if creator_info:
|
||||||
await douyin_store.save_creator(user_id, creator=creator_info)
|
await douyin_store.save_creator(user_id, creator=creator_info)
|
||||||
|
|
||||||
# Get all video information of the creator
|
# Get all video information of the creator
|
||||||
all_video_list = await self.dy_client.get_all_user_aweme_posts(
|
all_video_list = await self.dy_client.get_all_user_aweme_posts(sec_user_id=user_id, callback=self.fetch_creator_video_detail)
|
||||||
sec_user_id=user_id, callback=self.fetch_creator_video_detail
|
|
||||||
)
|
|
||||||
|
|
||||||
video_ids = [video_item.get("aweme_id") for video_item in all_video_list]
|
video_ids = [video_item.get("aweme_id") for video_item in all_video_list]
|
||||||
await self.batch_get_note_comments(video_ids)
|
await self.batch_get_note_comments(video_ids)
|
||||||
@@ -266,25 +225,21 @@ class DouYinCrawler(AbstractCrawler):
         Concurrently obtain the specified post list and save the data
         """
         semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
-        task_list = [
-            self.get_aweme_detail(post_item.get("aweme_id"), semaphore)
-            for post_item in video_list
-        ]
+        task_list = [self.get_aweme_detail(post_item.get("aweme_id"), semaphore) for post_item in video_list]
 
         note_details = await asyncio.gather(*task_list)
         for aweme_item in note_details:
             if aweme_item is not None:
-                await douyin_store.update_douyin_aweme(aweme_item)
+                await douyin_store.update_douyin_aweme(aweme_item=aweme_item)
+                await self.get_aweme_media(aweme_item=aweme_item)
 
-    async def create_douyin_client(self, httpx_proxy: Optional[str]) -> DOUYINClient:
+    async def create_douyin_client(self, httpx_proxy: Optional[str]) -> DouYinClient:
         """Create douyin client"""
         cookie_str, cookie_dict = utils.convert_cookies(await self.browser_context.cookies())  # type: ignore
-        douyin_client = DOUYINClient(
+        douyin_client = DouYinClient(
             proxies=httpx_proxy,
             headers={
-                "User-Agent": await self.context_page.evaluate(
-                    "() => navigator.userAgent"
-                ),
+                "User-Agent": await self.context_page.evaluate("() => navigator.userAgent"),
                 "Cookie": cookie_str,
                 "Host": "www.douyin.com",
                 "Origin": "https://www.douyin.com/",
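fetch_creator_video_detail keeps its gather-then-store flow and now appends one media download per stored item. A reduced sketch of that pattern, with the store and download calls replaced by illustrative stand-ins for douyin_store.update_douyin_aweme and get_aweme_media:

import asyncio
from typing import Any, Dict, List, Optional


async def get_detail(aweme_id: str, semaphore: asyncio.Semaphore) -> Optional[Dict[str, Any]]:
    async with semaphore:
        # Stand-in for dy_client.get_video_by_id(aweme_id)
        return {"aweme_id": aweme_id}


async def store(item: Dict[str, Any]) -> None:
    print("store", item["aweme_id"])  # stand-in for douyin_store.update_douyin_aweme


async def download_media(item: Dict[str, Any]) -> None:
    print("download media for", item["aweme_id"])  # stand-in for self.get_aweme_media


async def fetch_details(video_list: List[Dict[str, Any]], max_concurrency: int = 4) -> None:
    # Gather details under a concurrency cap, then store and download sequentially per item.
    semaphore = asyncio.Semaphore(max_concurrency)
    tasks = [get_detail(item["aweme_id"], semaphore) for item in video_list]
    for detail in await asyncio.gather(*tasks):
        if detail is not None:
            await store(detail)
            await download_media(detail)


if __name__ == "__main__":
    asyncio.run(fetch_details([{"aweme_id": "1"}, {"aweme_id": "2"}]))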
@@ -305,23 +260,22 @@ class DouYinCrawler(AbstractCrawler):
|
|||||||
) -> BrowserContext:
|
) -> BrowserContext:
|
||||||
"""Launch browser and create browser context"""
|
"""Launch browser and create browser context"""
|
||||||
if config.SAVE_LOGIN_STATE:
|
if config.SAVE_LOGIN_STATE:
|
||||||
user_data_dir = os.path.join(
|
user_data_dir = os.path.join(os.getcwd(), "browser_data", config.USER_DATA_DIR % config.PLATFORM) # type: ignore
|
||||||
os.getcwd(), "browser_data", config.USER_DATA_DIR % config.PLATFORM
|
|
||||||
) # type: ignore
|
|
||||||
browser_context = await chromium.launch_persistent_context(
|
browser_context = await chromium.launch_persistent_context(
|
||||||
user_data_dir=user_data_dir,
|
user_data_dir=user_data_dir,
|
||||||
accept_downloads=True,
|
accept_downloads=True,
|
||||||
headless=headless,
|
headless=headless,
|
||||||
proxy=playwright_proxy, # type: ignore
|
proxy=playwright_proxy, # type: ignore
|
||||||
viewport={"width": 1920, "height": 1080},
|
viewport={
|
||||||
|
"width": 1920,
|
||||||
|
"height": 1080
|
||||||
|
},
|
||||||
user_agent=user_agent,
|
user_agent=user_agent,
|
||||||
) # type: ignore
|
) # type: ignore
|
||||||
return browser_context
|
return browser_context
|
||||||
else:
|
else:
|
||||||
browser = await chromium.launch(headless=headless, proxy=playwright_proxy) # type: ignore
|
browser = await chromium.launch(headless=headless, proxy=playwright_proxy) # type: ignore
|
||||||
browser_context = await browser.new_context(
|
browser_context = await browser.new_context(viewport={"width": 1920, "height": 1080}, user_agent=user_agent)
|
||||||
viewport={"width": 1920, "height": 1080}, user_agent=user_agent
|
|
||||||
)
|
|
||||||
return browser_context
|
return browser_context
|
||||||
|
|
||||||
async def launch_browser_with_cdp(
|
async def launch_browser_with_cdp(
|
||||||
@@ -356,9 +310,7 @@ class DouYinCrawler(AbstractCrawler):
|
|||||||
utils.logger.error(f"[DouYinCrawler] CDP模式启动失败,回退到标准模式: {e}")
|
utils.logger.error(f"[DouYinCrawler] CDP模式启动失败,回退到标准模式: {e}")
|
||||||
# 回退到标准模式
|
# 回退到标准模式
|
||||||
chromium = playwright.chromium
|
chromium = playwright.chromium
|
||||||
return await self.launch_browser(
|
return await self.launch_browser(chromium, playwright_proxy, user_agent, headless)
|
||||||
chromium, playwright_proxy, user_agent, headless
|
|
||||||
)
|
|
||||||
|
|
||||||
async def close(self) -> None:
|
async def close(self) -> None:
|
||||||
"""Close browser context"""
|
"""Close browser context"""
|
||||||
@@ -369,3 +321,73 @@ class DouYinCrawler(AbstractCrawler):
         else:
             await self.browser_context.close()
             utils.logger.info("[DouYinCrawler.close] Browser context closed ...")
+
+    async def get_aweme_media(self, aweme_item: Dict):
+        """
+        获取抖音媒体,自动判断媒体类型是短视频还是帖子图片并下载
+
+        Args:
+            aweme_item (Dict): 抖音作品详情
+        """
+        if not config.ENABLE_GET_MEIDAS:
+            utils.logger.info(f"[DouYinCrawler.get_aweme_media] Crawling image mode is not enabled")
+            return
+        # 笔记 urls 列表,若为短视频类型则返回为空列表
+        note_download_url: List[str] = douyin_store._extract_note_image_list(aweme_item)
+        # 视频 url,永远存在,但为短视频类型时的文件其实是音频文件
+        video_download_url: str = douyin_store._extract_video_download_url(aweme_item)
+        # TODO: 抖音并没采用音视频分离的策略,故音频可从原视频中分离,暂不提取
+        if note_download_url:
+            await self.get_aweme_images(aweme_item)
+        else:
+            await self.get_aweme_video(aweme_item)
+
+    async def get_aweme_images(self, aweme_item: Dict):
+        """
+        get aweme images. please use get_aweme_media
+
+        Args:
+            aweme_item (Dict): 抖音作品详情
+        """
+        if not config.ENABLE_GET_MEIDAS:
+            return
+        aweme_id = aweme_item.get("aweme_id")
+        # 笔记 urls 列表,若为短视频类型则返回为空列表
+        note_download_url: List[str] = douyin_store._extract_note_image_list(aweme_item)
+
+        if not note_download_url:
+            return
+        picNum = 0
+        for url in note_download_url:
+            if not url:
+                continue
+            content = await self.dy_client.get_aweme_media(url)
+            if content is None:
+                continue
+            extension_file_name = f"{picNum}.jpeg"
+            picNum += 1
+            await douyin_store.update_dy_aweme_image(aweme_id, content, extension_file_name)
+
+    async def get_aweme_video(self, aweme_item: Dict):
+        """
+        get aweme videos. please use get_aweme_media
+
+        Args:
+            aweme_item (Dict): 抖音作品详情
+        """
+        if not config.ENABLE_GET_MEIDAS:
+            return
+        aweme_id = aweme_item.get("aweme_id")
+
+        # 视频 url,永远存在,但为短视频类型时的文件其实是音频文件
+        video_download_url: str = douyin_store._extract_video_download_url(aweme_item)
+
+        if not video_download_url:
+            return
+        videoNum = 0
+        content = await self.dy_client.get_aweme_media(video_download_url)
+        if content is None:
+            return
+        extension_file_name = f"{videoNum}.mp4"
+        videoNum += 1
+        await douyin_store.update_dy_aweme_video(aweme_id, content, extension_file_name)
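The three new methods above split the work cleanly: get_aweme_media only decides whether an item is an image post or a short video, then delegates to get_aweme_images or get_aweme_video. A condensed sketch of that branching, with the two store helpers replaced by illustrative stand-ins (the field names in the sample items are assumptions for the sketch, not guaranteed API fields):

from typing import Dict, List


def extract_image_urls(aweme_item: Dict) -> List[str]:
    # Stand-in for douyin_store._extract_note_image_list: empty for short videos.
    images = aweme_item.get("images") or []
    return [img.get("url", "") for img in images]


def extract_video_url(aweme_item: Dict) -> str:
    # Stand-in for douyin_store._extract_video_download_url: always present,
    # but for image posts it points at an audio track rather than a video.
    return aweme_item.get("video", {}).get("play_addr", {}).get("url_list", [""])[0]


def classify_media(aweme_item: Dict) -> str:
    # Mirrors get_aweme_media: any image URL wins, otherwise treat it as a video.
    return "images" if extract_image_urls(aweme_item) else "video"


if __name__ == "__main__":
    print(classify_media({"images": [{"url": "https://example.com/0.jpeg"}]}))  # images
    print(classify_media({"video": {"play_addr": {"url_list": ["https://example.com/v.mp4"]}}}))  # video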
|||||||
@@ -8,7 +8,6 @@
|
|||||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||||
|
|
||||||
|
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# @Author : relakkes@gmail.com
|
# @Author : relakkes@gmail.com
|
||||||
# @Time : 2023/12/23 15:40
|
# @Time : 2023/12/23 15:40
|
||||||
@@ -33,9 +32,10 @@ from .field import SearchType
 
 
 class WeiboClient:
+
     def __init__(
         self,
-        timeout=10,
+        timeout=30,  # 若开启爬取媒体选项,weibo 的图片需要更久的超时时间
         proxies=None,
         *,
         headers: Dict[str, str],
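The constructor default above moves from 10 to 30 seconds because image downloads can be slow; callers that never download media can still pass a smaller timeout explicitly. One possible refinement, sketched under the assumption of a hypothetical MEDIA_TIMEOUT setting (not a real config key in this project), would be to derive the timeout from the media switch instead of hard-coding it:

from typing import Optional

# Hypothetical settings, shown only to illustrate the trade-off;
# the actual PR hard-codes timeout=30 in the client constructors.
DEFAULT_TIMEOUT = 10
MEDIA_TIMEOUT = 30


class ClientConfig:
    def __init__(self, enable_get_medias: bool, timeout: Optional[int] = None):
        # Use the longer budget only when media crawling is enabled.
        self.timeout = timeout if timeout is not None else (MEDIA_TIMEOUT if enable_get_medias else DEFAULT_TIMEOUT)


if __name__ == "__main__":
    print(ClientConfig(enable_get_medias=False).timeout)  # 10
    print(ClientConfig(enable_get_medias=True).timeout)   # 30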
@@ -53,10 +53,7 @@ class WeiboClient:
|
|||||||
async def request(self, method, url, **kwargs) -> Union[Response, Dict]:
|
async def request(self, method, url, **kwargs) -> Union[Response, Dict]:
|
||||||
enable_return_response = kwargs.pop("return_response", False)
|
enable_return_response = kwargs.pop("return_response", False)
|
||||||
async with httpx.AsyncClient(proxies=self.proxies) as client:
|
async with httpx.AsyncClient(proxies=self.proxies) as client:
|
||||||
response = await client.request(
|
response = await client.request(method, url, timeout=self.timeout, **kwargs)
|
||||||
method, url, timeout=self.timeout,
|
|
||||||
**kwargs
|
|
||||||
)
|
|
||||||
|
|
||||||
if enable_return_response:
|
if enable_return_response:
|
||||||
return response
|
return response
|
||||||
@@ -84,8 +81,7 @@ class WeiboClient:
|
|||||||
|
|
||||||
async def post(self, uri: str, data: dict) -> Dict:
|
async def post(self, uri: str, data: dict) -> Dict:
|
||||||
json_str = json.dumps(data, separators=(',', ':'), ensure_ascii=False)
|
json_str = json.dumps(data, separators=(',', ':'), ensure_ascii=False)
|
||||||
return await self.request(method="POST", url=f"{self._host}{uri}",
|
return await self.request(method="POST", url=f"{self._host}{uri}", data=json_str, headers=self.headers)
|
||||||
data=json_str, headers=self.headers)
|
|
||||||
|
|
||||||
async def pong(self) -> bool:
|
async def pong(self) -> bool:
|
||||||
"""get a note to check if login state is ok"""
|
"""get a note to check if login state is ok"""
|
||||||
@@ -112,7 +108,7 @@ class WeiboClient:
|
|||||||
self,
|
self,
|
||||||
keyword: str,
|
keyword: str,
|
||||||
page: int = 1,
|
page: int = 1,
|
||||||
search_type: SearchType = SearchType.DEFAULT
|
search_type: SearchType = SearchType.DEFAULT,
|
||||||
) -> Dict:
|
) -> Dict:
|
||||||
"""
|
"""
|
||||||
search note by keyword
|
search note by keyword
|
||||||
@@ -187,8 +183,11 @@ class WeiboClient:
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
async def get_comments_all_sub_comments(note_id: str, comment_list: List[Dict],
|
async def get_comments_all_sub_comments(
|
||||||
callback: Optional[Callable] = None) -> List[Dict]:
|
note_id: str,
|
||||||
|
comment_list: List[Dict],
|
||||||
|
callback: Optional[Callable] = None,
|
||||||
|
) -> List[Dict]:
|
||||||
"""
|
"""
|
||||||
获取评论的所有子评论
|
获取评论的所有子评论
|
||||||
Args:
|
Args:
|
||||||
@@ -200,8 +199,7 @@ class WeiboClient:
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
if not config.ENABLE_GET_SUB_COMMENTS:
|
if not config.ENABLE_GET_SUB_COMMENTS:
|
||||||
utils.logger.info(
|
utils.logger.info(f"[WeiboClient.get_comments_all_sub_comments] Crawling sub_comment mode is not enabled")
|
||||||
f"[WeiboClient.get_comments_all_sub_comments] Crawling sub_comment mode is not enabled")
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
res_sub_comments = []
|
res_sub_comments = []
|
||||||
@@ -220,9 +218,7 @@ class WeiboClient:
|
|||||||
"""
|
"""
|
||||||
url = f"{self._host}/detail/{note_id}"
|
url = f"{self._host}/detail/{note_id}"
|
||||||
async with httpx.AsyncClient(proxies=self.proxies) as client:
|
async with httpx.AsyncClient(proxies=self.proxies) as client:
|
||||||
response = await client.request(
|
response = await client.request("GET", url, timeout=self.timeout, headers=self.headers)
|
||||||
"GET", url, timeout=self.timeout, headers=self.headers
|
|
||||||
)
|
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
raise DataFetchError(f"get weibo detail err: {response.text}")
|
raise DataFetchError(f"get weibo detail err: {response.text}")
|
||||||
match = re.search(r'var \$render_data = (\[.*?\])\[0\]', response.text, re.DOTALL)
|
match = re.search(r'var \$render_data = (\[.*?\])\[0\]', response.text, re.DOTALL)
|
||||||
@@ -230,9 +226,7 @@ class WeiboClient:
|
|||||||
render_data_json = match.group(1)
|
render_data_json = match.group(1)
|
||||||
render_data_dict = json.loads(render_data_json)
|
render_data_dict = json.loads(render_data_json)
|
||||||
note_detail = render_data_dict[0].get("status")
|
note_detail = render_data_dict[0].get("status")
|
||||||
note_item = {
|
note_item = {"mblog": note_detail}
|
||||||
"mblog": note_detail
|
|
||||||
}
|
|
||||||
return note_item
|
return note_item
|
||||||
else:
|
else:
|
||||||
utils.logger.info(f"[WeiboClient.get_note_info_by_id] 未找到$render_data的值")
|
utils.logger.info(f"[WeiboClient.get_note_info_by_id] 未找到$render_data的值")
|
||||||
@@ -251,7 +245,8 @@ class WeiboClient:
|
|||||||
image_url += sub_url[i] + "/"
|
image_url += sub_url[i] + "/"
|
||||||
# 微博图床对外存在防盗链,所以需要代理访问
|
# 微博图床对外存在防盗链,所以需要代理访问
|
||||||
# 由于微博图片是通过 i1.wp.com 来访问的,所以需要拼接一下
|
# 由于微博图片是通过 i1.wp.com 来访问的,所以需要拼接一下
|
||||||
final_uri = (f"{self._image_agent_host}" f"{image_url}")
|
final_uri = (f"{self._image_agent_host}"
|
||||||
|
f"{image_url}")
|
||||||
async with httpx.AsyncClient(proxies=self.proxies) as client:
|
async with httpx.AsyncClient(proxies=self.proxies) as client:
|
||||||
response = await client.request("GET", final_uri, timeout=self.timeout)
|
response = await client.request("GET", final_uri, timeout=self.timeout)
|
||||||
if not response.reason_phrase == "OK":
|
if not response.reason_phrase == "OK":
|
||||||
@@ -260,8 +255,6 @@ class WeiboClient:
|
|||||||
else:
|
else:
|
||||||
return response.content
|
return response.content
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def get_creator_container_info(self, creator_id: str) -> Dict:
|
async def get_creator_container_info(self, creator_id: str) -> Dict:
|
||||||
"""
|
"""
|
||||||
获取用户的容器ID, 容器信息代表着真实请求的API路径
|
获取用户的容器ID, 容器信息代表着真实请求的API路径
|
||||||
@@ -278,10 +271,7 @@ class WeiboClient:
|
|||||||
if not m_weibocn_params:
|
if not m_weibocn_params:
|
||||||
raise DataFetchError("get containerid failed")
|
raise DataFetchError("get containerid failed")
|
||||||
m_weibocn_params_dict = parse_qs(unquote(m_weibocn_params))
|
m_weibocn_params_dict = parse_qs(unquote(m_weibocn_params))
|
||||||
return {
|
return {"fid_container_id": m_weibocn_params_dict.get("fid", [""])[0], "lfid_container_id": m_weibocn_params_dict.get("lfid", [""])[0]}
|
||||||
"fid_container_id": m_weibocn_params_dict.get("fid", [""])[0],
|
|
||||||
"lfid_container_id": m_weibocn_params_dict.get("lfid", [""])[0]
|
|
||||||
}
|
|
||||||
|
|
||||||
async def get_creator_info_by_id(self, creator_id: str) -> Dict:
|
async def get_creator_info_by_id(self, creator_id: str) -> Dict:
|
||||||
"""
|
"""
|
||||||
@@ -316,7 +306,12 @@ class WeiboClient:
|
|||||||
user_res.update(container_info)
|
user_res.update(container_info)
|
||||||
return user_res
|
return user_res
|
||||||
|
|
||||||
async def get_notes_by_creator(self, creator: str, container_id: str, since_id: str = "0", ) -> Dict:
|
async def get_notes_by_creator(
|
||||||
|
self,
|
||||||
|
creator: str,
|
||||||
|
container_id: str,
|
||||||
|
since_id: str = "0",
|
||||||
|
) -> Dict:
|
||||||
"""
|
"""
|
||||||
获取博主的笔记
|
获取博主的笔记
|
||||||
Args:
|
Args:
|
||||||
@@ -337,8 +332,13 @@ class WeiboClient:
|
|||||||
}
|
}
|
||||||
return await self.get(uri, params)
|
return await self.get(uri, params)
|
||||||
|
|
||||||
async def get_all_notes_by_creator_id(self, creator_id: str, container_id: str, crawl_interval: float = 1.0,
|
async def get_all_notes_by_creator_id(
|
||||||
callback: Optional[Callable] = None) -> List[Dict]:
|
self,
|
||||||
|
creator_id: str,
|
||||||
|
container_id: str,
|
||||||
|
crawl_interval: float = 1.0,
|
||||||
|
callback: Optional[Callable] = None,
|
||||||
|
) -> List[Dict]:
|
||||||
"""
|
"""
|
||||||
获取指定用户下的所有发过的帖子,该方法会一直查找一个用户下的所有帖子信息
|
获取指定用户下的所有发过的帖子,该方法会一直查找一个用户下的所有帖子信息
|
||||||
Args:
|
Args:
|
||||||
@@ -357,18 +357,15 @@ class WeiboClient:
|
|||||||
while notes_has_more:
|
while notes_has_more:
|
||||||
notes_res = await self.get_notes_by_creator(creator_id, container_id, since_id)
|
notes_res = await self.get_notes_by_creator(creator_id, container_id, since_id)
|
||||||
if not notes_res:
|
if not notes_res:
|
||||||
utils.logger.error(
|
utils.logger.error(f"[WeiboClient.get_notes_by_creator] The current creator may have been banned by xhs, so they cannot access the data.")
|
||||||
f"[WeiboClient.get_notes_by_creator] The current creator may have been banned by xhs, so they cannot access the data.")
|
|
||||||
break
|
break
|
||||||
since_id = notes_res.get("cardlistInfo", {}).get("since_id", "0")
|
since_id = notes_res.get("cardlistInfo", {}).get("since_id", "0")
|
||||||
if "cards" not in notes_res:
|
if "cards" not in notes_res:
|
||||||
utils.logger.info(
|
utils.logger.info(f"[WeiboClient.get_all_notes_by_creator] No 'notes' key found in response: {notes_res}")
|
||||||
f"[WeiboClient.get_all_notes_by_creator] No 'notes' key found in response: {notes_res}")
|
|
||||||
break
|
break
|
||||||
|
|
||||||
notes = notes_res["cards"]
|
notes = notes_res["cards"]
|
||||||
utils.logger.info(
|
utils.logger.info(f"[WeiboClient.get_all_notes_by_creator] got user_id:{creator_id} notes len : {len(notes)}")
|
||||||
f"[WeiboClient.get_all_notes_by_creator] got user_id:{creator_id} notes len : {len(notes)}")
|
|
||||||
notes = [note for note in notes if note.get("card_type") == 9]
|
notes = [note for note in notes if note.get("card_type") == 9]
|
||||||
if callback:
|
if callback:
|
||||||
await callback(notes)
|
await callback(notes)
|
||||||
@@ -377,4 +374,3 @@ class WeiboClient:
|
|||||||
crawler_total_count += 10
|
crawler_total_count += 10
|
||||||
notes_has_more = notes_res.get("cardlistInfo", {}).get("total", 0) > crawler_total_count
|
notes_has_more = notes_res.get("cardlistInfo", {}).get("total", 0) > crawler_total_count
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|||||||
@@ -8,13 +8,11 @@
|
|||||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||||
|
|
||||||
|
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# @Author : relakkes@gmail.com
|
# @Author : relakkes@gmail.com
|
||||||
# @Time : 2023/12/23 15:41
|
# @Time : 2023/12/23 15:41
|
||||||
# @Desc : 微博爬虫主流程代码
|
# @Desc : 微博爬虫主流程代码
|
||||||
|
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
@@ -60,13 +58,9 @@ class WeiboCrawler(AbstractCrawler):
|
|||||||
async def start(self):
|
async def start(self):
|
||||||
playwright_proxy_format, httpx_proxy_format = None, None
|
playwright_proxy_format, httpx_proxy_format = None, None
|
||||||
if config.ENABLE_IP_PROXY:
|
if config.ENABLE_IP_PROXY:
|
||||||
ip_proxy_pool = await create_ip_pool(
|
ip_proxy_pool = await create_ip_pool(config.IP_PROXY_POOL_COUNT, enable_validate_ip=True)
|
||||||
config.IP_PROXY_POOL_COUNT, enable_validate_ip=True
|
|
||||||
)
|
|
||||||
ip_proxy_info: IpInfoModel = await ip_proxy_pool.get_proxy()
|
ip_proxy_info: IpInfoModel = await ip_proxy_pool.get_proxy()
|
||||||
playwright_proxy_format, httpx_proxy_format = utils.format_proxy_info(
|
playwright_proxy_format, httpx_proxy_format = utils.format_proxy_info(ip_proxy_info)
|
||||||
ip_proxy_info
|
|
||||||
)
|
|
||||||
|
|
||||||
async with async_playwright() as playwright:
|
async with async_playwright() as playwright:
|
||||||
# 根据配置选择启动模式
|
# 根据配置选择启动模式
|
||||||
@@ -82,9 +76,7 @@ class WeiboCrawler(AbstractCrawler):
|
|||||||
utils.logger.info("[WeiboCrawler] 使用标准模式启动浏览器")
|
utils.logger.info("[WeiboCrawler] 使用标准模式启动浏览器")
|
||||||
# Launch a browser context.
|
# Launch a browser context.
|
||||||
chromium = playwright.chromium
|
chromium = playwright.chromium
|
||||||
self.browser_context = await self.launch_browser(
|
self.browser_context = await self.launch_browser(chromium, None, self.mobile_user_agent, headless=config.HEADLESS)
|
||||||
chromium, None, self.mobile_user_agent, headless=config.HEADLESS
|
|
||||||
)
|
|
||||||
# stealth.min.js is a js script to prevent the website from detecting the crawler.
|
# stealth.min.js is a js script to prevent the website from detecting the crawler.
|
||||||
await self.browser_context.add_init_script(path="libs/stealth.min.js")
|
await self.browser_context.add_init_script(path="libs/stealth.min.js")
|
||||||
self.context_page = await self.browser_context.new_page()
|
self.context_page = await self.browser_context.new_page()
|
||||||
@@ -103,14 +95,10 @@ class WeiboCrawler(AbstractCrawler):
|
|||||||
await login_obj.begin()
|
await login_obj.begin()
|
||||||
|
|
||||||
# 登录成功后重定向到手机端的网站,再更新手机端登录成功的cookie
|
# 登录成功后重定向到手机端的网站,再更新手机端登录成功的cookie
|
||||||
utils.logger.info(
|
utils.logger.info("[WeiboCrawler.start] redirect weibo mobile homepage and update cookies on mobile platform")
|
||||||
"[WeiboCrawler.start] redirect weibo mobile homepage and update cookies on mobile platform"
|
|
||||||
)
|
|
||||||
await self.context_page.goto(self.mobile_index_url)
|
await self.context_page.goto(self.mobile_index_url)
|
||||||
await asyncio.sleep(2)
|
await asyncio.sleep(2)
|
||||||
await self.wb_client.update_cookies(
|
await self.wb_client.update_cookies(browser_context=self.browser_context)
|
||||||
browser_context=self.browser_context
|
|
||||||
)
|
|
||||||
|
|
||||||
crawler_type_var.set(config.CRAWLER_TYPE)
|
crawler_type_var.set(config.CRAWLER_TYPE)
|
||||||
if config.CRAWLER_TYPE == "search":
|
if config.CRAWLER_TYPE == "search":
|
||||||
@@ -147,30 +135,20 @@ class WeiboCrawler(AbstractCrawler):
|
|||||||
elif config.WEIBO_SEARCH_TYPE == "video":
|
elif config.WEIBO_SEARCH_TYPE == "video":
|
||||||
search_type = SearchType.VIDEO
|
search_type = SearchType.VIDEO
|
||||||
else:
|
else:
|
||||||
utils.logger.error(
|
utils.logger.error(f"[WeiboCrawler.search] Invalid WEIBO_SEARCH_TYPE: {config.WEIBO_SEARCH_TYPE}")
|
||||||
f"[WeiboCrawler.search] Invalid WEIBO_SEARCH_TYPE: {config.WEIBO_SEARCH_TYPE}"
|
|
||||||
)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
for keyword in config.KEYWORDS.split(","):
|
for keyword in config.KEYWORDS.split(","):
|
||||||
source_keyword_var.set(keyword)
|
source_keyword_var.set(keyword)
|
||||||
utils.logger.info(
|
utils.logger.info(f"[WeiboCrawler.search] Current search keyword: {keyword}")
|
||||||
f"[WeiboCrawler.search] Current search keyword: {keyword}"
|
|
||||||
)
|
|
||||||
page = 1
|
page = 1
|
||||||
while (
|
while (page - start_page + 1) * weibo_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
|
||||||
page - start_page + 1
|
|
||||||
) * weibo_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
|
|
||||||
if page < start_page:
|
if page < start_page:
|
||||||
utils.logger.info(f"[WeiboCrawler.search] Skip page: {page}")
|
utils.logger.info(f"[WeiboCrawler.search] Skip page: {page}")
|
||||||
page += 1
|
page += 1
|
||||||
continue
|
continue
|
||||||
utils.logger.info(
|
utils.logger.info(f"[WeiboCrawler.search] search weibo keyword: {keyword}, page: {page}")
|
||||||
f"[WeiboCrawler.search] search weibo keyword: {keyword}, page: {page}"
|
search_res = await self.wb_client.get_note_by_keyword(keyword=keyword, page=page, search_type=search_type)
|
||||||
)
|
|
||||||
search_res = await self.wb_client.get_note_by_keyword(
|
|
||||||
keyword=keyword, page=page, search_type=search_type
|
|
||||||
)
|
|
||||||
note_id_list: List[str] = []
|
note_id_list: List[str] = []
|
||||||
note_list = filter_search_result_card(search_res.get("cards"))
|
note_list = filter_search_result_card(search_res.get("cards"))
|
||||||
for note_item in note_list:
|
for note_item in note_list:
|
||||||
@@ -190,19 +168,14 @@ class WeiboCrawler(AbstractCrawler):
|
|||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
|
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
|
||||||
task_list = [
|
task_list = [self.get_note_info_task(note_id=note_id, semaphore=semaphore) for note_id in config.WEIBO_SPECIFIED_ID_LIST]
|
||||||
self.get_note_info_task(note_id=note_id, semaphore=semaphore)
|
|
||||||
for note_id in config.WEIBO_SPECIFIED_ID_LIST
|
|
||||||
]
|
|
||||||
video_details = await asyncio.gather(*task_list)
|
video_details = await asyncio.gather(*task_list)
|
||||||
for note_item in video_details:
|
for note_item in video_details:
|
||||||
if note_item:
|
if note_item:
|
||||||
await weibo_store.update_weibo_note(note_item)
|
await weibo_store.update_weibo_note(note_item)
|
||||||
await self.batch_get_notes_comments(config.WEIBO_SPECIFIED_ID_LIST)
|
await self.batch_get_notes_comments(config.WEIBO_SPECIFIED_ID_LIST)
|
||||||
|
|
||||||
async def get_note_info_task(
|
async def get_note_info_task(self, note_id: str, semaphore: asyncio.Semaphore) -> Optional[Dict]:
|
||||||
self, note_id: str, semaphore: asyncio.Semaphore
|
|
||||||
) -> Optional[Dict]:
|
|
||||||
"""
|
"""
|
||||||
Get note detail task
|
Get note detail task
|
||||||
:param note_id:
|
:param note_id:
|
||||||
@@ -214,14 +187,10 @@ class WeiboCrawler(AbstractCrawler):
|
|||||||
result = await self.wb_client.get_note_info_by_id(note_id)
|
result = await self.wb_client.get_note_info_by_id(note_id)
|
||||||
return result
|
return result
|
||||||
except DataFetchError as ex:
|
except DataFetchError as ex:
|
||||||
utils.logger.error(
|
utils.logger.error(f"[WeiboCrawler.get_note_info_task] Get note detail error: {ex}")
|
||||||
f"[WeiboCrawler.get_note_info_task] Get note detail error: {ex}"
|
|
||||||
)
|
|
||||||
return None
|
return None
|
||||||
except KeyError as ex:
|
except KeyError as ex:
|
||||||
utils.logger.error(
|
utils.logger.error(f"[WeiboCrawler.get_note_info_task] have not fund note detail note_id:{note_id}, err: {ex}")
|
||||||
f"[WeiboCrawler.get_note_info_task] have not fund note detail note_id:{note_id}, err: {ex}"
|
|
||||||
)
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def batch_get_notes_comments(self, note_id_list: List[str]):
|
async def batch_get_notes_comments(self, note_id_list: List[str]):
|
||||||
@@ -231,20 +200,14 @@ class WeiboCrawler(AbstractCrawler):
|
|||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
if not config.ENABLE_GET_COMMENTS:
|
if not config.ENABLE_GET_COMMENTS:
|
||||||
utils.logger.info(
|
utils.logger.info(f"[WeiboCrawler.batch_get_note_comments] Crawling comment mode is not enabled")
|
||||||
f"[WeiboCrawler.batch_get_note_comments] Crawling comment mode is not enabled"
|
|
||||||
)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
utils.logger.info(
|
utils.logger.info(f"[WeiboCrawler.batch_get_notes_comments] note ids:{note_id_list}")
|
||||||
f"[WeiboCrawler.batch_get_notes_comments] note ids:{note_id_list}"
|
|
||||||
)
|
|
||||||
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
|
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
|
||||||
task_list: List[Task] = []
|
task_list: List[Task] = []
|
||||||
for note_id in note_id_list:
|
for note_id in note_id_list:
|
||||||
task = asyncio.create_task(
|
task = asyncio.create_task(self.get_note_comments(note_id, semaphore), name=note_id)
|
||||||
self.get_note_comments(note_id, semaphore), name=note_id
|
|
||||||
)
|
|
||||||
task_list.append(task)
|
task_list.append(task)
|
||||||
await asyncio.gather(*task_list)
|
await asyncio.gather(*task_list)
|
||||||
|
|
||||||
@@ -257,25 +220,17 @@ class WeiboCrawler(AbstractCrawler):
|
|||||||
"""
|
"""
|
||||||
async with semaphore:
|
async with semaphore:
|
||||||
try:
|
try:
|
||||||
utils.logger.info(
|
utils.logger.info(f"[WeiboCrawler.get_note_comments] begin get note_id: {note_id} comments ...")
|
||||||
f"[WeiboCrawler.get_note_comments] begin get note_id: {note_id} comments ..."
|
|
||||||
)
|
|
||||||
await self.wb_client.get_note_all_comments(
|
await self.wb_client.get_note_all_comments(
|
||||||
note_id=note_id,
|
note_id=note_id,
|
||||||
crawl_interval=random.randint(
|
crawl_interval=random.randint(1, 3), # 微博对API的限流比较严重,所以延时提高一些
|
||||||
1, 3
|
|
||||||
), # 微博对API的限流比较严重,所以延时提高一些
|
|
||||||
callback=weibo_store.batch_update_weibo_note_comments,
|
callback=weibo_store.batch_update_weibo_note_comments,
|
||||||
max_count=config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES,
|
max_count=config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES,
|
||||||
)
|
)
|
||||||
except DataFetchError as ex:
|
except DataFetchError as ex:
|
||||||
utils.logger.error(
|
utils.logger.error(f"[WeiboCrawler.get_note_comments] get note_id: {note_id} comment error: {ex}")
|
||||||
f"[WeiboCrawler.get_note_comments] get note_id: {note_id} comment error: {ex}"
|
|
||||||
)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
utils.logger.error(
|
utils.logger.error(f"[WeiboCrawler.get_note_comments] may be been blocked, err:{e}")
|
||||||
f"[WeiboCrawler.get_note_comments] may be been blocked, err:{e}"
|
|
||||||
)
|
|
||||||
|
|
||||||
async def get_note_images(self, mblog: Dict):
|
async def get_note_images(self, mblog: Dict):
|
||||||
"""
|
"""
|
||||||
@@ -283,10 +238,8 @@ class WeiboCrawler(AbstractCrawler):
         :param mblog:
         :return:
         """
-        if not config.ENABLE_GET_IMAGES:
-            utils.logger.info(
-                f"[WeiboCrawler.get_note_images] Crawling image mode is not enabled"
-            )
+        if not config.ENABLE_GET_MEIDAS:
+            utils.logger.info(f"[WeiboCrawler.get_note_images] Crawling image mode is not enabled")
             return
 
         pics: Dict = mblog.get("pics")
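The guard above now checks config.ENABLE_GET_MEIDAS, the single media switch this PR uses across platforms, instead of the old ENABLE_GET_IMAGES. A small self-contained sketch of the same guard-and-download loop, with the config module and the client/store calls faked for illustration:

import asyncio
from types import SimpleNamespace
from typing import Dict, List

# Fake config object for illustration; the real flag lives in the project's config module.
config = SimpleNamespace(ENABLE_GET_MEIDAS=True)


async def fetch_image(url: str) -> bytes:
    return b"..."  # stand-in for WeiboClient.get_note_image


async def save_image(pic_id: str, content: bytes) -> None:
    print("saved", pic_id, len(content), "bytes")  # stand-in for weibo_store.update_weibo_note_image


async def get_note_images(mblog: Dict) -> None:
    # Early return keeps the crawl cheap when media download is switched off.
    if not config.ENABLE_GET_MEIDAS:
        return
    pics: List[Dict] = mblog.get("pics") or []
    for pic in pics:
        url = pic.get("url")
        if not url:
            continue
        content = await fetch_image(url)
        if content is not None:
            await save_image(pic["pid"], content)


if __name__ == "__main__":
    asyncio.run(get_note_images({"pics": [{"pid": "p1", "url": "https://example.com/p1.jpg"}]}))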
@@ -299,9 +252,7 @@ class WeiboCrawler(AbstractCrawler):
|
|||||||
content = await self.wb_client.get_note_image(url)
|
content = await self.wb_client.get_note_image(url)
|
||||||
if content != None:
|
if content != None:
|
||||||
extension_file_name = url.split(".")[-1]
|
extension_file_name = url.split(".")[-1]
|
||||||
await weibo_store.update_weibo_note_image(
|
await weibo_store.update_weibo_note_image(pic["pid"], content, extension_file_name)
|
||||||
pic["pid"], content, extension_file_name
|
|
||||||
)
|
|
||||||
|
|
||||||
async def get_creators_and_notes(self) -> None:
|
async def get_creators_and_notes(self) -> None:
|
||||||
"""
|
"""
|
||||||
@@ -309,18 +260,12 @@ class WeiboCrawler(AbstractCrawler):
|
|||||||
Returns:
|
Returns:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
utils.logger.info(
|
utils.logger.info("[WeiboCrawler.get_creators_and_notes] Begin get weibo creators")
|
||||||
"[WeiboCrawler.get_creators_and_notes] Begin get weibo creators"
|
|
||||||
)
|
|
||||||
for user_id in config.WEIBO_CREATOR_ID_LIST:
|
for user_id in config.WEIBO_CREATOR_ID_LIST:
|
||||||
createor_info_res: Dict = await self.wb_client.get_creator_info_by_id(
|
createor_info_res: Dict = await self.wb_client.get_creator_info_by_id(creator_id=user_id)
|
||||||
creator_id=user_id
|
|
||||||
)
|
|
||||||
if createor_info_res:
|
if createor_info_res:
|
||||||
createor_info: Dict = createor_info_res.get("userInfo", {})
|
createor_info: Dict = createor_info_res.get("userInfo", {})
|
||||||
utils.logger.info(
|
utils.logger.info(f"[WeiboCrawler.get_creators_and_notes] creator info: {createor_info}")
|
||||||
f"[WeiboCrawler.get_creators_and_notes] creator info: {createor_info}"
|
|
||||||
)
|
|
||||||
if not createor_info:
|
if not createor_info:
|
||||||
raise DataFetchError("Get creator info error")
|
raise DataFetchError("Get creator info error")
|
||||||
await weibo_store.save_creator(user_id, user_info=createor_info)
|
await weibo_store.save_creator(user_id, user_info=createor_info)
|
||||||
@@ -333,26 +278,16 @@ class WeiboCrawler(AbstractCrawler):
|
|||||||
callback=weibo_store.batch_update_weibo_notes,
|
callback=weibo_store.batch_update_weibo_notes,
|
||||||
)
|
)
|
||||||
|
|
||||||
note_ids = [
|
note_ids = [note_item.get("mblog", {}).get("id") for note_item in all_notes_list if note_item.get("mblog", {}).get("id")]
|
||||||
note_item.get("mblog", {}).get("id")
|
|
||||||
for note_item in all_notes_list
|
|
||||||
if note_item.get("mblog", {}).get("id")
|
|
||||||
]
|
|
||||||
await self.batch_get_notes_comments(note_ids)
|
await self.batch_get_notes_comments(note_ids)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
utils.logger.error(
|
utils.logger.error(f"[WeiboCrawler.get_creators_and_notes] get creator info error, creator_id:{user_id}")
|
||||||
f"[WeiboCrawler.get_creators_and_notes] get creator info error, creator_id:{user_id}"
|
|
||||||
)
|
|
||||||
|
|
||||||
async def create_weibo_client(self, httpx_proxy: Optional[str]) -> WeiboClient:
|
async def create_weibo_client(self, httpx_proxy: Optional[str]) -> WeiboClient:
|
||||||
"""Create xhs client"""
|
"""Create xhs client"""
|
||||||
utils.logger.info(
|
utils.logger.info("[WeiboCrawler.create_weibo_client] Begin create weibo API client ...")
|
||||||
"[WeiboCrawler.create_weibo_client] Begin create weibo API client ..."
|
cookie_str, cookie_dict = utils.convert_cookies(await self.browser_context.cookies())
|
||||||
)
|
|
||||||
cookie_str, cookie_dict = utils.convert_cookies(
|
|
||||||
await self.browser_context.cookies()
|
|
||||||
)
|
|
||||||
weibo_client_obj = WeiboClient(
|
weibo_client_obj = WeiboClient(
|
||||||
proxies=httpx_proxy,
|
proxies=httpx_proxy,
|
||||||
headers={
|
headers={
|
||||||
@@ -375,27 +310,24 @@ class WeiboCrawler(AbstractCrawler):
|
|||||||
headless: bool = True,
|
headless: bool = True,
|
||||||
) -> BrowserContext:
|
) -> BrowserContext:
|
||||||
"""Launch browser and create browser context"""
|
"""Launch browser and create browser context"""
|
||||||
utils.logger.info(
|
utils.logger.info("[WeiboCrawler.launch_browser] Begin create browser context ...")
|
||||||
"[WeiboCrawler.launch_browser] Begin create browser context ..."
|
|
||||||
)
|
|
||||||
if config.SAVE_LOGIN_STATE:
|
if config.SAVE_LOGIN_STATE:
|
||||||
user_data_dir = os.path.join(
|
user_data_dir = os.path.join(os.getcwd(), "browser_data", config.USER_DATA_DIR % config.PLATFORM) # type: ignore
|
||||||
os.getcwd(), "browser_data", config.USER_DATA_DIR % config.PLATFORM
|
|
||||||
) # type: ignore
|
|
||||||
browser_context = await chromium.launch_persistent_context(
|
browser_context = await chromium.launch_persistent_context(
|
||||||
user_data_dir=user_data_dir,
|
user_data_dir=user_data_dir,
|
||||||
accept_downloads=True,
|
accept_downloads=True,
|
||||||
headless=headless,
|
headless=headless,
|
||||||
proxy=playwright_proxy, # type: ignore
|
proxy=playwright_proxy, # type: ignore
|
||||||
viewport={"width": 1920, "height": 1080},
|
viewport={
|
||||||
|
"width": 1920,
|
||||||
|
"height": 1080
|
||||||
|
},
|
||||||
user_agent=user_agent,
|
user_agent=user_agent,
|
||||||
)
|
)
|
||||||
return browser_context
|
return browser_context
|
||||||
else:
|
else:
|
||||||
browser = await chromium.launch(headless=headless, proxy=playwright_proxy) # type: ignore
|
browser = await chromium.launch(headless=headless, proxy=playwright_proxy) # type: ignore
|
||||||
browser_context = await browser.new_context(
|
browser_context = await browser.new_context(viewport={"width": 1920, "height": 1080}, user_agent=user_agent)
|
||||||
viewport={"width": 1920, "height": 1080}, user_agent=user_agent
|
|
||||||
)
|
|
||||||
return browser_context
|
return browser_context
|
||||||
|
|
||||||
async def launch_browser_with_cdp(
|
async def launch_browser_with_cdp(
|
||||||
@@ -427,9 +359,7 @@ class WeiboCrawler(AbstractCrawler):
|
|||||||
utils.logger.error(f"[WeiboCrawler] CDP模式启动失败,回退到标准模式: {e}")
|
utils.logger.error(f"[WeiboCrawler] CDP模式启动失败,回退到标准模式: {e}")
|
||||||
# 回退到标准模式
|
# 回退到标准模式
|
||||||
chromium = playwright.chromium
|
chromium = playwright.chromium
|
||||||
return await self.launch_browser(
|
return await self.launch_browser(chromium, playwright_proxy, user_agent, headless)
|
||||||
chromium, playwright_proxy, user_agent, headless
|
|
||||||
)
|
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
"""Close browser context"""
|
"""Close browser context"""
|
||||||
|
|||||||
@@ -8,7 +8,6 @@
|
|||||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||||
|
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
@@ -30,9 +29,10 @@ from .help import get_search_id, sign
 
 
 class XiaoHongShuClient(AbstractApiClient):
+
     def __init__(
         self,
-        timeout=10,
+        timeout=30,  # 若开启爬取媒体选项,xhs 的长视频需要更久的超时时间
        proxies=None,
         *,
         headers: Dict[str, str],
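Raising the XiaoHongShu client default to 30 seconds covers long video downloads, at the cost of also waiting longer on slow API calls. httpx also accepts a per-request timeout, so an alternative (sketched here with an illustrative helper, not code from this PR) is to keep a short default and stretch the budget only for media requests:

import asyncio
from typing import Optional

import httpx

API_TIMEOUT = 10     # short budget for JSON endpoints
MEDIA_TIMEOUT = 30   # long budget for video and image payloads


async def fetch(url: str, media: bool = False, proxies: Optional[str] = None) -> httpx.Response:
    # The client default stays small; only media downloads override it per request.
    async with httpx.AsyncClient(proxies=proxies, timeout=API_TIMEOUT) as client:
        return await client.get(url, timeout=MEDIA_TIMEOUT if media else API_TIMEOUT)


async def main() -> None:
    response = await fetch("https://httpbin.org/get")  # placeholder endpoint
    print(response.status_code)


if __name__ == "__main__":
    asyncio.run(main())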
@@ -61,9 +61,7 @@ class XiaoHongShuClient(AbstractApiClient):
|
|||||||
Returns:
|
Returns:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
encrypt_params = await self.playwright_page.evaluate(
|
encrypt_params = await self.playwright_page.evaluate("([url, data]) => window._webmsxyw(url,data)", [url, data])
|
||||||
"([url, data]) => window._webmsxyw(url,data)", [url, data]
|
|
||||||
)
|
|
||||||
local_storage = await self.playwright_page.evaluate("() => window.localStorage")
|
local_storage = await self.playwright_page.evaluate("() => window.localStorage")
|
||||||
signs = sign(
|
signs = sign(
|
||||||
a1=self.cookie_dict.get("a1", ""),
|
a1=self.cookie_dict.get("a1", ""),
|
||||||
@@ -130,9 +128,7 @@ class XiaoHongShuClient(AbstractApiClient):
|
|||||||
if isinstance(params, dict):
|
if isinstance(params, dict):
|
||||||
final_uri = f"{uri}?" f"{urlencode(params)}"
|
final_uri = f"{uri}?" f"{urlencode(params)}"
|
||||||
headers = await self._pre_headers(final_uri)
|
headers = await self._pre_headers(final_uri)
|
||||||
return await self.request(
|
return await self.request(method="GET", url=f"{self._host}{final_uri}", headers=headers)
|
||||||
method="GET", url=f"{self._host}{final_uri}", headers=headers
|
|
||||||
)
|
|
||||||
|
|
||||||
async def post(self, uri: str, data: dict, **kwargs) -> Dict:
|
async def post(self, uri: str, data: dict, **kwargs) -> Dict:
|
||||||
"""
|
"""
|
||||||
@@ -158,9 +154,7 @@ class XiaoHongShuClient(AbstractApiClient):
|
|||||||
async with httpx.AsyncClient(proxies=self.proxies) as client:
|
async with httpx.AsyncClient(proxies=self.proxies) as client:
|
||||||
response = await client.request("GET", url, timeout=self.timeout)
|
response = await client.request("GET", url, timeout=self.timeout)
|
||||||
if not response.reason_phrase == "OK":
|
if not response.reason_phrase == "OK":
|
||||||
utils.logger.error(
|
utils.logger.error(f"[XiaoHongShuClient.get_note_media] request {url} err, res:{response.text}")
|
||||||
f"[XiaoHongShuClient.get_note_media] request {url} err, res:{response.text}"
|
|
||||||
)
|
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
return response.content
|
return response.content
|
||||||
@@ -179,9 +173,7 @@ class XiaoHongShuClient(AbstractApiClient):
|
|||||||
if note_card.get("items"):
|
if note_card.get("items"):
|
||||||
ping_flag = True
|
ping_flag = True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
utils.logger.error(
|
utils.logger.error(f"[XiaoHongShuClient.pong] Ping xhs failed: {e}, and try to login again...")
|
||||||
f"[XiaoHongShuClient.pong] Ping xhs failed: {e}, and try to login again..."
|
|
||||||
)
|
|
||||||
ping_flag = False
|
ping_flag = False
|
||||||
return ping_flag
|
return ping_flag
|
||||||
|
|
||||||
@@ -231,7 +223,10 @@ class XiaoHongShuClient(AbstractApiClient):
|
|||||||
return await self.post(uri, data)
|
return await self.post(uri, data)
|
||||||
|
|
||||||
async def get_note_by_id(
|
async def get_note_by_id(
|
||||||
self, note_id: str, xsec_source: str, xsec_token: str
|
self,
|
||||||
|
note_id: str,
|
||||||
|
xsec_source: str,
|
||||||
|
xsec_token: str,
|
||||||
) -> Dict:
|
) -> Dict:
|
||||||
"""
|
"""
|
||||||
获取笔记详情API
|
获取笔记详情API
|
||||||
@@ -249,7 +244,9 @@ class XiaoHongShuClient(AbstractApiClient):
|
|||||||
data = {
|
data = {
|
||||||
"source_note_id": note_id,
|
"source_note_id": note_id,
|
||||||
"image_formats": ["jpg", "webp", "avif"],
|
"image_formats": ["jpg", "webp", "avif"],
|
||||||
"extra": {"need_body_topic": 1},
|
"extra": {
|
||||||
|
"need_body_topic": 1
|
||||||
|
},
|
||||||
"xsec_source": xsec_source,
|
"xsec_source": xsec_source,
|
||||||
"xsec_token": xsec_token,
|
"xsec_token": xsec_token,
|
||||||
}
|
}
|
||||||
@@ -259,13 +256,14 @@ class XiaoHongShuClient(AbstractApiClient):
|
|||||||
res_dict: Dict = res["items"][0]["note_card"]
|
res_dict: Dict = res["items"][0]["note_card"]
|
||||||
return res_dict
|
return res_dict
|
||||||
# 爬取频繁了可能会出现有的笔记能有结果有的没有
|
# 爬取频繁了可能会出现有的笔记能有结果有的没有
|
||||||
utils.logger.error(
|
utils.logger.error(f"[XiaoHongShuClient.get_note_by_id] get note id:{note_id} empty and res:{res}")
|
||||||
f"[XiaoHongShuClient.get_note_by_id] get note id:{note_id} empty and res:{res}"
|
|
||||||
)
|
|
||||||
return dict()
|
return dict()
|
||||||
|
|
||||||
async def get_note_comments(
|
async def get_note_comments(
|
||||||
self, note_id: str, xsec_token: str, cursor: str = ""
|
self,
|
||||||
|
note_id: str,
|
||||||
|
xsec_token: str,
|
||||||
|
cursor: str = "",
|
||||||
) -> Dict:
|
) -> Dict:
|
||||||
"""
|
"""
|
||||||
获取一级评论的API
|
获取一级评论的API
|
||||||
@@ -342,19 +340,15 @@ class XiaoHongShuClient(AbstractApiClient):
|
|||||||
comments_has_more = True
|
comments_has_more = True
|
||||||
comments_cursor = ""
|
comments_cursor = ""
|
||||||
while comments_has_more and len(result) < max_count:
|
while comments_has_more and len(result) < max_count:
|
||||||
comments_res = await self.get_note_comments(
|
comments_res = await self.get_note_comments(note_id=note_id, xsec_token=xsec_token, cursor=comments_cursor)
|
||||||
note_id=note_id, xsec_token=xsec_token, cursor=comments_cursor
|
|
||||||
)
|
|
||||||
comments_has_more = comments_res.get("has_more", False)
|
comments_has_more = comments_res.get("has_more", False)
|
||||||
comments_cursor = comments_res.get("cursor", "")
|
comments_cursor = comments_res.get("cursor", "")
|
||||||
if "comments" not in comments_res:
|
if "comments" not in comments_res:
|
||||||
utils.logger.info(
|
utils.logger.info(f"[XiaoHongShuClient.get_note_all_comments] No 'comments' key found in response: {comments_res}")
|
||||||
f"[XiaoHongShuClient.get_note_all_comments] No 'comments' key found in response: {comments_res}"
|
|
||||||
)
|
|
||||||
break
|
break
|
||||||
comments = comments_res["comments"]
|
comments = comments_res["comments"]
|
||||||
if len(result) + len(comments) > max_count:
|
if len(result) + len(comments) > max_count:
|
||||||
comments = comments[: max_count - len(result)]
|
comments = comments[:max_count - len(result)]
|
||||||
if callback:
|
if callback:
|
||||||
await callback(note_id, comments)
|
await callback(note_id, comments)
|
||||||
await asyncio.sleep(crawl_interval)
|
await asyncio.sleep(crawl_interval)
|
||||||
@@ -387,9 +381,7 @@ class XiaoHongShuClient(AbstractApiClient):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
if not config.ENABLE_GET_SUB_COMMENTS:
|
if not config.ENABLE_GET_SUB_COMMENTS:
|
||||||
utils.logger.info(
|
utils.logger.info(f"[XiaoHongShuCrawler.get_comments_all_sub_comments] Crawling sub_comment mode is not enabled")
|
||||||
f"[XiaoHongShuCrawler.get_comments_all_sub_comments] Crawling sub_comment mode is not enabled"
|
|
||||||
)
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
result = []
|
result = []
|
||||||
@@ -416,16 +408,12 @@ class XiaoHongShuClient(AbstractApiClient):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if comments_res is None:
|
if comments_res is None:
|
||||||
utils.logger.info(
|
utils.logger.info(f"[XiaoHongShuClient.get_comments_all_sub_comments] No response found for note_id: {note_id}")
|
||||||
f"[XiaoHongShuClient.get_comments_all_sub_comments] No response found for note_id: {note_id}"
|
|
||||||
)
|
|
||||||
continue
|
continue
|
||||||
sub_comment_has_more = comments_res.get("has_more", False)
|
sub_comment_has_more = comments_res.get("has_more", False)
|
||||||
sub_comment_cursor = comments_res.get("cursor", "")
|
sub_comment_cursor = comments_res.get("cursor", "")
|
||||||
if "comments" not in comments_res:
|
if "comments" not in comments_res:
|
||||||
utils.logger.info(
|
utils.logger.info(f"[XiaoHongShuClient.get_comments_all_sub_comments] No 'comments' key found in response: {comments_res}")
|
||||||
f"[XiaoHongShuClient.get_comments_all_sub_comments] No 'comments' key found in response: {comments_res}"
|
|
||||||
)
|
|
||||||
break
|
break
|
||||||
comments = comments_res["comments"]
|
comments = comments_res["comments"]
|
||||||
if callback:
|
if callback:
|
||||||
@@ -441,12 +429,8 @@ class XiaoHongShuClient(AbstractApiClient):
|
|||||||
eg: https://www.xiaohongshu.com/user/profile/59d8cb33de5fb4696bf17217
|
eg: https://www.xiaohongshu.com/user/profile/59d8cb33de5fb4696bf17217
|
||||||
"""
|
"""
|
||||||
uri = f"/user/profile/{user_id}"
|
uri = f"/user/profile/{user_id}"
|
||||||
html_content = await self.request(
|
html_content = await self.request("GET", self._domain + uri, return_response=True, headers=self.headers)
|
||||||
"GET", self._domain + uri, return_response=True, headers=self.headers
|
match = re.search(r"<script>window.__INITIAL_STATE__=(.+)<\/script>", html_content, re.M)
|
||||||
)
|
|
||||||
match = re.search(
|
|
||||||
r"<script>window.__INITIAL_STATE__=(.+)<\/script>", html_content, re.M
|
|
||||||
)
|
|
||||||
|
|
||||||
if match is None:
|
if match is None:
|
||||||
return {}
|
return {}
|
||||||
@@ -457,7 +441,10 @@ class XiaoHongShuClient(AbstractApiClient):
|
|||||||
return info.get("user").get("userPageData")
|
return info.get("user").get("userPageData")
|
||||||
|
|
||||||
async def get_notes_by_creator(
|
async def get_notes_by_creator(
|
||||||
self, creator: str, cursor: str, page_size: int = 30
|
self,
|
||||||
|
creator: str,
|
||||||
|
cursor: str,
|
||||||
|
page_size: int = 30,
|
||||||
) -> Dict:
|
) -> Dict:
|
||||||
"""
|
"""
|
||||||
获取博主的笔记
|
获取博主的笔记
|
||||||
@@ -500,23 +487,17 @@ class XiaoHongShuClient(AbstractApiClient):
|
|||||||
while notes_has_more and len(result) < config.CRAWLER_MAX_NOTES_COUNT:
|
while notes_has_more and len(result) < config.CRAWLER_MAX_NOTES_COUNT:
|
||||||
notes_res = await self.get_notes_by_creator(user_id, notes_cursor)
|
notes_res = await self.get_notes_by_creator(user_id, notes_cursor)
|
||||||
if not notes_res:
|
if not notes_res:
|
||||||
utils.logger.error(
|
utils.logger.error(f"[XiaoHongShuClient.get_notes_by_creator] The current creator may have been banned by xhs, so they cannot access the data.")
|
||||||
f"[XiaoHongShuClient.get_notes_by_creator] The current creator may have been banned by xhs, so they cannot access the data."
|
|
||||||
)
|
|
||||||
break
|
break
|
||||||
|
|
||||||
notes_has_more = notes_res.get("has_more", False)
|
notes_has_more = notes_res.get("has_more", False)
|
||||||
notes_cursor = notes_res.get("cursor", "")
|
notes_cursor = notes_res.get("cursor", "")
|
||||||
if "notes" not in notes_res:
|
if "notes" not in notes_res:
|
||||||
utils.logger.info(
|
utils.logger.info(f"[XiaoHongShuClient.get_all_notes_by_creator] No 'notes' key found in response: {notes_res}")
|
||||||
f"[XiaoHongShuClient.get_all_notes_by_creator] No 'notes' key found in response: {notes_res}"
|
|
||||||
)
|
|
||||||
break
|
break
|
||||||
|
|
||||||
notes = notes_res["notes"]
|
notes = notes_res["notes"]
|
||||||
utils.logger.info(
|
utils.logger.info(f"[XiaoHongShuClient.get_all_notes_by_creator] got user_id:{user_id} notes len : {len(notes)}")
|
||||||
f"[XiaoHongShuClient.get_all_notes_by_creator] got user_id:{user_id} notes len : {len(notes)}"
|
|
||||||
)
|
|
||||||
|
|
||||||
remaining = config.CRAWLER_MAX_NOTES_COUNT - len(result)
|
remaining = config.CRAWLER_MAX_NOTES_COUNT - len(result)
|
||||||
if remaining <= 0:
|
if remaining <= 0:
|
||||||
@@ -529,9 +510,7 @@ class XiaoHongShuClient(AbstractApiClient):
|
|||||||
result.extend(notes_to_add)
|
result.extend(notes_to_add)
|
||||||
await asyncio.sleep(crawl_interval)
|
await asyncio.sleep(crawl_interval)
|
||||||
|
|
||||||
utils.logger.info(
|
utils.logger.info(f"[XiaoHongShuClient.get_all_notes_by_creator] Finished getting notes for user {user_id}, total: {len(result)}")
|
||||||
f"[XiaoHongShuClient.get_all_notes_by_creator] Finished getting notes for user {user_id}, total: {len(result)}"
|
|
||||||
)
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
async def get_note_short_url(self, note_id: str) -> Dict:
|
async def get_note_short_url(self, note_id: str) -> Dict:
|
||||||
@@ -582,35 +561,20 @@ class XiaoHongShuClient(AbstractApiClient):
|
|||||||
elif isinstance(value, dict):
|
elif isinstance(value, dict):
|
||||||
dict_new[new_key] = transform_json_keys(json.dumps(value))
|
dict_new[new_key] = transform_json_keys(json.dumps(value))
|
||||||
elif isinstance(value, list):
|
elif isinstance(value, list):
|
||||||
dict_new[new_key] = [
|
dict_new[new_key] = [(transform_json_keys(json.dumps(item)) if (item and isinstance(item, dict)) else item) for item in value]
|
||||||
(
|
|
||||||
transform_json_keys(json.dumps(item))
|
|
||||||
if (item and isinstance(item, dict))
|
|
||||||
else item
|
|
||||||
)
|
|
||||||
for item in value
|
|
||||||
]
|
|
||||||
else:
|
else:
|
||||||
dict_new[new_key] = value
|
dict_new[new_key] = value
|
||||||
return dict_new
|
return dict_new
|
||||||
|
|
||||||
url = (
|
url = ("https://www.xiaohongshu.com/explore/" + note_id + f"?xsec_token={xsec_token}&xsec_source={xsec_source}")
|
||||||
"https://www.xiaohongshu.com/explore/"
|
|
||||||
+ note_id
|
|
||||||
+ f"?xsec_token={xsec_token}&xsec_source={xsec_source}"
|
|
||||||
)
|
|
||||||
copy_headers = self.headers.copy()
|
copy_headers = self.headers.copy()
|
||||||
if not enable_cookie:
|
if not enable_cookie:
|
||||||
del copy_headers["Cookie"]
|
del copy_headers["Cookie"]
|
||||||
|
|
||||||
html = await self.request(
|
html = await self.request(method="GET", url=url, return_response=True, headers=copy_headers)
|
||||||
method="GET", url=url, return_response=True, headers=copy_headers
|
|
||||||
)
|
|
||||||
|
|
||||||
def get_note_dict(html):
|
def get_note_dict(html):
|
||||||
state = re.findall(r"window.__INITIAL_STATE__=({.*})</script>", html)[
|
state = re.findall(r"window.__INITIAL_STATE__=({.*})</script>", html)[0].replace("undefined", '""')
|
||||||
0
|
|
||||||
].replace("undefined", '""')
|
|
||||||
|
|
||||||
if state != "{}":
|
if state != "{}":
|
||||||
note_dict = transform_json_keys(state)
|
note_dict = transform_json_keys(state)
|
||||||
|
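The timeout bump at the top of this file exists because get_note_media (whose error path is reformatted in the @@ -158 hunk) downloads the full media body through httpx with timeout=self.timeout, and long xhs videos can exceed the old 10-second budget. A minimal caller sketch, assuming an already-initialized XiaoHongShuClient instance named xhs_client and a purely illustrative URL (neither is defined in this diff):

import asyncio
from typing import Optional

async def fetch_media_bytes(xhs_client, url: str) -> Optional[bytes]:
    # get_note_media returns the raw response body as bytes, or None when the
    # request fails; the client itself logs the failing URL.
    return await xhs_client.get_note_media(url)

# asyncio.run(fetch_media_bytes(xhs_client, "https://example.com/video.mp4"))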
@@ -8,7 +8,6 @@
 # 详细许可条款请参阅项目根目录下的LICENSE文件。
 # 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
-
 
 import asyncio
 import os
 import random
@@ -57,13 +56,9 @@ class XiaoHongShuCrawler(AbstractCrawler):
     async def start(self) -> None:
         playwright_proxy_format, httpx_proxy_format = None, None
         if config.ENABLE_IP_PROXY:
-            ip_proxy_pool = await create_ip_pool(
-                config.IP_PROXY_POOL_COUNT, enable_validate_ip=True
-            )
+            ip_proxy_pool = await create_ip_pool(config.IP_PROXY_POOL_COUNT, enable_validate_ip=True)
             ip_proxy_info: IpInfoModel = await ip_proxy_pool.get_proxy()
-            playwright_proxy_format, httpx_proxy_format = utils.format_proxy_info(
-                ip_proxy_info
-            )
+            playwright_proxy_format, httpx_proxy_format = utils.format_proxy_info(ip_proxy_info)
 
         async with async_playwright() as playwright:
             # 根据配置选择启动模式
@@ -101,9 +96,7 @@ class XiaoHongShuCrawler(AbstractCrawler):
                     cookie_str=config.COOKIES,
                 )
                 await login_obj.begin()
-                await self.xhs_client.update_cookies(
-                    browser_context=self.browser_context
-                )
+                await self.xhs_client.update_cookies(browser_context=self.browser_context)
 
             crawler_type_var.set(config.CRAWLER_TYPE)
             if config.CRAWLER_TYPE == "search":
@@ -122,47 +115,33 @@ class XiaoHongShuCrawler(AbstractCrawler):
 
     async def search(self) -> None:
         """Search for notes and retrieve their comment information."""
-        utils.logger.info(
-            "[XiaoHongShuCrawler.search] Begin search xiaohongshu keywords"
-        )
+        utils.logger.info("[XiaoHongShuCrawler.search] Begin search xiaohongshu keywords")
         xhs_limit_count = 20  # xhs limit page fixed value
         if config.CRAWLER_MAX_NOTES_COUNT < xhs_limit_count:
             config.CRAWLER_MAX_NOTES_COUNT = xhs_limit_count
         start_page = config.START_PAGE
         for keyword in config.KEYWORDS.split(","):
             source_keyword_var.set(keyword)
-            utils.logger.info(
-                f"[XiaoHongShuCrawler.search] Current search keyword: {keyword}"
-            )
+            utils.logger.info(f"[XiaoHongShuCrawler.search] Current search keyword: {keyword}")
             page = 1
             search_id = get_search_id()
-            while (
-                page - start_page + 1
-            ) * xhs_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
+            while (page - start_page + 1) * xhs_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
                 if page < start_page:
                     utils.logger.info(f"[XiaoHongShuCrawler.search] Skip page {page}")
                     page += 1
                     continue
 
                 try:
-                    utils.logger.info(
-                        f"[XiaoHongShuCrawler.search] search xhs keyword: {keyword}, page: {page}"
-                    )
+                    utils.logger.info(f"[XiaoHongShuCrawler.search] search xhs keyword: {keyword}, page: {page}")
                     note_ids: List[str] = []
                     xsec_tokens: List[str] = []
                     notes_res = await self.xhs_client.get_note_by_keyword(
                         keyword=keyword,
                         search_id=search_id,
                         page=page,
-                        sort=(
-                            SearchSortType(config.SORT_TYPE)
-                            if config.SORT_TYPE != ""
-                            else SearchSortType.GENERAL
-                        ),
+                        sort=(SearchSortType(config.SORT_TYPE) if config.SORT_TYPE != "" else SearchSortType.GENERAL),
                     )
-                    utils.logger.info(
-                        f"[XiaoHongShuCrawler.search] Search notes res:{notes_res}"
-                    )
+                    utils.logger.info(f"[XiaoHongShuCrawler.search] Search notes res:{notes_res}")
                     if not notes_res or not notes_res.get("has_more", False):
                         utils.logger.info("No more content!")
                         break
@@ -173,9 +152,7 @@ class XiaoHongShuCrawler(AbstractCrawler):
                             xsec_source=post_item.get("xsec_source"),
                             xsec_token=post_item.get("xsec_token"),
                             semaphore=semaphore,
-                        )
-                        for post_item in notes_res.get("items", {})
-                        if post_item.get("model_type") not in ("rec_query", "hot_query")
+                        ) for post_item in notes_res.get("items", {}) if post_item.get("model_type") not in ("rec_query", "hot_query")
                     ]
                     note_details = await asyncio.gather(*task_list)
                     for note_detail in note_details:
@@ -185,26 +162,18 @@ class XiaoHongShuCrawler(AbstractCrawler):
                             note_ids.append(note_detail.get("note_id"))
                             xsec_tokens.append(note_detail.get("xsec_token"))
                     page += 1
-                    utils.logger.info(
-                        f"[XiaoHongShuCrawler.search] Note details: {note_details}"
-                    )
+                    utils.logger.info(f"[XiaoHongShuCrawler.search] Note details: {note_details}")
                     await self.batch_get_note_comments(note_ids, xsec_tokens)
                 except DataFetchError:
-                    utils.logger.error(
-                        "[XiaoHongShuCrawler.search] Get note detail error"
-                    )
+                    utils.logger.error("[XiaoHongShuCrawler.search] Get note detail error")
                     break
 
     async def get_creators_and_notes(self) -> None:
         """Get creator's notes and retrieve their comment information."""
-        utils.logger.info(
-            "[XiaoHongShuCrawler.get_creators_and_notes] Begin get xiaohongshu creators"
-        )
+        utils.logger.info("[XiaoHongShuCrawler.get_creators_and_notes] Begin get xiaohongshu creators")
         for user_id in config.XHS_CREATOR_ID_LIST:
             # get creator detail info from web html content
-            createor_info: Dict = await self.xhs_client.get_creator_info(
-                user_id=user_id
-            )
+            createor_info: Dict = await self.xhs_client.get_creator_info(user_id=user_id)
             if createor_info:
                 await xhs_store.save_creator(user_id, creator=createor_info)
 
@@ -238,14 +207,14 @@ class XiaoHongShuCrawler(AbstractCrawler):
                 xsec_source=post_item.get("xsec_source"),
                 xsec_token=post_item.get("xsec_token"),
                 semaphore=semaphore,
-            )
-            for post_item in note_list
+            ) for post_item in note_list
         ]
 
         note_details = await asyncio.gather(*task_list)
         for note_detail in note_details:
             if note_detail:
                 await xhs_store.update_xhs_note(note_detail)
+                await self.get_notice_media(note_detail)
 
     async def get_specified_notes(self):
         """
@@ -257,9 +226,7 @@ class XiaoHongShuCrawler(AbstractCrawler):
         get_note_detail_task_list = []
         for full_note_url in config.XHS_SPECIFIED_NOTE_URL_LIST:
             note_url_info: NoteUrlInfo = parse_note_info_from_note_url(full_note_url)
-            utils.logger.info(
-                f"[XiaoHongShuCrawler.get_specified_notes] Parse note url info: {note_url_info}"
-            )
+            utils.logger.info(f"[XiaoHongShuCrawler.get_specified_notes] Parse note url info: {note_url_info}")
             crawler_task = self.get_note_detail_async_task(
                 note_id=note_url_info.note_id,
                 xsec_source=note_url_info.xsec_source,
@@ -276,6 +243,7 @@ class XiaoHongShuCrawler(AbstractCrawler):
                 need_get_comment_note_ids.append(note_detail.get("note_id", ""))
                 xsec_tokens.append(note_detail.get("xsec_token", ""))
                 await xhs_store.update_xhs_note(note_detail)
+                await self.get_notice_media(note_detail)
         await self.batch_get_note_comments(need_get_comment_note_ids, xsec_tokens)
 
     async def get_note_detail_async_task(
@@ -299,72 +267,49 @@ class XiaoHongShuCrawler(AbstractCrawler):
         note_detail = None
         async with semaphore:
             try:
-                utils.logger.info(
-                    f"[get_note_detail_async_task] Begin get note detail, note_id: {note_id}"
-                )
+                utils.logger.info(f"[get_note_detail_async_task] Begin get note detail, note_id: {note_id}")
 
                 try:
-                    note_detail = await self.xhs_client.get_note_by_id(
-                        note_id, xsec_source, xsec_token
-                    )
+                    note_detail = await self.xhs_client.get_note_by_id(note_id, xsec_source, xsec_token)
                 except RetryError as e:
                     pass
 
                 if not note_detail:
-                    note_detail = await self.xhs_client.get_note_by_id_from_html(note_id, xsec_source, xsec_token,
-                                                                                 enable_cookie=False)
+                    note_detail = await self.xhs_client.get_note_by_id_from_html(note_id, xsec_source, xsec_token, enable_cookie=False)
                     if not note_detail:
                         raise Exception(f"[get_note_detail_async_task] Failed to get note detail, Id: {note_id}")
 
-                note_detail.update(
-                    {"xsec_token": xsec_token, "xsec_source": xsec_source}
-                )
+                note_detail.update({"xsec_token": xsec_token, "xsec_source": xsec_source})
                 return note_detail
 
             except DataFetchError as ex:
-                utils.logger.error(
-                    f"[XiaoHongShuCrawler.get_note_detail_async_task] Get note detail error: {ex}"
-                )
+                utils.logger.error(f"[XiaoHongShuCrawler.get_note_detail_async_task] Get note detail error: {ex}")
                 return None
             except KeyError as ex:
-                utils.logger.error(
-                    f"[XiaoHongShuCrawler.get_note_detail_async_task] have not fund note detail note_id:{note_id}, err: {ex}"
-                )
+                utils.logger.error(f"[XiaoHongShuCrawler.get_note_detail_async_task] have not fund note detail note_id:{note_id}, err: {ex}")
                 return None
 
-    async def batch_get_note_comments(
-        self, note_list: List[str], xsec_tokens: List[str]
-    ):
+    async def batch_get_note_comments(self, note_list: List[str], xsec_tokens: List[str]):
         """Batch get note comments"""
         if not config.ENABLE_GET_COMMENTS:
-            utils.logger.info(
-                f"[XiaoHongShuCrawler.batch_get_note_comments] Crawling comment mode is not enabled"
-            )
+            utils.logger.info(f"[XiaoHongShuCrawler.batch_get_note_comments] Crawling comment mode is not enabled")
             return
 
-        utils.logger.info(
-            f"[XiaoHongShuCrawler.batch_get_note_comments] Begin batch get note comments, note list: {note_list}"
-        )
+        utils.logger.info(f"[XiaoHongShuCrawler.batch_get_note_comments] Begin batch get note comments, note list: {note_list}")
         semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
         task_list: List[Task] = []
         for index, note_id in enumerate(note_list):
             task = asyncio.create_task(
-                self.get_comments(
-                    note_id=note_id, xsec_token=xsec_tokens[index], semaphore=semaphore
-                ),
+                self.get_comments(note_id=note_id, xsec_token=xsec_tokens[index], semaphore=semaphore),
                 name=note_id,
             )
             task_list.append(task)
         await asyncio.gather(*task_list)
 
-    async def get_comments(
-        self, note_id: str, xsec_token: str, semaphore: asyncio.Semaphore
-    ):
+    async def get_comments(self, note_id: str, xsec_token: str, semaphore: asyncio.Semaphore):
         """Get note comments with keyword filtering and quantity limitation"""
         async with semaphore:
-            utils.logger.info(
-                f"[XiaoHongShuCrawler.get_comments] Begin get note id comments {note_id}"
-            )
+            utils.logger.info(f"[XiaoHongShuCrawler.get_comments] Begin get note id comments {note_id}")
             # When proxy is not enabled, increase the crawling interval
             if config.ENABLE_IP_PROXY:
                 crawl_interval = random.random()
@@ -380,12 +325,8 @@ class XiaoHongShuCrawler(AbstractCrawler):
 
     async def create_xhs_client(self, httpx_proxy: Optional[str]) -> XiaoHongShuClient:
         """Create xhs client"""
-        utils.logger.info(
-            "[XiaoHongShuCrawler.create_xhs_client] Begin create xiaohongshu API client ..."
-        )
-        cookie_str, cookie_dict = utils.convert_cookies(
-            await self.browser_context.cookies()
-        )
+        utils.logger.info("[XiaoHongShuCrawler.create_xhs_client] Begin create xiaohongshu API client ...")
+        cookie_str, cookie_dict = utils.convert_cookies(await self.browser_context.cookies())
         xhs_client_obj = XiaoHongShuClient(
             proxies=httpx_proxy,
             headers={
@@ -419,29 +360,26 @@ class XiaoHongShuCrawler(AbstractCrawler):
         headless: bool = True,
     ) -> BrowserContext:
         """Launch browser and create browser context"""
-        utils.logger.info(
-            "[XiaoHongShuCrawler.launch_browser] Begin create browser context ..."
-        )
+        utils.logger.info("[XiaoHongShuCrawler.launch_browser] Begin create browser context ...")
         if config.SAVE_LOGIN_STATE:
             # feat issue #14
             # we will save login state to avoid login every time
-            user_data_dir = os.path.join(
-                os.getcwd(), "browser_data", config.USER_DATA_DIR % config.PLATFORM
-            )  # type: ignore
+            user_data_dir = os.path.join(os.getcwd(), "browser_data", config.USER_DATA_DIR % config.PLATFORM)  # type: ignore
             browser_context = await chromium.launch_persistent_context(
                 user_data_dir=user_data_dir,
                 accept_downloads=True,
                 headless=headless,
                 proxy=playwright_proxy,  # type: ignore
-                viewport={"width": 1920, "height": 1080},
+                viewport={
+                    "width": 1920,
+                    "height": 1080
+                },
                 user_agent=user_agent,
             )
             return browser_context
         else:
             browser = await chromium.launch(headless=headless, proxy=playwright_proxy)  # type: ignore
-            browser_context = await browser.new_context(
-                viewport={"width": 1920, "height": 1080}, user_agent=user_agent
-            )
+            browser_context = await browser.new_context(viewport={"width": 1920, "height": 1080}, user_agent=user_agent)
             return browser_context
 
     async def launch_browser_with_cdp(
@@ -470,14 +408,10 @@ class XiaoHongShuCrawler(AbstractCrawler):
             return browser_context
 
         except Exception as e:
-            utils.logger.error(
-                f"[XiaoHongShuCrawler] CDP模式启动失败,回退到标准模式: {e}"
-            )
+            utils.logger.error(f"[XiaoHongShuCrawler] CDP模式启动失败,回退到标准模式: {e}")
             # 回退到标准模式
            chromium = playwright.chromium
-            return await self.launch_browser(
-                chromium, playwright_proxy, user_agent, headless
-            )
+            return await self.launch_browser(chromium, playwright_proxy, user_agent, headless)
 
     async def close(self):
         """Close browser context"""
@@ -490,10 +424,8 @@ class XiaoHongShuCrawler(AbstractCrawler):
         utils.logger.info("[XiaoHongShuCrawler.close] Browser context closed ...")
 
     async def get_notice_media(self, note_detail: Dict):
-        if not config.ENABLE_GET_IMAGES:
-            utils.logger.info(
-                f"[XiaoHongShuCrawler.get_notice_media] Crawling image mode is not enabled"
-            )
+        if not config.ENABLE_GET_MEIDAS:
+            utils.logger.info(f"[XiaoHongShuCrawler.get_notice_media] Crawling image mode is not enabled")
             return
         await self.get_note_images(note_detail)
         await self.get_notice_video(note_detail)
@@ -504,7 +436,7 @@ class XiaoHongShuCrawler(AbstractCrawler):
         :param note_item:
         :return:
         """
-        if not config.ENABLE_GET_IMAGES:
+        if not config.ENABLE_GET_MEIDAS:
            return
         note_id = note_item.get("note_id")
         image_list: List[Dict] = note_item.get("image_list", [])
@@ -529,11 +461,11 @@ class XiaoHongShuCrawler(AbstractCrawler):
 
     async def get_notice_video(self, note_item: Dict):
         """
-        get note images. please use get_notice_media
+        get note videos. please use get_notice_media
         :param note_item:
         :return:
         """
-        if not config.ENABLE_GET_IMAGES:
+        if not config.ENABLE_GET_MEIDAS:
             return
         note_id = note_item.get("note_id")
 
@@ -548,4 +480,4 @@ class XiaoHongShuCrawler(AbstractCrawler):
                 continue
             extension_file_name = f"{videoNum}.mp4"
             videoNum += 1
-            await xhs_store.update_xhs_note_image(note_id, content, extension_file_name)
+            await xhs_store.update_xhs_note_video(note_id, content, extension_file_name)
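The crawler hunks above gate all media downloads on a single flag (spelled ENABLE_GET_MEIDAS in this PR) and then fetch images and video in sequence. A minimal, self-contained sketch of that control flow; the module-level flag and the note_detail dict are stand-ins for illustration, not part of this diff:

import asyncio
from typing import Dict

ENABLE_GET_MEIDAS = True  # stands in for config.ENABLE_GET_MEIDAS

async def get_note_images(note_detail: Dict) -> None:
    print("downloading images for", note_detail.get("note_id"))

async def get_notice_video(note_detail: Dict) -> None:
    print("downloading video for", note_detail.get("note_id"))

async def get_notice_media(note_detail: Dict) -> None:
    # Same shape as XiaoHongShuCrawler.get_notice_media: bail out early when
    # media crawling is disabled, otherwise fetch images first, then the video.
    if not ENABLE_GET_MEIDAS:
        return
    await get_note_images(note_detail)
    await get_notice_video(note_detail)

asyncio.run(get_notice_media({"note_id": "demo"}))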
@@ -8,7 +8,6 @@
 # 详细许可条款请参阅项目根目录下的LICENSE文件。
 # 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
-
 
 # -*- coding: utf-8 -*-
 # @Author : relakkes@gmail.com
 # @Time : 2024/1/14 19:34
@@ -20,7 +19,7 @@ import config
 from var import source_keyword_var
 
 from .bilibili_store_impl import *
-from .bilibilli_store_video import *
+from .bilibilli_store_media import *
 
 
 class BiliStoreFactory:
@@ -35,9 +34,7 @@ class BiliStoreFactory:
     def create_store() -> AbstractStore:
         store_class = BiliStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
         if not store_class:
-            raise ValueError(
-                "[BiliStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite ..."
-            )
+            raise ValueError("[BiliStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite ...")
         return store_class()
 
 
@@ -68,9 +65,7 @@ async def update_bilibili_video(video_item: Dict):
         "video_cover_url": video_item_view.get("pic", ""),
         "source_keyword": source_keyword_var.get(),
     }
-    utils.logger.info(
-        f"[store.bilibili.update_bilibili_video] bilibili video id:{video_id}, title:{save_content_item.get('title')}"
-    )
+    utils.logger.info(f"[store.bilibili.update_bilibili_video] bilibili video id:{video_id}, title:{save_content_item.get('title')}")
     await BiliStoreFactory.create_store().store_content(content_item=save_content_item)
 
 
@@ -89,9 +84,7 @@ async def update_up_info(video_item: Dict):
         "user_rank": video_item_card.get("level_info").get("current_level"),
         "is_official": video_item_card.get("official_verify").get("type"),
     }
-    utils.logger.info(
-        f"[store.bilibili.update_up_info] bilibili user_id:{video_item_card.get('mid')}"
-    )
+    utils.logger.info(f"[store.bilibili.update_up_info] bilibili user_id:{video_item_card.get('mid')}")
     await BiliStoreFactory.create_store().store_creator(creator=saver_up_info)
 
 
@@ -123,9 +116,7 @@ async def update_bilibili_video_comment(video_id: str, comment_item: Dict):
         "like_count": like_count,
         "last_modify_ts": utils.get_current_timestamp(),
     }
-    utils.logger.info(
-        f"[store.bilibili.update_bilibili_video_comment] Bilibili video comment: {comment_id}, content: {save_comment_item.get('content')}"
-    )
+    utils.logger.info(f"[store.bilibili.update_bilibili_video_comment] Bilibili video comment: {comment_id}, content: {save_comment_item.get('content')}")
     await BiliStoreFactory.create_store().store_comment(comment_item=save_comment_item)
 
 
@@ -137,13 +128,11 @@ async def store_video(aid, video_content, extension_file_name):
         video_content:
         extension_file_name:
     """
-    await BilibiliVideo().store_video(
-        {
+    await BilibiliVideo().store_video({
         "aid": aid,
         "video_content": video_content,
         "extension_file_name": extension_file_name,
-        }
-    )
+    })
 
 
 async def batch_update_bilibili_creator_fans(creator_info: Dict, fans_list: List[Dict]):
@@ -156,14 +145,10 @@ async def batch_update_bilibili_creator_fans(creator_info: Dict, fans_list: List
             "sign": fan_item.get("sign"),
             "avatar": fan_item.get("face"),
         }
-        await update_bilibili_creator_contact(
-            creator_info=creator_info, fan_info=fan_info
-        )
+        await update_bilibili_creator_contact(creator_info=creator_info, fan_info=fan_info)
 
 
-async def batch_update_bilibili_creator_followings(
-    creator_info: Dict, followings_list: List[Dict]
-):
+async def batch_update_bilibili_creator_followings(creator_info: Dict, followings_list: List[Dict]):
     if not followings_list:
         return
     for following_item in followings_list:
@@ -173,14 +158,10 @@ async def batch_update_bilibili_creator_followings(
             "sign": following_item.get("sign"),
             "avatar": following_item.get("face"),
         }
-        await update_bilibili_creator_contact(
-            creator_info=following_info, fan_info=creator_info
-        )
+        await update_bilibili_creator_contact(creator_info=following_info, fan_info=creator_info)
 
 
-async def batch_update_bilibili_creator_dynamics(
-    creator_info: Dict, dynamics_list: List[Dict]
-):
+async def batch_update_bilibili_creator_dynamics(creator_info: Dict, dynamics_list: List[Dict]):
     if not dynamics_list:
         return
     for dynamic_item in dynamics_list:
@@ -203,9 +184,7 @@ async def batch_update_bilibili_creator_dynamics(
         "total_forwards": dynamic_forward,
         "total_liked": dynamic_like,
     }
-    await update_bilibili_creator_dynamic(
-        creator_info=creator_info, dynamic_info=dynamic_info
-    )
+    await update_bilibili_creator_dynamic(creator_info=creator_info, dynamic_info=dynamic_info)
 
 
 async def update_bilibili_creator_contact(creator_info: Dict, fan_info: Dict):
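The module-level store_video(aid, video_content, extension_file_name) helper reshaped above simply wraps BilibiliVideo().store_video with a dict payload. A hedged usage sketch, assuming the helper is importable from store.bilibili as laid out in this PR; the aid and payload bytes here are placeholders:

import asyncio
from store.bilibili import store_video

async def demo() -> None:
    fake_video_bytes = b"\x00\x00\x00\x18ftypmp42"  # placeholder payload, not a real video
    # Files land under BilibiliVideo.video_store_path ("data/bilibili/videos").
    await store_video(aid="114514", video_content=fake_video_bytes, extension_file_name="video.mp4")

asyncio.run(demo())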
@@ -8,40 +8,42 @@
 # 详细许可条款请参阅项目根目录下的LICENSE文件。
 # 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
 
 
 # -*- coding: utf-8 -*-
 # @Author : helloteemo
 # @Time : 2024/7/12 20:01
-# @Desc : bilibili图片保存
+# @Desc : bilibili 媒体保存
 import pathlib
 from typing import Dict
 
 import aiofiles
 
-from base.base_crawler import AbstractStoreImage
+from base.base_crawler import AbstractStoreImage, AbstractStoreVideo
 from tools import utils
 
 
-class BilibiliVideo(AbstractStoreImage):
+class BilibiliVideo(AbstractStoreVideo):
     video_store_path: str = "data/bilibili/videos"
 
     async def store_video(self, video_content_item: Dict):
         """
         store content
 
         Args:
-            content_item:
+            video_content_item:
 
         Returns:
 
         """
-        await self.save_video(video_content_item.get("aid"), video_content_item.get("video_content"),
-                              video_content_item.get("extension_file_name"))
+        await self.save_video(video_content_item.get("aid"), video_content_item.get("video_content"), video_content_item.get("extension_file_name"))
 
     def make_save_file_name(self, aid: str, extension_file_name: str) -> str:
         """
         make save file name by store type
 
         Args:
             aid: aid
+            extension_file_name: video filename with extension
 
         Returns:
 
         """
@@ -50,9 +52,11 @@ class BilibiliVideo(AbstractStoreImage):
     async def save_video(self, aid: int, video_content: str, extension_file_name="mp4"):
         """
         save video to local
 
         Args:
             aid: aid
             video_content: video content
+            extension_file_name: video filename with extension
 
         Returns:
 
@@ -18,6 +18,7 @@ import config
 from var import source_keyword_var
 
 from .douyin_store_impl import *
+from .douyin_store_media import *
 
 
 class DouyinStoreFactory:
@@ -233,3 +234,33 @@ async def save_creator(user_id: str, creator: Dict):
     }
     utils.logger.info(f"[store.douyin.save_creator] creator:{local_db_item}")
     await DouyinStoreFactory.create_store().store_creator(local_db_item)
+
+
+async def update_dy_aweme_image(aweme_id, pic_content, extension_file_name):
+    """
+    更新抖音笔记图片
+    Args:
+        aweme_id:
+        pic_content:
+        extension_file_name:
+
+    Returns:
+
+    """
+
+    await DouYinImage().store_image({"aweme_id": aweme_id, "pic_content": pic_content, "extension_file_name": extension_file_name})
+
+
+async def update_dy_aweme_video(aweme_id, video_content, extension_file_name):
+    """
+    更新抖音短视频
+    Args:
+        aweme_id:
+        video_content:
+        extension_file_name:
+
+    Returns:
+
+    """
+
+    await DouYinVideo().store_video({"aweme_id": aweme_id, "video_content": video_content, "extension_file_name": extension_file_name})
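The two helpers added above are thin wrappers around the new douyin store classes. A hedged usage sketch, assuming the helpers are importable from store.douyin per the repo layout in this PR; the aweme id and payload bytes are placeholders:

import asyncio
from store.douyin import update_dy_aweme_image, update_dy_aweme_video

async def demo() -> None:
    # Image bytes are written under data/douyin/images/<aweme_id>/,
    # video bytes under data/douyin/videos/<aweme_id>/.
    await update_dy_aweme_image("7300000000000000000", b"\xff\xd8\xff\xe0JFIF", "0.jpg")
    await update_dy_aweme_video("7300000000000000000", b"\x00\x00\x00\x18ftypmp42", "0.mp4")

asyncio.run(demo())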
store/douyin/douyin_store_media.py (new file, 111 lines)
@@ -0,0 +1,111 @@
+# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
+# 1. 不得用于任何商业用途。
+# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
+# 3. 不得进行大规模爬取或对平台造成运营干扰。
+# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
+# 5. 不得用于任何非法或不当的用途。
+#
+# 详细许可条款请参阅项目根目录下的LICENSE文件。
+# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
+
+# -*- coding: utf-8 -*-
+import pathlib
+from typing import Dict
+
+import aiofiles
+
+from base.base_crawler import AbstractStoreImage, AbstractStoreVideo
+from tools import utils
+
+
+class DouYinImage(AbstractStoreImage):
+    image_store_path: str = "data/douyin/images"
+
+    async def store_image(self, image_content_item: Dict):
+        """
+        store content
+
+        Args:
+            image_content_item:
+
+        Returns:
+
+        """
+        await self.save_image(image_content_item.get("aweme_id"), image_content_item.get("pic_content"), image_content_item.get("extension_file_name"))
+
+    def make_save_file_name(self, aweme_id: str, extension_file_name: str) -> str:
+        """
+        make save file name by store type
+
+        Args:
+            aweme_id: aweme id
+            extension_file_name: image filename with extension
+
+        Returns:
+
+        """
+        return f"{self.image_store_path}/{aweme_id}/{extension_file_name}"
+
+    async def save_image(self, aweme_id: str, pic_content: str, extension_file_name):
+        """
+        save image to local
+
+        Args:
+            aweme_id: aweme id
+            pic_content: image content
+            extension_file_name: image filename with extension
+
+        Returns:
+
+        """
+        pathlib.Path(self.image_store_path + "/" + aweme_id).mkdir(parents=True, exist_ok=True)
+        save_file_name = self.make_save_file_name(aweme_id, extension_file_name)
+        async with aiofiles.open(save_file_name, 'wb') as f:
+            await f.write(pic_content)
+            utils.logger.info(f"[DouYinImageStoreImplement.save_image] save image {save_file_name} success ...")
+
+
+class DouYinVideo(AbstractStoreVideo):
+    video_store_path: str = "data/douyin/videos"
+
+    async def store_video(self, video_content_item: Dict):
+        """
+        store content
+
+        Args:
+            video_content_item:
+
+        Returns:
+
+        """
+        await self.save_video(video_content_item.get("aweme_id"), video_content_item.get("video_content"), video_content_item.get("extension_file_name"))
+
+    def make_save_file_name(self, aweme_id: str, extension_file_name: str) -> str:
+        """
+        make save file name by store type
+
+        Args:
+            aweme_id: aweme id
+            extension_file_name: video filename with extension
+
+        Returns:
+
+        """
+        return f"{self.video_store_path}/{aweme_id}/{extension_file_name}"
+
+    async def save_video(self, aweme_id: str, video_content: str, extension_file_name):
+        """
+        save video to local
+
+        Args:
+            aweme_id: aweme id
+            video_content: video content
+            extension_file_name: video filename with extension
+
+        Returns:
+
+        """
+        pathlib.Path(self.video_store_path + "/" + aweme_id).mkdir(parents=True, exist_ok=True)
+        save_file_name = self.make_save_file_name(aweme_id, extension_file_name)
+        async with aiofiles.open(save_file_name, 'wb') as f:
+            await f.write(video_content)
+            utils.logger.info(f"[DouYinVideoStoreImplement.save_video] save video {save_file_name} success ...")
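For context on the path scheme used by the new file above: make_save_file_name joins the per-class store path, the aweme id, and the caller-supplied filename. A small illustration, assuming the new module is importable from the project root as store.douyin.douyin_store_media:

from store.douyin.douyin_store_media import DouYinVideo

video_store = DouYinVideo()
# Prints "data/douyin/videos/7300000000000000000/0.mp4"; the directory is only
# created later, inside save_video, via pathlib.Path(...).mkdir(parents=True, exist_ok=True).
print(video_store.make_save_file_name("7300000000000000000", "0.mp4"))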
@@ -8,7 +8,6 @@
 # 详细许可条款请参阅项目根目录下的LICENSE文件。
 # 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
-
 
 # -*- coding: utf-8 -*-
 # @Author : relakkes@gmail.com
 # @Time : 2024/1/14 21:34
@@ -19,7 +18,7 @@ from typing import List
 
 from var import source_keyword_var
 
-from .weibo_store_image import *
+from .weibo_store_media import *
 from .weibo_store_impl import *
 
 
@@ -35,8 +34,7 @@ class WeibostoreFactory:
     def create_store() -> AbstractStore:
         store_class = WeibostoreFactory.STORES.get(config.SAVE_DATA_OPTION)
         if not store_class:
-            raise ValueError(
-                "[WeibotoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite ...")
+            raise ValueError("[WeibotoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite ...")
         return store_class()
 
 
@@ -91,11 +89,9 @@ async def update_weibo_note(note_item: Dict):
         "gender": user_info.get("gender", ""),
         "profile_url": user_info.get("profile_url", ""),
         "avatar": user_info.get("profile_image_url", ""),
 
         "source_keyword": source_keyword_var.get(),
     }
-    utils.logger.info(
-        f"[store.weibo.update_weibo_note] weibo note id:{note_id}, title:{save_content_item.get('content')[:24]} ...")
+    utils.logger.info(f"[store.weibo.update_weibo_note] weibo note id:{note_id}, title:{save_content_item.get('content')[:24]} ...")
     await WeibostoreFactory.create_store().store_content(content_item=save_content_item)
 
 
@@ -150,8 +146,7 @@ async def update_weibo_note_comment(note_id: str, comment_item: Dict):
         "profile_url": user_info.get("profile_url", ""),
         "avatar": user_info.get("profile_image_url", ""),
     }
-    utils.logger.info(
-        f"[store.weibo.update_weibo_note_comment] Weibo note comment: {comment_id}, content: {save_comment_item.get('content', '')[:24]} ...")
+    utils.logger.info(f"[store.weibo.update_weibo_note_comment] Weibo note comment: {comment_id}, content: {save_comment_item.get('content', '')[:24]} ...")
     await WeibostoreFactory.create_store().store_comment(comment_item=save_comment_item)
 
 
@@ -166,8 +161,7 @@ async def update_weibo_note_image(picid: str, pic_content, extension_file_name):
     Returns:
 
     """
-    await WeiboStoreImage().store_image(
-        {"pic_id": picid, "pic_content": pic_content, "extension_file_name": extension_file_name})
+    await WeiboStoreImage().store_image({"pic_id": picid, "pic_content": pic_content, "extension_file_name": extension_file_name})
 
 
 async def save_creator(user_id: str, user_info: Dict):
@@ -8,17 +8,16 @@
 # 详细许可条款请参阅项目根目录下的LICENSE文件。
 # 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
-
 
 # -*- coding: utf-8 -*-
 # @Author : Erm
 # @Time : 2024/4/9 17:35
-# @Desc : 微博保存图片类
+# @Desc : 微博媒体保存
 import pathlib
 from typing import Dict
 
 import aiofiles
 
-from base.base_crawler import AbstractStoreImage
+from base.base_crawler import AbstractStoreImage, AbstractStoreVideo
 from tools import utils
 
 
@@ -28,8 +27,9 @@ class WeiboStoreImage(AbstractStoreImage):
     async def store_image(self, image_content_item: Dict):
         """
         store content
+
         Args:
-            content_item:
+            image_content_item:
 
         Returns:
 
@@ -39,8 +39,10 @@ class WeiboStoreImage(AbstractStoreImage):
     def make_save_file_name(self, picid: str, extension_file_name: str) -> str:
         """
         make save file name by store type
+
         Args:
             picid: image id
+            extension_file_name: video filename with extension
 
         Returns:
 
@@ -50,9 +52,11 @@ class WeiboStoreImage(AbstractStoreImage):
     async def save_image(self, picid: str, pic_content: str, extension_file_name="jpg"):
         """
         save image to local
+
         Args:
             picid: image id
             pic_content: image content
+            extension_file_name: image filename with extension
 
         Returns:
 
@@ -8,7 +8,6 @@
|
|||||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||||
|
|
||||||
|
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# @Author : relakkes@gmail.com
|
# @Author : relakkes@gmail.com
|
||||||
# @Time : 2024/1/14 17:34
|
# @Time : 2024/1/14 17:34
|
||||||
@@ -19,7 +18,7 @@ import config
|
|||||||
from var import source_keyword_var
|
from var import source_keyword_var
|
||||||
|
|
||||||
from . import xhs_store_impl
|
from . import xhs_store_impl
|
||||||
from .xhs_store_image import *
|
from .xhs_store_media import *
|
||||||
from .xhs_store_impl import *
|
from .xhs_store_impl import *
|
||||||
|
|
||||||
|
|
||||||
@@ -28,7 +27,7 @@ class XhsStoreFactory:
|
|||||||
"csv": XhsCsvStoreImplement,
|
"csv": XhsCsvStoreImplement,
|
||||||
"db": XhsDbStoreImplement,
|
"db": XhsDbStoreImplement,
|
||||||
"json": XhsJsonStoreImplement,
|
"json": XhsJsonStoreImplement,
|
||||||
"sqlite": XhsSqliteStoreImplement
|
"sqlite": XhsSqliteStoreImplement,
|
||||||
}
|
}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -204,8 +203,8 @@ async def save_creator(user_id: str, creator: Dict):
|
|||||||
'follows': follows, # 关注数
|
'follows': follows, # 关注数
|
||||||
'fans': fans, # 粉丝数
|
'fans': fans, # 粉丝数
|
||||||
'interaction': interaction, # 互动数
|
'interaction': interaction, # 互动数
|
||||||
'tag_list': json.dumps({tag.get('tagType'): tag.get('name') for tag in creator.get('tags')},
|
'tag_list': json.dumps({tag.get('tagType'): tag.get('name')
|
||||||
ensure_ascii=False), # 标签
|
for tag in creator.get('tags')}, ensure_ascii=False), # 标签
|
||||||
"last_modify_ts": utils.get_current_timestamp(), # 最后更新时间戳(MediaCrawler程序生成的,主要用途在db存储的时候记录一条记录最新更新时间)
|
"last_modify_ts": utils.get_current_timestamp(), # 最后更新时间戳(MediaCrawler程序生成的,主要用途在db存储的时候记录一条记录最新更新时间)
|
||||||
}
|
}
|
||||||
utils.logger.info(f"[store.xhs.save_creator] creator:{local_db_item}")
|
utils.logger.info(f"[store.xhs.save_creator] creator:{local_db_item}")
|
||||||
@@ -214,7 +213,7 @@ async def save_creator(user_id: str, creator: Dict):
|
|||||||
|
|
||||||
async def update_xhs_note_image(note_id, pic_content, extension_file_name):
|
async def update_xhs_note_image(note_id, pic_content, extension_file_name):
|
||||||
"""
|
"""
|
||||||
更新小红书笔
|
更新小红书笔记图片
|
||||||
Args:
|
Args:
|
||||||
note_id:
|
note_id:
|
||||||
pic_content:
|
pic_content:
|
||||||
@@ -224,5 +223,19 @@ async def update_xhs_note_image(note_id, pic_content, extension_file_name):
 
     """
 
-    await XiaoHongShuImage().store_image(
-        {"notice_id": note_id, "pic_content": pic_content, "extension_file_name": extension_file_name})
+    await XiaoHongShuImage().store_image({"notice_id": note_id, "pic_content": pic_content, "extension_file_name": extension_file_name})
+
+
+async def update_xhs_note_video(note_id, video_content, extension_file_name):
+    """
+    更新小红书笔记视频
+    Args:
+        note_id:
+        video_content:
+        extension_file_name:
+
+    Returns:
+
+    """
+
+    await XiaoHongShuVideo().store_video({"notice_id": note_id, "video_content": video_content, "extension_file_name": extension_file_name})
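The added update_xhs_note_video helper mirrors update_xhs_note_image and delegates to the new XiaoHongShuVideo store. A minimal sketch of a call site, assuming the helpers are imported from the xhs store package shown in this diff and the media bytes were downloaded elsewhere; the import path, function name and file names below are illustrative, and per the docstrings extension_file_name is the full file name including its extension:

from store import xhs as xhs_store  # assumed import path for the package edited in this diff

async def persist_note_media(note_id: str, content: bytes, is_video: bool) -> None:
    # Route the downloaded bytes to the matching store helper.
    if is_video:
        await xhs_store.update_xhs_note_video(note_id, content, "note.mp4")
    else:
        await xhs_store.update_xhs_note_image(note_id, content, "cover.jpg")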
@@ -1,66 +0,0 @@
-# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
-# 1. 不得用于任何商业用途。
-# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
-# 3. 不得进行大规模爬取或对平台造成运营干扰。
-# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
-# 5. 不得用于任何非法或不当的用途。
-#
-# 详细许可条款请参阅项目根目录下的LICENSE文件。
-# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
-
-
-# -*- coding: utf-8 -*-
-# @Author : helloteemo
-# @Time : 2024/7/11 22:35
-# @Desc : 小红书图片保存
-import pathlib
-from typing import Dict
-
-import aiofiles
-
-from base.base_crawler import AbstractStoreImage
-from tools import utils
-
-
-class XiaoHongShuImage(AbstractStoreImage):
-    image_store_path: str = "data/xhs/images"
-
-    async def store_image(self, image_content_item: Dict):
-        """
-        store content
-        Args:
-            content_item:
-
-        Returns:
-
-        """
-        await self.save_image(image_content_item.get("notice_id"), image_content_item.get("pic_content"),
-                              image_content_item.get("extension_file_name"))
-
-    def make_save_file_name(self, notice_id: str, extension_file_name: str) -> str:
-        """
-        make save file name by store type
-        Args:
-            notice_id: notice id
-            picid: image id
-
-        Returns:
-
-        """
-        return f"{self.image_store_path}/{notice_id}/{extension_file_name}"
-
-    async def save_image(self, notice_id: str, pic_content: str, extension_file_name="jpg"):
-        """
-        save image to local
-        Args:
-            notice_id: notice id
-            pic_content: image content
-
-        Returns:
-
-        """
-        pathlib.Path(self.image_store_path + "/" + notice_id).mkdir(parents=True, exist_ok=True)
-        save_file_name = self.make_save_file_name(notice_id, extension_file_name)
-        async with aiofiles.open(save_file_name, 'wb') as f:
-            await f.write(pic_content)
-        utils.logger.info(f"[XiaoHongShuImageStoreImplement.save_image] save image {save_file_name} success ...")
115 store/xhs/xhs_store_media.py Normal file
@@ -0,0 +1,115 @@
+# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
+# 1. 不得用于任何商业用途。
+# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
+# 3. 不得进行大规模爬取或对平台造成运营干扰。
+# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
+# 5. 不得用于任何非法或不当的用途。
+#
+# 详细许可条款请参阅项目根目录下的LICENSE文件。
+# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
+
+# -*- coding: utf-8 -*-
+# @Author : helloteemo
+# @Time : 2024/7/11 22:35
+# @Desc : 小红书媒体保存
+import pathlib
+from typing import Dict
+
+import aiofiles
+
+from base.base_crawler import AbstractStoreImage, AbstractStoreVideo
+from tools import utils
+
+
+class XiaoHongShuImage(AbstractStoreImage):
+    image_store_path: str = "data/xhs/images"
+
+    async def store_image(self, image_content_item: Dict):
+        """
+        store content
+
+        Args:
+            image_content_item:
+
+        Returns:
+
+        """
+        await self.save_image(image_content_item.get("notice_id"), image_content_item.get("pic_content"), image_content_item.get("extension_file_name"))
+
+    def make_save_file_name(self, notice_id: str, extension_file_name: str) -> str:
+        """
+        make save file name by store type
+
+        Args:
+            notice_id: notice id
+            extension_file_name: image filename with extension
+
+        Returns:
+
+        """
+        return f"{self.image_store_path}/{notice_id}/{extension_file_name}"
+
+    async def save_image(self, notice_id: str, pic_content: str, extension_file_name):
+        """
+        save image to local
+
+        Args:
+            notice_id: notice id
+            pic_content: image content
+            extension_file_name: image filename with extension
+
+        Returns:
+
+        """
+        pathlib.Path(self.image_store_path + "/" + notice_id).mkdir(parents=True, exist_ok=True)
+        save_file_name = self.make_save_file_name(notice_id, extension_file_name)
+        async with aiofiles.open(save_file_name, 'wb') as f:
+            await f.write(pic_content)
+        utils.logger.info(f"[XiaoHongShuImageStoreImplement.save_image] save image {save_file_name} success ...")
+
+
+class XiaoHongShuVideo(AbstractStoreVideo):
+    video_store_path: str = "data/xhs/videos"
+
+    async def store_video(self, video_content_item: Dict):
+        """
+        store content
+
+        Args:
+            video_content_item:
+
+        Returns:
+
+        """
+        await self.save_video(video_content_item.get("notice_id"), video_content_item.get("video_content"), video_content_item.get("extension_file_name"))
+
+    def make_save_file_name(self, notice_id: str, extension_file_name: str) -> str:
+        """
+        make save file name by store type
+
+        Args:
+            notice_id: notice id
+            extension_file_name: video filename with extension
+
+        Returns:
+
+        """
+        return f"{self.video_store_path}/{notice_id}/{extension_file_name}"
+
+    async def save_video(self, notice_id: str, video_content: str, extension_file_name):
+        """
+        save video to local
+
+        Args:
+            notice_id: notice id
+            video_content: video content
+            extension_file_name: video filename with extension
+
+        Returns:
+
+        """
+        pathlib.Path(self.video_store_path + "/" + notice_id).mkdir(parents=True, exist_ok=True)
+        save_file_name = self.make_save_file_name(notice_id, extension_file_name)
+        async with aiofiles.open(save_file_name, 'wb') as f:
+            await f.write(video_content)
+        utils.logger.info(f"[XiaoHongShuVideoStoreImplement.save_video] save video {save_file_name} success ...")
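XiaoHongShuVideo follows the same dict-based contract as XiaoHongShuImage: store_video reads the notice_id, video_content and extension_file_name keys and writes the bytes to data/xhs/videos/<notice_id>/<extension_file_name>. A minimal usage sketch, with the note id, bytes and file name invented for illustration:

async def save_note_video(video_bytes: bytes) -> None:
    video_store = XiaoHongShuVideo()
    await video_store.store_video({
        "notice_id": "66b1a2c3000000001e03abcd",  # hypothetical note id
        "video_content": video_bytes,              # bytes fetched by the crawler
        "extension_file_name": "note.mp4",         # full file name including extension
    })
    # Expected result on disk: data/xhs/videos/66b1a2c3000000001e03abcd/note.mp4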