feat: Add daily limit for video/post crawling in Bilibili and base config

This commit is contained in:
gaoxiaobei
2025-07-12 14:50:59 +08:00
parent ec0d29cf0f
commit cad9fc7af8
2 changed files with 8 additions and 0 deletions

View File

@@ -86,6 +86,9 @@ START_PAGE = 1
# 爬取视频/帖子的数量控制 # 爬取视频/帖子的数量控制
CRAWLER_MAX_NOTES_COUNT = 200 CRAWLER_MAX_NOTES_COUNT = 200
# 每天爬取视频/帖子的数量控制
MAX_NOTES_PER_DAY = 20
# 并发爬虫数量控制 # 并发爬虫数量控制
MAX_CONCURRENCY_NUM = 1 MAX_CONCURRENCY_NUM = 1

View File

@@ -196,10 +196,14 @@ class BilibiliCrawler(AbstractCrawler):
# 按照每一天进行爬取的时间戳参数 # 按照每一天进行爬取的时间戳参数
pubtime_begin_s, pubtime_end_s = await self.get_pubtime_datetime(start=day.strftime('%Y-%m-%d'), end=day.strftime('%Y-%m-%d')) pubtime_begin_s, pubtime_end_s = await self.get_pubtime_datetime(start=day.strftime('%Y-%m-%d'), end=day.strftime('%Y-%m-%d'))
page = 1 page = 1
notes_count_this_day = 0
#!该段 while 语句在发生异常时(通常情况下为当天数据为空时)会自动跳转到下一天,以实现最大程度爬取该关键词下当天的所有视频 #!该段 while 语句在发生异常时(通常情况下为当天数据为空时)会自动跳转到下一天,以实现最大程度爬取该关键词下当天的所有视频
#!除了仅保留现在原有的 try, except Exception 语句外,不要再添加其他的异常处理!!!否则将使该段代码失效,使其仅能爬取当天一天数据而无法跳转到下一天 #!除了仅保留现在原有的 try, except Exception 语句外,不要再添加其他的异常处理!!!否则将使该段代码失效,使其仅能爬取当天一天数据而无法跳转到下一天
#!除非将该段代码的逻辑进行重构以实现相同的功能,否则不要进行修改!!! #!除非将该段代码的逻辑进行重构以实现相同的功能,否则不要进行修改!!!
while (page - start_page + 1) * bili_limit_count <= config.CRAWLER_MAX_NOTES_COUNT: while (page - start_page + 1) * bili_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
if notes_count_this_day >= config.MAX_NOTES_PER_DAY:
utils.logger.info(f"[BilibiliCrawler.search] Reached the maximum number of notes for today {day.ctime()}.")
break
#! Catch any error if the response returns nothing, then go to the next day #! Catch any error if the response returns nothing, then go to the next day
try: try:
#! Don't skip any page, to make sure all videos of the day are gathered #! Don't skip any page, to make sure all videos of the day are gathered
@@ -225,6 +229,7 @@ class BilibiliCrawler(AbstractCrawler):
video_items = await asyncio.gather(*task_list) video_items = await asyncio.gather(*task_list)
for video_item in video_items: for video_item in video_items:
if video_item: if video_item:
notes_count_this_day += 1
video_id_list.append(video_item.get("View").get("aid")) video_id_list.append(video_item.get("View").get("aid"))
await bilibili_store.update_bilibili_video(video_item) await bilibili_store.update_bilibili_video(video_item)
await bilibili_store.update_up_info(video_item) await bilibili_store.update_up_info(video_item)