2025-11-18 12:24:02 +08:00
# -*- coding: utf-8 -*-
# Copyright (c) 2025 relakkes@gmail.com
#
# This file is part of MediaCrawler project.
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/media_platform/bilibili/client.py
# GitHub: https://github.com/NanmiCoder
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
#
2025-07-30 21:19:56 +08:00
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
# 3. 不得进行大规模爬取或对平台造成运营干扰。
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
2024-10-20 00:43:25 +08:00
# 5. 不得用于任何非法或不当的用途。
2025-07-30 21:19:56 +08:00
#
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
2024-10-20 00:43:25 +08:00
2023-12-03 00:30:10 +08:00
# -*- coding: utf-8 -*-
# @Author : relakkes@gmail.com
# @Time : 2023/12/2 18:44
# @Desc : bilibili 请求客户端
import asyncio
import json
2025-07-17 06:40:56 +08:00
import random
2023-12-23 01:04:08 +08:00
from typing import Any , Callable , Dict , List , Optional , Tuple , Union
2023-12-03 00:30:10 +08:00
from urllib . parse import urlencode
import httpx
from playwright . async_api import BrowserContext , Page
2025-05-19 19:57:36 +08:00
import config
2024-04-13 20:18:04 +08:00
from base . base_crawler import AbstractApiClient
2023-12-03 00:30:10 +08:00
from tools import utils
from . exception import DataFetchError
2023-12-06 23:49:56 +08:00
from . field import CommentOrderType , SearchOrderType
from . help import BilibiliSign
2023-12-03 00:30:10 +08:00
2024-04-13 20:18:04 +08:00
class BilibiliClient ( AbstractApiClient ) :
2025-07-30 21:19:56 +08:00
2023-12-03 00:30:10 +08:00
def __init__ (
2025-07-30 21:19:56 +08:00
self ,
timeout = 60 , # 若开启爬取媒体选项, b 站的长视频需要更久的超时时间
2025-07-31 22:48:02 +08:00
proxy = None ,
2025-07-30 21:19:56 +08:00
* ,
headers : Dict [ str , str ] ,
playwright_page : Page ,
cookie_dict : Dict [ str , str ] ,
2023-12-03 00:30:10 +08:00
) :
2025-07-31 22:48:02 +08:00
self . proxy = proxy
2023-12-03 00:30:10 +08:00
self . timeout = timeout
self . headers = headers
self . _host = " https://api.bilibili.com "
self . playwright_page = playwright_page
self . cookie_dict = cookie_dict
async def request ( self , method , url , * * kwargs ) - > Any :
2025-07-31 22:48:02 +08:00
async with httpx . AsyncClient ( proxy = self . proxy ) as client :
2025-07-30 21:19:56 +08:00
response = await client . request ( method , url , timeout = self . timeout , * * kwargs )
2025-07-13 10:42:15 +08:00
try :
data : Dict = response . json ( )
except json . JSONDecodeError :
utils . logger . error ( f " [BilibiliClient.request] Failed to decode JSON from response. status_code: { response . status_code } , response_text: { response . text } " )
raise DataFetchError ( f " Failed to decode JSON, content: { response . text } " )
2023-12-03 00:30:10 +08:00
if data . get ( " code " ) != 0 :
raise DataFetchError ( data . get ( " message " , " unkonw error " ) )
else :
return data . get ( " data " , { } )
async def pre_request_data ( self , req_data : Dict ) - > Dict :
"""
发送请求进行请求参数签名
需要从 localStorage 拿 wbi_img_urls 这参数 , 值如下 :
https : / / i0 . hdslb . com / bfs / wbi / 7 cd084941338484aae1ad9425b84077c . png - https : / / i0 . hdslb . com / bfs / wbi / 4932 caff0ff746eab6f01bf08b70ac45 . png
: param req_data :
: return :
"""
2023-12-05 22:47:36 +08:00
if not req_data :
return { }
2023-12-03 23:19:02 +08:00
img_key , sub_key = await self . get_wbi_keys ( )
2023-12-03 00:30:10 +08:00
return BilibiliSign ( img_key , sub_key ) . sign ( req_data )
2023-12-05 23:32:35 +08:00
async def get_wbi_keys ( self ) - > Tuple [ str , str ] :
2023-12-03 00:30:10 +08:00
"""
获取最新的 img_key 和 sub_key
: return :
"""
local_storage = await self . playwright_page . evaluate ( " () => window.localStorage " )
2025-07-17 06:40:56 +08:00
wbi_img_urls = local_storage . get ( " wbi_img_urls " , " " )
if not wbi_img_urls :
img_url_from_storage = local_storage . get ( " wbi_img_url " )
sub_url_from_storage = local_storage . get ( " wbi_sub_url " )
if img_url_from_storage and sub_url_from_storage :
wbi_img_urls = f " { img_url_from_storage } - { sub_url_from_storage } "
2023-12-03 23:19:02 +08:00
if wbi_img_urls and " - " in wbi_img_urls :
img_url , sub_url = wbi_img_urls . split ( " - " )
else :
2023-12-03 00:30:10 +08:00
resp = await self . request ( method = " GET " , url = self . _host + " /x/web-interface/nav " )
img_url : str = resp [ ' wbi_img ' ] [ ' img_url ' ]
sub_url : str = resp [ ' wbi_img ' ] [ ' sub_url ' ]
img_key = img_url . rsplit ( ' / ' , 1 ) [ 1 ] . split ( ' . ' ) [ 0 ]
sub_key = sub_url . rsplit ( ' / ' , 1 ) [ 1 ] . split ( ' . ' ) [ 0 ]
return img_key , sub_key
2023-12-09 21:10:01 +08:00
async def get ( self , uri : str , params = None , enable_params_sign : bool = True ) - > Dict :
2023-12-03 00:30:10 +08:00
final_uri = uri
2023-12-09 21:10:01 +08:00
if enable_params_sign :
params = await self . pre_request_data ( params )
2023-12-03 00:30:10 +08:00
if isinstance ( params , dict ) :
final_uri = ( f " { uri } ? "
f " { urlencode ( params ) } " )
return await self . request ( method = " GET " , url = f " { self . _host } { final_uri } " , headers = self . headers )
async def post ( self , uri : str , data : dict ) - > Dict :
2023-12-03 23:19:02 +08:00
data = await self . pre_request_data ( data )
2023-12-03 00:30:10 +08:00
json_str = json . dumps ( data , separators = ( ' , ' , ' : ' ) , ensure_ascii = False )
2025-07-30 21:19:56 +08:00
return await self . request ( method = " POST " , url = f " { self . _host } { uri } " , data = json_str , headers = self . headers )
2023-12-03 00:30:10 +08:00
async def pong ( self ) - > bool :
""" get a note to check if login state is ok """
2023-12-23 01:04:08 +08:00
utils . logger . info ( " [BilibiliClient.pong] Begin pong bilibili... " )
2023-12-03 00:30:10 +08:00
ping_flag = False
try :
2023-12-05 22:47:36 +08:00
check_login_uri = " /x/web-interface/nav "
response = await self . get ( check_login_uri )
if response . get ( " isLogin " ) :
2025-07-30 21:19:56 +08:00
utils . logger . info ( " [BilibiliClient.pong] Use cache login state get web interface successfull! " )
2023-12-05 22:47:36 +08:00
ping_flag = True
2023-12-03 00:30:10 +08:00
except Exception as e :
2025-07-30 21:19:56 +08:00
utils . logger . error ( f " [BilibiliClient.pong] Pong bilibili failed: { e } , and try to login again... " )
2023-12-03 00:30:10 +08:00
ping_flag = False
return ping_flag
async def update_cookies ( self , browser_context : BrowserContext ) :
cookie_str , cookie_dict = utils . convert_cookies ( await browser_context . cookies ( ) )
self . headers [ " Cookie " ] = cookie_str
self . cookie_dict = cookie_dict
2025-07-30 21:19:56 +08:00
async def search_video_by_keyword (
self ,
keyword : str ,
page : int = 1 ,
page_size : int = 20 ,
order : SearchOrderType = SearchOrderType . DEFAULT ,
pubtime_begin_s : int = 0 ,
pubtime_end_s : int = 0 ,
) - > Dict :
2023-12-03 00:30:10 +08:00
"""
KuaiShou web search api
2023-12-03 23:19:02 +08:00
: param keyword : 搜索关键词
: param page : 分页参数具体第几页
: param page_size : 每一页参数的数量
: param order : 搜索结果排序 , 默认位综合排序
2024-10-17 15:11:25 +08:00
: param pubtime_begin_s : 发布时间开始时间戳
: param pubtime_end_s : 发布时间结束时间戳
2023-12-03 00:30:10 +08:00
: return :
"""
2023-12-03 23:19:02 +08:00
uri = " /x/web-interface/wbi/search/type "
2023-12-03 00:30:10 +08:00
post_data = {
2023-12-03 23:19:02 +08:00
" search_type " : " video " ,
" keyword " : keyword ,
" page " : page ,
" page_size " : page_size ,
2024-10-17 15:11:25 +08:00
" order " : order . value ,
2025-01-15 18:21:03 +08:00
" pubtime_begin_s " : pubtime_begin_s ,
" pubtime_end_s " : pubtime_end_s
2023-12-03 00:30:10 +08:00
}
2023-12-03 23:19:02 +08:00
return await self . get ( uri , post_data )
2023-12-03 00:30:10 +08:00
2023-12-23 01:04:08 +08:00
async def get_video_info ( self , aid : Union [ int , None ] = None , bvid : Union [ str , None ] = None ) - > Dict :
2023-12-03 00:30:10 +08:00
"""
2023-12-23 01:04:08 +08:00
Bilibli web video detail api , aid 和 bvid任选一个参数
: param aid : 稿件avid
: param bvid : 稿件bvid
2023-12-03 00:30:10 +08:00
: return :
"""
2023-12-23 01:04:08 +08:00
if not aid and not bvid :
raise ValueError ( " 请提供 aid 或 bvid 中的至少一个参数 " )
2023-12-09 21:10:01 +08:00
uri = " /x/web-interface/view/detail "
2023-12-23 01:04:08 +08:00
params = dict ( )
if aid :
params . update ( { " aid " : aid } )
else :
params . update ( { " bvid " : bvid } )
2023-12-09 21:10:01 +08:00
return await self . get ( uri , params , enable_params_sign = False )
2023-12-03 00:30:10 +08:00
2024-07-12 20:09:16 +08:00
async def get_video_play_url ( self , aid : int , cid : int ) - > Dict :
"""
Bilibli web video play url api
: param aid : 稿件avid
: param cid : cid
: return :
"""
if not aid or not cid or aid < = 0 or cid < = 0 :
raise ValueError ( " aid 和 cid 必须存在 " )
uri = " /x/player/wbi/playurl "
2025-09-24 12:27:16 +08:00
qn_value = getattr ( config , " BILI_QN " , 80 )
2024-07-12 20:09:16 +08:00
params = {
" avid " : aid ,
" cid " : cid ,
2025-09-24 12:27:16 +08:00
" qn " : qn_value ,
2024-07-12 20:09:16 +08:00
" fourk " : 1 ,
" fnval " : 1 ,
" platform " : " pc " ,
}
return await self . get ( uri , params , enable_params_sign = True )
async def get_video_media ( self , url : str ) - > Union [ bytes , None ] :
2025-09-24 12:27:16 +08:00
# Follow CDN 302 redirects and treat any 2xx as success (some endpoints return 206)
async with httpx . AsyncClient ( proxy = self . proxy , follow_redirects = True ) as client :
2025-08-05 13:11:00 +08:00
try :
response = await client . request ( " GET " , url , timeout = self . timeout , headers = self . headers )
response . raise_for_status ( )
2025-09-24 12:27:16 +08:00
if 200 < = response . status_code < 300 :
2025-08-05 13:11:00 +08:00
return response . content
2025-09-24 12:27:16 +08:00
utils . logger . error (
f " [BilibiliClient.get_video_media] Unexpected status { response . status_code } for { url } "
)
return None
2025-08-06 11:24:51 +08:00
except httpx . HTTPError as exc : # some wrong when call httpx.request method, such as connection error, client error, server error or response status code is not 2xx
utils . logger . error ( f " [BilibiliClient.get_video_media] { exc . __class__ . __name__ } for { exc . request . url } - { exc } " ) # 保留原始异常类型名称,以便开发者调试
2024-07-12 20:09:16 +08:00
return None
2025-07-30 21:19:56 +08:00
async def get_video_comments (
self ,
video_id : str ,
order_mode : CommentOrderType = CommentOrderType . DEFAULT ,
next : int = 0 ,
) - > Dict :
2023-12-03 00:30:10 +08:00
""" get video comments
2023-12-04 23:16:02 +08:00
: param video_id : 视频 ID
: param order_mode : 排序方式
2023-12-09 21:10:01 +08:00
: param next : 评论页选择
2023-12-03 00:30:10 +08:00
: return :
"""
2023-12-04 23:16:02 +08:00
uri = " /x/v2/reply/wbi/main "
2025-07-30 21:19:56 +08:00
post_data = { " oid " : video_id , " mode " : order_mode . value , " type " : 1 , " ps " : 20 , " next " : next }
2023-12-09 21:10:01 +08:00
return await self . get ( uri , post_data )
2023-12-03 00:30:10 +08:00
2025-07-30 21:19:56 +08:00
async def get_video_all_comments (
self ,
video_id : str ,
crawl_interval : float = 1.0 ,
is_fetch_sub_comments = False ,
callback : Optional [ Callable ] = None ,
max_count : int = 10 ,
) :
2023-12-03 00:30:10 +08:00
"""
get video all comments include sub comments
2023-12-04 23:16:02 +08:00
: param video_id :
2023-12-03 00:30:10 +08:00
: param crawl_interval :
: param is_fetch_sub_comments :
: param callback :
2024-10-23 16:32:02 +08:00
max_count : 一次笔记爬取的最大评论数量
2023-12-03 00:30:10 +08:00
: return :
"""
result = [ ]
2023-12-04 23:16:02 +08:00
is_end = False
2024-05-26 10:53:46 +08:00
next_page = 0
2025-07-13 10:42:15 +08:00
max_retries = 3
2024-10-23 16:32:02 +08:00
while not is_end and len ( result ) < max_count :
2025-07-13 10:42:15 +08:00
comments_res = None
for attempt in range ( max_retries ) :
try :
comments_res = await self . get_video_comments ( video_id , CommentOrderType . DEFAULT , next_page )
break # Success
except DataFetchError as e :
if attempt < max_retries - 1 :
2025-07-30 21:19:56 +08:00
delay = 5 * ( 2 * * attempt ) + random . uniform ( 0 , 1 )
utils . logger . warning ( f " [BilibiliClient.get_video_all_comments] Retrying video_id { video_id } in { delay : .2f } s... (Attempt { attempt + 1 } / { max_retries } ) " )
2025-07-13 10:42:15 +08:00
await asyncio . sleep ( delay )
else :
2025-07-30 21:19:56 +08:00
utils . logger . error ( f " [BilibiliClient.get_video_all_comments] Max retries reached for video_id: { video_id } . Skipping comments. Error: { e } " )
2025-07-13 10:42:15 +08:00
is_end = True
break
if not comments_res :
break
2024-05-26 10:53:46 +08:00
cursor_info : Dict = comments_res . get ( " cursor " )
2025-07-13 10:42:15 +08:00
if not cursor_info :
utils . logger . warning ( f " [BilibiliClient.get_video_all_comments] Could not find ' cursor ' in response for video_id: { video_id } . Skipping. " )
break
2023-12-04 23:16:02 +08:00
comment_list : List [ Dict ] = comments_res . get ( " replies " , [ ] )
2025-07-30 21:19:56 +08:00
2025-07-17 06:40:56 +08:00
# 检查 is_end 和 next 是否存在
if " is_end " not in cursor_info or " next " not in cursor_info :
utils . logger . warning ( f " [BilibiliClient.get_video_all_comments] ' is_end ' or ' next ' not in cursor for video_id: { video_id } . Assuming end of comments. " )
is_end = True
else :
is_end = cursor_info . get ( " is_end " )
next_page = cursor_info . get ( " next " )
if not isinstance ( is_end , bool ) :
utils . logger . warning ( f " [BilibiliClient.get_video_all_comments] ' is_end ' is not a boolean for video_id: { video_id } . Assuming end of comments. " )
is_end = True
2024-05-26 10:53:46 +08:00
if is_fetch_sub_comments :
for comment in comment_list :
comment_id = comment [ ' rpid ' ]
if ( comment . get ( " rcount " , 0 ) > 0 ) :
2025-07-30 21:19:56 +08:00
{ await self . get_video_all_level_two_comments ( video_id , comment_id , CommentOrderType . DEFAULT , 10 , crawl_interval , callback ) }
2024-10-23 16:32:02 +08:00
if len ( result ) + len ( comment_list ) > max_count :
comment_list = comment_list [ : max_count - len ( result ) ]
2023-12-03 00:30:10 +08:00
if callback : # 如果有回调函数,就执行回调函数
2023-12-04 23:16:02 +08:00
await callback ( video_id , comment_list )
2023-12-03 00:30:10 +08:00
await asyncio . sleep ( crawl_interval )
if not is_fetch_sub_comments :
2023-12-04 23:16:02 +08:00
result . extend ( comment_list )
2023-12-03 00:30:10 +08:00
continue
return result
2024-05-26 10:53:46 +08:00
2025-07-30 21:19:56 +08:00
async def get_video_all_level_two_comments (
self ,
video_id : str ,
level_one_comment_id : int ,
order_mode : CommentOrderType ,
ps : int = 10 ,
crawl_interval : float = 1.0 ,
callback : Optional [ Callable ] = None ,
) - > Dict :
2024-05-26 10:53:46 +08:00
"""
get video all level two comments for a level one comment
: param video_id : 视频 ID
: param level_one_comment_id : 一级评论 ID
: param order_mode :
: param ps : 一页评论数
: param crawl_interval :
: param callback :
: return :
"""
2024-06-12 15:18:55 +08:00
pn = 1
2024-05-26 10:53:46 +08:00
while True :
2025-07-30 21:19:56 +08:00
result = await self . get_video_level_two_comments ( video_id , level_one_comment_id , pn , ps , order_mode )
2024-05-26 10:53:46 +08:00
comment_list : List [ Dict ] = result . get ( " replies " , [ ] )
if callback : # 如果有回调函数,就执行回调函数
await callback ( video_id , comment_list )
await asyncio . sleep ( crawl_interval )
2024-06-12 15:18:55 +08:00
if ( int ( result [ " page " ] [ " count " ] ) < = pn * ps ) :
2024-05-26 10:53:46 +08:00
break
pn + = 1
2025-07-30 21:19:56 +08:00
async def get_video_level_two_comments (
self ,
video_id : str ,
level_one_comment_id : int ,
pn : int ,
ps : int ,
order_mode : CommentOrderType ,
) - > Dict :
2024-05-26 10:53:46 +08:00
""" get video level two comments
: param video_id : 视频 ID
: param level_one_comment_id : 一级评论 ID
: param order_mode : 排序方式
: return :
"""
uri = " /x/v2/reply/reply "
post_data = {
" oid " : video_id ,
" mode " : order_mode . value ,
" type " : 1 ,
" ps " : ps ,
" pn " : pn ,
" root " : level_one_comment_id ,
}
result = await self . get ( uri , post_data )
return result
2024-06-12 15:18:55 +08:00
2025-05-22 22:26:30 +08:00
async def get_creator_videos ( self , creator_id : str , pn : int , ps : int = 30 , order_mode : SearchOrderType = SearchOrderType . LAST_PUBLISH ) - > Dict :
2024-06-12 15:18:55 +08:00
""" get all videos for a creator
: param creator_id : 创作者 ID
: param pn : 页数
: param ps : 一页视频数
: param order_mode : 排序方式
: return :
"""
uri = " /x/space/wbi/arc/search "
post_data = {
" mid " : creator_id ,
" pn " : pn ,
" ps " : ps ,
" order " : order_mode ,
}
return await self . get ( uri , post_data )
2025-05-19 19:57:36 +08:00
async def get_creator_info ( self , creator_id : int ) - > Dict :
2025-05-22 20:31:48 +08:00
"""
get creator info
2025-05-19 19:57:36 +08:00
: param creator_id : 作者 ID
"""
uri = " /x/space/wbi/acc/info "
post_data = {
" mid " : creator_id ,
}
return await self . get ( uri , post_data )
2025-07-30 21:19:56 +08:00
async def get_creator_fans (
self ,
creator_id : int ,
pn : int ,
ps : int = 24 ,
) - > Dict :
2025-05-22 20:31:48 +08:00
"""
get creator fans
2025-05-19 19:57:36 +08:00
: param creator_id : 创作者 ID
: param pn : 开始页数
: param ps : 每页数量
: return :
"""
uri = " /x/relation/fans "
post_data = {
' vmid ' : creator_id ,
" pn " : pn ,
" ps " : ps ,
" gaia_source " : " main_web " ,
}
return await self . get ( uri , post_data )
2025-07-30 21:19:56 +08:00
async def get_creator_followings (
self ,
creator_id : int ,
pn : int ,
ps : int = 24 ,
) - > Dict :
2025-05-22 20:31:48 +08:00
"""
get creator followings
2025-05-19 19:57:36 +08:00
: param creator_id : 创作者 ID
: param pn : 开始页数
: param ps : 每页数量
: return :
"""
uri = " /x/relation/followings "
post_data = {
" vmid " : creator_id ,
" pn " : pn ,
" ps " : ps ,
" gaia_source " : " main_web " ,
}
return await self . get ( uri , post_data )
2025-05-22 20:31:48 +08:00
async def get_creator_dynamics ( self , creator_id : int , offset : str = " " ) :
"""
get creator comments
: param creator_id : 创作者 ID
: param offset : 发送请求所需参数
: return :
"""
uri = " /x/polymer/web-dynamic/v1/feed/space "
post_data = {
" offset " : offset ,
" host_mid " : creator_id ,
" platform " : " web " ,
}
return await self . get ( uri , post_data )
2025-07-30 21:19:56 +08:00
async def get_creator_all_fans (
self ,
creator_info : Dict ,
crawl_interval : float = 1.0 ,
callback : Optional [ Callable ] = None ,
max_count : int = 100 ,
) - > List :
2025-05-19 19:57:36 +08:00
"""
2025-05-22 20:31:48 +08:00
get creator all fans
2025-05-19 19:57:36 +08:00
: param creator_info :
: param crawl_interval :
: param callback :
: param max_count : 一个up主爬取的最大粉丝数量
: return : up主粉丝数列表
"""
creator_id = creator_info [ " id " ]
result = [ ]
pn = config . START_CONTACTS_PAGE
while len ( result ) < max_count :
fans_res : Dict = await self . get_creator_fans ( creator_id , pn = pn )
fans_list : List [ Dict ] = fans_res . get ( " list " , [ ] )
pn + = 1
if len ( result ) + len ( fans_list ) > max_count :
fans_list = fans_list [ : max_count - len ( result ) ]
if callback : # 如果有回调函数,就执行回调函数
await callback ( creator_info , fans_list )
await asyncio . sleep ( crawl_interval )
if not fans_list :
break
result . extend ( fans_list )
return result
2025-07-30 21:19:56 +08:00
async def get_creator_all_followings (
self ,
creator_info : Dict ,
crawl_interval : float = 1.0 ,
callback : Optional [ Callable ] = None ,
max_count : int = 100 ,
) - > List :
2025-05-19 19:57:36 +08:00
"""
2025-05-22 20:31:48 +08:00
get creator all followings
2025-05-19 19:57:36 +08:00
: param creator_info :
: param crawl_interval :
: param callback :
: param max_count : 一个up主爬取的最大关注者数量
: return : up主关注者列表
"""
creator_id = creator_info [ " id " ]
result = [ ]
pn = config . START_CONTACTS_PAGE
while len ( result ) < max_count :
followings_res : Dict = await self . get_creator_followings ( creator_id , pn = pn )
followings_list : List [ Dict ] = followings_res . get ( " list " , [ ] )
pn + = 1
if len ( result ) + len ( followings_list ) > max_count :
followings_list = followings_list [ : max_count - len ( result ) ]
if callback : # 如果有回调函数,就执行回调函数
await callback ( creator_info , followings_list )
await asyncio . sleep ( crawl_interval )
if not followings_list :
break
result . extend ( followings_list )
2025-05-22 20:31:48 +08:00
return result
2025-07-30 21:19:56 +08:00
async def get_creator_all_dynamics (
self ,
creator_info : Dict ,
crawl_interval : float = 1.0 ,
callback : Optional [ Callable ] = None ,
max_count : int = 20 ,
) - > List :
2025-05-22 20:31:48 +08:00
"""
get creator all followings
: param creator_info :
: param crawl_interval :
: param callback :
: param max_count : 一个up主爬取的最大动态数量
2025-05-19 19:57:36 +08:00
2025-05-22 20:31:48 +08:00
: return : up主关注者列表
"""
creator_id = creator_info [ " id " ]
result = [ ]
offset = " "
has_more = True
while has_more and len ( result ) < max_count :
dynamics_res = await self . get_creator_dynamics ( creator_id , offset )
dynamics_list : List [ Dict ] = dynamics_res [ " items " ]
has_more = dynamics_res [ " has_more " ]
offset = dynamics_res [ " offset " ]
if len ( result ) + len ( dynamics_list ) > max_count :
dynamics_list = dynamics_list [ : max_count - len ( result ) ]
if callback :
await callback ( creator_info , dynamics_list )
await asyncio . sleep ( crawl_interval )
result . extend ( dynamics_list )
2025-05-19 19:57:36 +08:00
return result