mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2025-11-25 03:15:17 +08:00
refactor: update xhs note detail
This commit is contained in:
@@ -17,7 +17,7 @@ SORT_TYPE = "popularity_descending"
|
||||
|
||||
# 指定笔记URL列表, 必须要携带xsec_token参数
|
||||
XHS_SPECIFIED_NOTE_URL_LIST = [
|
||||
"https://www.xiaohongshu.com/explore/68f99f6d0000000007033fcf?xsec_token=ABZEzjuN2fPjKF9EcMsCCxfbt3IBRsFZldGFoCJbdDmXI=&xsec_source=pc_feed"
|
||||
"https://www.xiaohongshu.com/explore/64b95d01000000000c034587?xsec_token=AB0EFqJvINCkj6xOCKCQgfNNh8GdnBC_6XecG4QOddo3Q=&xsec_source=pc_cfeed"
|
||||
# ........................
|
||||
]
|
||||
|
||||
|
||||
@@ -279,12 +279,19 @@ class XiaoHongShuCrawler(AbstractCrawler):
|
||||
Dict: note detail
|
||||
"""
|
||||
note_detail = None
|
||||
utils.logger.info(f"[get_note_detail_async_task] Begin get note detail, note_id: {note_id}")
|
||||
async with semaphore:
|
||||
try:
|
||||
utils.logger.info(f"[get_note_detail_async_task] Begin get note detail, note_id: {note_id}")
|
||||
note_detail = await self.xhs_client.get_note_by_id_from_html(note_id, xsec_source, xsec_token, enable_cookie=True)
|
||||
try:
|
||||
note_detail = await self.xhs_client.get_note_by_id(note_id, xsec_source, xsec_token)
|
||||
except RetryError:
|
||||
pass
|
||||
|
||||
if not note_detail:
|
||||
raise Exception(f"[get_note_detail_async_task] Failed to get note detail, Id: {note_id}")
|
||||
note_detail = await self.xhs_client.get_note_by_id_from_html(note_id, xsec_source, xsec_token,
|
||||
enable_cookie=True)
|
||||
if not note_detail:
|
||||
raise Exception(f"[get_note_detail_async_task] Failed to get note detail, Id: {note_id}")
|
||||
|
||||
note_detail.update({"xsec_token": xsec_token, "xsec_source": xsec_source})
|
||||
|
||||
|
||||
Reference in New Issue
Block a user