添加功能:抖音每个视频抓取评论最大条数限制,抖音评论关键词筛选

This commit is contained in:
peanutsplash
2023-12-05 11:21:47 +08:00
parent 8f04943105
commit ab1a10bac1
3 changed files with 59 additions and 17 deletions

View File

@@ -128,18 +128,23 @@ class DouYinCrawler(AbstractCrawler):
task_list: List[Task] = []
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
for aweme_id in aweme_list:
task = asyncio.create_task(self.get_comments(aweme_id, semaphore), name=aweme_id)
task = asyncio.create_task(
self.get_comments(aweme_id, semaphore, max_comments=config.DY_MAX_COMMENTS_PER_POST), name=aweme_id)
task_list.append(task)
await asyncio.wait(task_list)
async def get_comments(self, aweme_id: str, semaphore: asyncio.Semaphore, max_comments: int = None) -> None:
    """Fetch the keyword-filtered comments of one Douyin video and store them.

    Args:
        aweme_id: ID of the video whose comments are fetched.
        semaphore: Shared semaphore; it is held for the whole fetch-and-store
            step so that concurrent comment tasks are bounded by it.
        max_comments: Per-video cap, forwarded unchanged to the client.
            NOTE(review): ``None`` presumably means "no limit" -- confirm
            against ``get_aweme_all_comments``.

    A ``DataFetchError`` raised while fetching or storing is logged and
    swallowed, so a failure on one video does not abort the sibling tasks.
    """
    async with semaphore:
        try:
            # The comment cap and the keyword list are both handed to the
            # client; the list it returns is already keyword-filtered.
            comments = await self.dy_client.get_aweme_all_comments(
                aweme_id=aweme_id,
                max_comments=max_comments,
                keywords=config.DY_COMMENT_KEYWORDS,
            )
            # No ``callback`` is passed to the client: persistence happens
            # exactly once, here, on the filtered result. Passing the store
            # function as a callback as well would write each comment twice.
            await douyin.batch_update_dy_aweme_comments(aweme_id, comments)
            utils.logger.info(f"aweme_id: {aweme_id} comments have all been obtained and filtered ...")
        except DataFetchError as e:
            utils.logger.error(f"aweme_id: {aweme_id} get comments failed, error: {e}")