mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2025-11-25 11:29:27 +08:00
fix(数据库): 修复模型字段类型以支持更广泛的数据格式;
修复xhs评论存储方法,从批量处理改为单条处理
This commit is contained in:
@@ -161,12 +161,12 @@ class DyCreator(Base):
|
||||
class KuaishouVideo(Base):
|
||||
__tablename__ = 'kuaishou_video'
|
||||
id = Column(Integer, primary_key=True)
|
||||
user_id = Column(String(255))
|
||||
user_id = Column(BigInteger)
|
||||
nickname = Column(Text)
|
||||
avatar = Column(Text)
|
||||
add_ts = Column(BigInteger)
|
||||
last_modify_ts = Column(BigInteger)
|
||||
video_id = Column(BigInteger, index=True)
|
||||
video_id = Column(String(255), index=True)
|
||||
video_type = Column(Text)
|
||||
title = Column(Text)
|
||||
desc = Column(Text)
|
||||
@@ -181,13 +181,13 @@ class KuaishouVideo(Base):
|
||||
class KuaishouVideoComment(Base):
|
||||
__tablename__ = 'kuaishou_video_comment'
|
||||
id = Column(Integer, primary_key=True)
|
||||
user_id = Column(Integer)
|
||||
user_id = Column(Text)
|
||||
nickname = Column(Text)
|
||||
avatar = Column(Text)
|
||||
add_ts = Column(BigInteger)
|
||||
last_modify_ts = Column(BigInteger)
|
||||
comment_id = Column(BigInteger, index=True)
|
||||
video_id = Column(BigInteger, index=True)
|
||||
video_id = Column(String(255), index=True)
|
||||
content = Column(Text)
|
||||
create_time = Column(BigInteger)
|
||||
sub_comment_count = Column(Text)
|
||||
@@ -206,7 +206,7 @@ class WeiboNote(Base):
|
||||
note_id = Column(BigInteger, index=True)
|
||||
content = Column(Text)
|
||||
create_time = Column(BigInteger, index=True)
|
||||
create_date_time = Column(BigInteger, index=True)
|
||||
create_date_time = Column(String(255), index=True)
|
||||
liked_count = Column(Text)
|
||||
comments_count = Column(Text)
|
||||
shared_count = Column(Text)
|
||||
@@ -228,7 +228,7 @@ class WeiboNoteComment(Base):
|
||||
note_id = Column(BigInteger, index=True)
|
||||
content = Column(Text)
|
||||
create_time = Column(BigInteger)
|
||||
create_date_time = Column(BigInteger, index=True)
|
||||
create_date_time = Column(String(255), index=True)
|
||||
comment_like_count = Column(Text)
|
||||
sub_comment_count = Column(Text)
|
||||
parent_comment_id = Column(String(255))
|
||||
@@ -273,7 +273,7 @@ class XhsNote(Base):
|
||||
ip_location = Column(Text)
|
||||
add_ts = Column(BigInteger)
|
||||
last_modify_ts = Column(BigInteger)
|
||||
note_id = Column(BigInteger, index=True)
|
||||
note_id = Column(String(255), index=True)
|
||||
type = Column(Text)
|
||||
title = Column(Text)
|
||||
desc = Column(Text)
|
||||
@@ -299,7 +299,7 @@ class XhsNoteComment(Base):
|
||||
ip_location = Column(Text)
|
||||
add_ts = Column(BigInteger)
|
||||
last_modify_ts = Column(BigInteger)
|
||||
comment_id = Column(BigInteger, index=True)
|
||||
comment_id = Column(String(255), index=True)
|
||||
create_time = Column(BigInteger, index=True)
|
||||
note_id = Column(String(255))
|
||||
content = Column(Text)
|
||||
@@ -311,15 +311,15 @@ class XhsNoteComment(Base):
|
||||
class TiebaNote(Base):
|
||||
__tablename__ = 'tieba_note'
|
||||
id = Column(Integer, primary_key=True)
|
||||
note_id = Column(BigInteger, index=True)
|
||||
note_id = Column(String(644), index=True)
|
||||
title = Column(Text)
|
||||
desc = Column(Text)
|
||||
note_url = Column(Text)
|
||||
publish_time = Column(BigInteger, index=True)
|
||||
publish_time = Column(String(255), index=True)
|
||||
user_link = Column(Text, default='')
|
||||
user_nickname = Column(Text, default='')
|
||||
user_avatar = Column(Text, default='')
|
||||
tieba_id = Column(Integer, default='')
|
||||
tieba_id = Column(String(255), default='')
|
||||
tieba_name = Column(Text)
|
||||
tieba_link = Column(Text)
|
||||
total_replay_num = Column(Integer, default=0)
|
||||
@@ -332,7 +332,7 @@ class TiebaNote(Base):
|
||||
class TiebaComment(Base):
|
||||
__tablename__ = 'tieba_comment'
|
||||
id = Column(Integer, primary_key=True)
|
||||
comment_id = Column(BigInteger, index=True)
|
||||
comment_id = Column(String(255), index=True)
|
||||
parent_comment_id = Column(String(255), default='')
|
||||
content = Column(Text)
|
||||
user_link = Column(Text, default='')
|
||||
@@ -341,10 +341,10 @@ class TiebaComment(Base):
|
||||
tieba_id = Column(String(255), default='')
|
||||
tieba_name = Column(Text)
|
||||
tieba_link = Column(Text)
|
||||
publish_time = Column(BigInteger, index=True)
|
||||
publish_time = Column(String(255), index=True)
|
||||
ip_location = Column(Text, default='')
|
||||
sub_comment_count = Column(Integer, default=0)
|
||||
note_id = Column(BigInteger, index=True)
|
||||
note_id = Column(String(255), index=True)
|
||||
note_url = Column(Text)
|
||||
add_ts = Column(BigInteger)
|
||||
last_modify_ts = Column(BigInteger)
|
||||
@@ -352,7 +352,7 @@ class TiebaComment(Base):
|
||||
class TiebaCreator(Base):
|
||||
__tablename__ = 'tieba_creator'
|
||||
id = Column(Integer, primary_key=True)
|
||||
user_id = Column(String(255))
|
||||
user_id = Column(String(64))
|
||||
user_name = Column(Text)
|
||||
nickname = Column(Text)
|
||||
avatar = Column(Text)
|
||||
@@ -367,14 +367,14 @@ class TiebaCreator(Base):
|
||||
class ZhihuContent(Base):
|
||||
__tablename__ = 'zhihu_content'
|
||||
id = Column(Integer, primary_key=True)
|
||||
content_id = Column(BigInteger, index=True)
|
||||
content_id = Column(String(64), index=True)
|
||||
content_type = Column(Text)
|
||||
content_text = Column(Text)
|
||||
content_url = Column(Text)
|
||||
question_id = Column(String(255))
|
||||
title = Column(Text)
|
||||
desc = Column(Text)
|
||||
created_time = Column(BigInteger, index=True)
|
||||
created_time = Column(String(32), index=True)
|
||||
updated_time = Column(Text)
|
||||
voteup_count = Column(Integer, default=0)
|
||||
comment_count = Column(Integer, default=0)
|
||||
@@ -395,17 +395,17 @@ class ZhihuContent(Base):
|
||||
class ZhihuComment(Base):
|
||||
__tablename__ = 'zhihu_comment'
|
||||
id = Column(Integer, primary_key=True)
|
||||
comment_id = Column(BigInteger, index=True)
|
||||
parent_comment_id = Column(String(255))
|
||||
comment_id = Column(String(64), index=True)
|
||||
parent_comment_id = Column(String(64))
|
||||
content = Column(Text)
|
||||
publish_time = Column(BigInteger, index=True)
|
||||
publish_time = Column(String(32), index=True)
|
||||
ip_location = Column(Text)
|
||||
sub_comment_count = Column(Integer, default=0)
|
||||
like_count = Column(Integer, default=0)
|
||||
dislike_count = Column(Integer, default=0)
|
||||
content_id = Column(BigInteger, index=True)
|
||||
content_id = Column(String(64), index=True)
|
||||
content_type = Column(Text)
|
||||
user_id = Column(String(255))
|
||||
user_id = Column(String(64))
|
||||
user_link = Column(Text)
|
||||
user_nickname = Column(Text)
|
||||
user_avatar = Column(Text)
|
||||
@@ -415,7 +415,7 @@ class ZhihuComment(Base):
|
||||
class ZhihuCreator(Base):
|
||||
__tablename__ = 'zhihu_creator'
|
||||
id = Column(Integer, primary_key=True)
|
||||
user_id = Column(Integer, unique=True, index=True)
|
||||
user_id = Column(String(64), unique=True, index=True)
|
||||
user_link = Column(Text)
|
||||
user_nickname = Column(Text)
|
||||
user_avatar = Column(Text)
|
||||
|
||||
Binary file not shown.
@@ -143,18 +143,17 @@ class XhsDbStoreImplement(AbstractStore):
|
||||
result = await session.execute(stmt)
|
||||
return result.first() is not None
|
||||
|
||||
async def store_comments(self, comments: List[Dict]):
|
||||
if not comments:
|
||||
async def store_comment(self, comment_item: Dict):
|
||||
if not comment_item:
|
||||
return
|
||||
async with get_session() as session:
|
||||
for comment_item in comments:
|
||||
comment_id = comment_item.get("comment_id")
|
||||
if not comment_id:
|
||||
continue
|
||||
if await self.comment_is_exist(session, comment_id):
|
||||
await self.update_comment(session, comment_item)
|
||||
else:
|
||||
await self.add_comment(session, comment_item)
|
||||
comment_id = comment_item.get("comment_id")
|
||||
if not comment_id:
|
||||
return
|
||||
if await self.comment_is_exist(session, comment_id):
|
||||
await self.update_comment(session, comment_item)
|
||||
else:
|
||||
await self.add_comment(session, comment_item)
|
||||
|
||||
async def add_comment(self, session: AsyncSession, comment_item: Dict):
|
||||
add_ts = int(get_current_timestamp())
|
||||
|
||||
Reference in New Issue
Block a user