fix(数据库): 修复模型字段类型以支持更广泛的数据格式

修复 xhs 评论存储方法,从批量处理改为单条处理
This commit is contained in:
persist-1
2025-09-07 04:10:49 +08:00
parent b04f5bcd6f
commit 684a16ed9a
3 changed files with 32 additions and 33 deletions

View File

@@ -161,12 +161,12 @@ class DyCreator(Base):
class KuaishouVideo(Base):
__tablename__ = 'kuaishou_video'
id = Column(Integer, primary_key=True)
user_id = Column(String(255))
user_id = Column(BigInteger)
nickname = Column(Text)
avatar = Column(Text)
add_ts = Column(BigInteger)
last_modify_ts = Column(BigInteger)
video_id = Column(BigInteger, index=True)
video_id = Column(String(255), index=True)
video_type = Column(Text)
title = Column(Text)
desc = Column(Text)
@@ -181,13 +181,13 @@ class KuaishouVideo(Base):
class KuaishouVideoComment(Base):
__tablename__ = 'kuaishou_video_comment'
id = Column(Integer, primary_key=True)
user_id = Column(Integer)
user_id = Column(Text)
nickname = Column(Text)
avatar = Column(Text)
add_ts = Column(BigInteger)
last_modify_ts = Column(BigInteger)
comment_id = Column(BigInteger, index=True)
video_id = Column(BigInteger, index=True)
video_id = Column(String(255), index=True)
content = Column(Text)
create_time = Column(BigInteger)
sub_comment_count = Column(Text)
@@ -206,7 +206,7 @@ class WeiboNote(Base):
note_id = Column(BigInteger, index=True)
content = Column(Text)
create_time = Column(BigInteger, index=True)
create_date_time = Column(BigInteger, index=True)
create_date_time = Column(String(255), index=True)
liked_count = Column(Text)
comments_count = Column(Text)
shared_count = Column(Text)
@@ -228,7 +228,7 @@ class WeiboNoteComment(Base):
note_id = Column(BigInteger, index=True)
content = Column(Text)
create_time = Column(BigInteger)
create_date_time = Column(BigInteger, index=True)
create_date_time = Column(String(255), index=True)
comment_like_count = Column(Text)
sub_comment_count = Column(Text)
parent_comment_id = Column(String(255))
@@ -273,7 +273,7 @@ class XhsNote(Base):
ip_location = Column(Text)
add_ts = Column(BigInteger)
last_modify_ts = Column(BigInteger)
note_id = Column(BigInteger, index=True)
note_id = Column(String(255), index=True)
type = Column(Text)
title = Column(Text)
desc = Column(Text)
@@ -299,7 +299,7 @@ class XhsNoteComment(Base):
ip_location = Column(Text)
add_ts = Column(BigInteger)
last_modify_ts = Column(BigInteger)
comment_id = Column(BigInteger, index=True)
comment_id = Column(String(255), index=True)
create_time = Column(BigInteger, index=True)
note_id = Column(String(255))
content = Column(Text)
@@ -311,15 +311,15 @@ class XhsNoteComment(Base):
class TiebaNote(Base):
__tablename__ = 'tieba_note'
id = Column(Integer, primary_key=True)
note_id = Column(BigInteger, index=True)
note_id = Column(String(644), index=True)
title = Column(Text)
desc = Column(Text)
note_url = Column(Text)
publish_time = Column(BigInteger, index=True)
publish_time = Column(String(255), index=True)
user_link = Column(Text, default='')
user_nickname = Column(Text, default='')
user_avatar = Column(Text, default='')
tieba_id = Column(Integer, default='')
tieba_id = Column(String(255), default='')
tieba_name = Column(Text)
tieba_link = Column(Text)
total_replay_num = Column(Integer, default=0)
@@ -332,7 +332,7 @@ class TiebaNote(Base):
class TiebaComment(Base):
__tablename__ = 'tieba_comment'
id = Column(Integer, primary_key=True)
comment_id = Column(BigInteger, index=True)
comment_id = Column(String(255), index=True)
parent_comment_id = Column(String(255), default='')
content = Column(Text)
user_link = Column(Text, default='')
@@ -341,10 +341,10 @@ class TiebaComment(Base):
tieba_id = Column(String(255), default='')
tieba_name = Column(Text)
tieba_link = Column(Text)
publish_time = Column(BigInteger, index=True)
publish_time = Column(String(255), index=True)
ip_location = Column(Text, default='')
sub_comment_count = Column(Integer, default=0)
note_id = Column(BigInteger, index=True)
note_id = Column(String(255), index=True)
note_url = Column(Text)
add_ts = Column(BigInteger)
last_modify_ts = Column(BigInteger)
@@ -352,7 +352,7 @@ class TiebaComment(Base):
class TiebaCreator(Base):
__tablename__ = 'tieba_creator'
id = Column(Integer, primary_key=True)
user_id = Column(String(255))
user_id = Column(String(64))
user_name = Column(Text)
nickname = Column(Text)
avatar = Column(Text)
@@ -367,14 +367,14 @@ class TiebaCreator(Base):
class ZhihuContent(Base):
__tablename__ = 'zhihu_content'
id = Column(Integer, primary_key=True)
content_id = Column(BigInteger, index=True)
content_id = Column(String(64), index=True)
content_type = Column(Text)
content_text = Column(Text)
content_url = Column(Text)
question_id = Column(String(255))
title = Column(Text)
desc = Column(Text)
created_time = Column(BigInteger, index=True)
created_time = Column(String(32), index=True)
updated_time = Column(Text)
voteup_count = Column(Integer, default=0)
comment_count = Column(Integer, default=0)
@@ -395,17 +395,17 @@ class ZhihuContent(Base):
class ZhihuComment(Base):
__tablename__ = 'zhihu_comment'
id = Column(Integer, primary_key=True)
comment_id = Column(BigInteger, index=True)
parent_comment_id = Column(String(255))
comment_id = Column(String(64), index=True)
parent_comment_id = Column(String(64))
content = Column(Text)
publish_time = Column(BigInteger, index=True)
publish_time = Column(String(32), index=True)
ip_location = Column(Text)
sub_comment_count = Column(Integer, default=0)
like_count = Column(Integer, default=0)
dislike_count = Column(Integer, default=0)
content_id = Column(BigInteger, index=True)
content_id = Column(String(64), index=True)
content_type = Column(Text)
user_id = Column(String(255))
user_id = Column(String(64))
user_link = Column(Text)
user_nickname = Column(Text)
user_avatar = Column(Text)
@@ -415,7 +415,7 @@ class ZhihuComment(Base):
class ZhihuCreator(Base):
__tablename__ = 'zhihu_creator'
id = Column(Integer, primary_key=True)
user_id = Column(Integer, unique=True, index=True)
user_id = Column(String(64), unique=True, index=True)
user_link = Column(Text)
user_nickname = Column(Text)
user_avatar = Column(Text)

Binary file not shown.

View File

@@ -143,18 +143,17 @@ class XhsDbStoreImplement(AbstractStore):
result = await session.execute(stmt)
return result.first() is not None
async def store_comments(self, comments: List[Dict]):
if not comments:
async def store_comment(self, comment_item: Dict):
if not comment_item:
return
async with get_session() as session:
for comment_item in comments:
comment_id = comment_item.get("comment_id")
if not comment_id:
continue
if await self.comment_is_exist(session, comment_id):
await self.update_comment(session, comment_item)
else:
await self.add_comment(session, comment_item)
comment_id = comment_item.get("comment_id")
if not comment_id:
return
if await self.comment_is_exist(session, comment_id):
await self.update_comment(session, comment_item)
else:
await self.add_comment(session, comment_item)
async def add_comment(self, session: AsyncSession, comment_item: Dict):
add_ts = int(get_current_timestamp())