优化mongodb配置获取逻辑,移动存储基类位置。

This commit is contained in:
yt210
2025-11-07 17:42:28 +08:00
parent ef6948b305
commit b61ec54a72
8 changed files with 40 additions and 85 deletions

View File

@@ -1,8 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """MongoDB存储基类提供连接管理和通用存储方法"""
MongoDB存储基类
提供MongoDB连接管理和通用存储方法
"""
import asyncio import asyncio
from typing import Dict, List, Optional from typing import Dict, List, Optional
from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorDatabase, AsyncIOMotorCollection from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorDatabase, AsyncIOMotorCollection
@@ -11,7 +8,7 @@ from tools import utils
class MongoDBConnection: class MongoDBConnection:
"""MongoDB连接管理单例""" """MongoDB连接管理单例模式)"""
_instance = None _instance = None
_client: Optional[AsyncIOMotorClient] = None _client: Optional[AsyncIOMotorClient] = None
_db: Optional[AsyncIOMotorDatabase] = None _db: Optional[AsyncIOMotorDatabase] = None
@@ -23,7 +20,7 @@ class MongoDBConnection:
return cls._instance return cls._instance
async def get_client(self) -> AsyncIOMotorClient: async def get_client(self) -> AsyncIOMotorClient:
"""获取MongoDB客户端""" """获取客户端"""
if self._client is None: if self._client is None:
async with self._lock: async with self._lock:
if self._client is None: if self._client is None:
@@ -31,7 +28,7 @@ class MongoDBConnection:
return self._client return self._client
async def get_db(self) -> AsyncIOMotorDatabase: async def get_db(self) -> AsyncIOMotorDatabase:
"""获取MongoDB数据库""" """获取数据库"""
if self._db is None: if self._db is None:
async with self._lock: async with self._lock:
if self._db is None: if self._db is None:
@@ -39,135 +36,93 @@ class MongoDBConnection:
return self._db return self._db
async def _connect(self): async def _connect(self):
"""建立MongoDB连接""" """建立连接"""
try: try:
mongo_config = db_config.mongodb_config mongo_config = db_config.mongodb_config
host = mongo_config.get("host", "localhost") host = mongo_config["host"]
port = mongo_config.get("port", 27017) port = mongo_config["port"]
user = mongo_config.get("user", "") user = mongo_config["user"]
password = mongo_config.get("password", "") password = mongo_config["password"]
db_name = mongo_config.get("db_name", "media_crawler") db_name = mongo_config["db_name"]
# 构建连接URL # 构建连接URL(有认证/无认证)
if user and password: if user and password:
connection_url = f"mongodb://{user}:{password}@{host}:{port}/" connection_url = f"mongodb://{user}:{password}@{host}:{port}/"
else: else:
connection_url = f"mongodb://{host}:{port}/" connection_url = f"mongodb://{host}:{port}/"
self._client = AsyncIOMotorClient(connection_url, serverSelectionTimeoutMS=5000) self._client = AsyncIOMotorClient(connection_url, serverSelectionTimeoutMS=5000)
# 测试连接 await self._client.server_info() # 测试连接
await self._client.server_info()
self._db = self._client[db_name] self._db = self._client[db_name]
utils.logger.info(f"[MongoDBConnection] Successfully connected to MongoDB at {host}:{port}, database: {db_name}") utils.logger.info(f"[MongoDBConnection] Connected to {host}:{port}/{db_name}")
except Exception as e: except Exception as e:
utils.logger.error(f"[MongoDBConnection] Failed to connect to MongoDB: {e}") utils.logger.error(f"[MongoDBConnection] Connection failed: {e}")
raise raise
async def close(self): async def close(self):
"""关闭MongoDB连接""" """关闭连接"""
if self._client is not None: if self._client is not None:
self._client.close() self._client.close()
self._client = None self._client = None
self._db = None self._db = None
utils.logger.info("[MongoDBConnection] MongoDB connection closed") utils.logger.info("[MongoDBConnection] Connection closed")
class MongoDBStoreBase: class MongoDBStoreBase:
"""MongoDB存储基类""" """MongoDB存储基类提供通用的CRUD操作"""
def __init__(self, collection_prefix: str): def __init__(self, collection_prefix: str):
""" """初始化存储基类
初始化MongoDB存储基类
Args: Args:
collection_prefix: 集合名称前缀xhs, douyin, bilibili等 collection_prefix: 平台前缀xhs/douyin/bilibili等
""" """
self.collection_prefix = collection_prefix self.collection_prefix = collection_prefix
self._connection = MongoDBConnection() self._connection = MongoDBConnection()
async def get_collection(self, collection_suffix: str) -> AsyncIOMotorCollection: async def get_collection(self, collection_suffix: str) -> AsyncIOMotorCollection:
""" """获取集合:{prefix}_{suffix}"""
获取MongoDB集合
Args:
collection_suffix: 集合名称后缀contents, comments, creators
Returns:
MongoDB集合对象
"""
db = await self._connection.get_db() db = await self._connection.get_db()
collection_name = f"{self.collection_prefix}_{collection_suffix}" collection_name = f"{self.collection_prefix}_{collection_suffix}"
return db[collection_name] return db[collection_name]
async def save_or_update(self, collection_suffix: str, query: Dict, data: Dict) -> bool: async def save_or_update(self, collection_suffix: str, query: Dict, data: Dict) -> bool:
""" """保存或更新数据upsert"""
保存或更新数据upsert操作
Args:
collection_suffix: 集合名称后缀
query: 查询条件
data: 要保存的数据
Returns:
是否成功
"""
try: try:
collection = await self.get_collection(collection_suffix) collection = await self.get_collection(collection_suffix)
result = await collection.update_one( await collection.update_one(query, {"$set": data}, upsert=True)
query,
{"$set": data},
upsert=True
)
return True return True
except Exception as e: except Exception as e:
utils.logger.error(f"[MongoDBStoreBase.save_or_update] Failed to save data to {self.collection_prefix}_{collection_suffix}: {e}") utils.logger.error(f"[MongoDBStoreBase] Save failed ({self.collection_prefix}_{collection_suffix}): {e}")
return False return False
async def find_one(self, collection_suffix: str, query: Dict) -> Optional[Dict]: async def find_one(self, collection_suffix: str, query: Dict) -> Optional[Dict]:
""" """查询单条数据"""
查询单条数据
Args:
collection_suffix: 集合名称后缀
query: 查询条件
Returns:
查询结果
"""
try: try:
collection = await self.get_collection(collection_suffix) collection = await self.get_collection(collection_suffix)
result = await collection.find_one(query) return await collection.find_one(query)
return result
except Exception as e: except Exception as e:
utils.logger.error(f"[MongoDBStoreBase.find_one] Failed to query from {self.collection_prefix}_{collection_suffix}: {e}") utils.logger.error(f"[MongoDBStoreBase] Find one failed ({self.collection_prefix}_{collection_suffix}): {e}")
return None return None
async def find_many(self, collection_suffix: str, query: Dict, limit: int = 0) -> List[Dict]: async def find_many(self, collection_suffix: str, query: Dict, limit: int = 0) -> List[Dict]:
""" """查询多条数据limit=0表示不限制"""
查询多条数据
Args:
collection_suffix: 集合名称后缀
query: 查询条件
limit: 限制返回数量0表示不限制
Returns:
查询结果列表
"""
try: try:
collection = await self.get_collection(collection_suffix) collection = await self.get_collection(collection_suffix)
cursor = collection.find(query) cursor = collection.find(query)
if limit > 0: if limit > 0:
cursor = cursor.limit(limit) cursor = cursor.limit(limit)
results = await cursor.to_list(length=None) return await cursor.to_list(length=None)
return results
except Exception as e: except Exception as e:
utils.logger.error(f"[MongoDBStoreBase.find_many] Failed to query from {self.collection_prefix}_{collection_suffix}: {e}") utils.logger.error(f"[MongoDBStoreBase] Find many failed ({self.collection_prefix}_{collection_suffix}): {e}")
return [] return []
async def create_index(self, collection_suffix: str, keys: List[tuple], unique: bool = False): async def create_index(self, collection_suffix: str, keys: List[tuple], unique: bool = False):
""" """创建索引keys=[("field", 1)]"""
创建索引
Args:
collection_suffix: 集合名称后缀
keys: 索引键列表例如[("note_id", 1)]
unique: 是否创建唯一索引
"""
try: try:
collection = await self.get_collection(collection_suffix) collection = await self.get_collection(collection_suffix)
await collection.create_index(keys, unique=unique) await collection.create_index(keys, unique=unique)
utils.logger.info(f"[MongoDBStoreBase.create_index] Created index on {self.collection_prefix}_{collection_suffix}") utils.logger.info(f"[MongoDBStoreBase] Index created on {self.collection_prefix}_{collection_suffix}")
except Exception as e: except Exception as e:
utils.logger.error(f"[MongoDBStoreBase.create_index] Failed to create index: {e}") utils.logger.error(f"[MongoDBStoreBase] Create index failed: {e}")

View File

@@ -31,7 +31,7 @@ from database.models import BilibiliVideoComment, BilibiliVideo, BilibiliUpInfo,
from tools.async_file_writer import AsyncFileWriter from tools.async_file_writer import AsyncFileWriter
from tools import utils, words from tools import utils, words
from var import crawler_type_var from var import crawler_type_var
from store.mongodb_store_base import MongoDBStoreBase from database.mongodb_store_base import MongoDBStoreBase
class BiliCsvStoreImplement(AbstractStore): class BiliCsvStoreImplement(AbstractStore):

View File

@@ -28,7 +28,7 @@ from database.models import DouyinAweme, DouyinAwemeComment, DyCreator
from tools import utils, words from tools import utils, words
from tools.async_file_writer import AsyncFileWriter from tools.async_file_writer import AsyncFileWriter
from var import crawler_type_var from var import crawler_type_var
from store.mongodb_store_base import MongoDBStoreBase from database.mongodb_store_base import MongoDBStoreBase
class DouyinCsvStoreImplement(AbstractStore): class DouyinCsvStoreImplement(AbstractStore):

View File

@@ -30,7 +30,7 @@ from database.db_session import get_session
from database.models import KuaishouVideo, KuaishouVideoComment from database.models import KuaishouVideo, KuaishouVideoComment
from tools import utils, words from tools import utils, words
from var import crawler_type_var from var import crawler_type_var
from store.mongodb_store_base import MongoDBStoreBase from database.mongodb_store_base import MongoDBStoreBase
def calculate_number_of_files(file_store_path: str) -> int: def calculate_number_of_files(file_store_path: str) -> int:

View File

@@ -31,7 +31,7 @@ from tools import utils, words
from database.db_session import get_session from database.db_session import get_session
from var import crawler_type_var from var import crawler_type_var
from tools.async_file_writer import AsyncFileWriter from tools.async_file_writer import AsyncFileWriter
from store.mongodb_store_base import MongoDBStoreBase from database.mongodb_store_base import MongoDBStoreBase
def calculate_number_of_files(file_store_path: str) -> int: def calculate_number_of_files(file_store_path: str) -> int:

View File

@@ -31,7 +31,7 @@ from tools import utils, words
from tools.async_file_writer import AsyncFileWriter from tools.async_file_writer import AsyncFileWriter
from database.db_session import get_session from database.db_session import get_session
from var import crawler_type_var from var import crawler_type_var
from store.mongodb_store_base import MongoDBStoreBase from database.mongodb_store_base import MongoDBStoreBase
def calculate_number_of_files(file_store_path: str) -> int: def calculate_number_of_files(file_store_path: str) -> int:

View File

@@ -18,7 +18,7 @@ from database.models import XhsNote, XhsNoteComment, XhsCreator
from tools.async_file_writer import AsyncFileWriter from tools.async_file_writer import AsyncFileWriter
from tools.time_util import get_current_timestamp from tools.time_util import get_current_timestamp
from var import crawler_type_var from var import crawler_type_var
from store.mongodb_store_base import MongoDBStoreBase from database.mongodb_store_base import MongoDBStoreBase
from tools import utils from tools import utils
class XhsCsvStoreImplement(AbstractStore): class XhsCsvStoreImplement(AbstractStore):

View File

@@ -31,7 +31,7 @@ from database.models import ZhihuContent, ZhihuComment, ZhihuCreator
from tools import utils, words from tools import utils, words
from var import crawler_type_var from var import crawler_type_var
from tools.async_file_writer import AsyncFileWriter from tools.async_file_writer import AsyncFileWriter
from store.mongodb_store_base import MongoDBStoreBase from database.mongodb_store_base import MongoDBStoreBase
def calculate_number_of_files(file_store_path: str) -> int: def calculate_number_of_files(file_store_path: str) -> int:
"""计算数据保存文件的前部分排序数字,支持每次运行代码不写到同一个文件中 """计算数据保存文件的前部分排序数字,支持每次运行代码不写到同一个文件中