删除 full text index 时的处理

This commit is contained in:
Benny
2023-03-14 21:32:39 +01:00
parent 4729420eef
commit 4310b7d429
5 changed files with 69 additions and 47 deletions

6
.gitignore vendored
View File

@@ -127,14 +127,13 @@ logs/*
/yyetsweb/yyets_sqlite.db
/yyetsweb/yyetsweb
/yyetsweb/assets.go
/yyetsweb/templates/static/*
/yyetsweb/templates/sponsor/*
/yyetsweb/templates/svg/*
/yyetsweb/templates/index.css
/yyetsweb/templates/logo*
/yyetsweb/templates/*.json
/yyetsweb/templates/*
/yyetsweb/templates/data/*
/yyetsweb/builds/
/builds/checksum-sha256sum.txt
@@ -144,3 +143,6 @@ logs/*
/env/yyets.env
!/docker-compose.override.yml
/docker-compose.override.yml
/yyetsweb/templates/dump/yyets_mongo.gz
/yyetsweb/templates/dump/yyets_mysql.zip
/yyetsweb/templates/dump/yyets_sqlite.zip

View File

@@ -16,7 +16,7 @@ tqdm==4.65.0
retry==0.9.2
pymysql==1.0.2
git+https://github.com/tgbot-collection/python-akismet
openpyxl==3.1.1
openpyxl==3.1.2
zhconv==1.4.3
jinja2==3.1.2
coloredlogs==15.0.1

View File

@@ -1319,10 +1319,12 @@ class SearchEngine(Base):
"data.info.id": 1,
"data.info.channel_cn": 1,
"data.info.channel": 1,
"_id": {"$toString": "$_id"},
"origin": "yyets",
}
douban_projection = {
"_id": 0,
"_id": {"$toString": "$_id"},
"id": "$resourceId",
"cnname": {"$first": "$resource.data.info.cnname"},
"enname": {"$first": "$resource.data.info.enname"},
@@ -1348,13 +1350,12 @@ class SearchEngine(Base):
"username": 1,
"date": 1,
"comment": "$content",
"_id": 0,
"commentID": {"$toString": "$_id"},
"origin": "comment",
"hasAvatar": {"$toBool": "$avatar"},
"resourceID": "$resource_id",
"resourceName": {"$first": "$resource.data.info.cnname"},
"id": {"$toString": "$_id"},
"_id": {"$toString": "$_id"},
}
comment_lookup = {
"from": "yyets",
@@ -1374,7 +1375,7 @@ class SearchEngine(Base):
return self.db["yyets"].aggregate(
[
{"$project": self.yyets_projection},
{"$replaceRoot": {"newRoot": {"$mergeObjects": [{"origin": "yyets"}, "$data.info"]}}},
{"$replaceRoot": {"newRoot": {"$mergeObjects": [{"origin": "yyets"}, "$data.info", {"_id": "$_id"}]}}},
]
)
@@ -1397,17 +1398,17 @@ class SearchEngine(Base):
def add_yyets(self):
    """Bulk-load the documents produced by ``__get_yyets`` into the yyets index.

    The documents are uploaded exactly once, with ``_id`` declared as the
    primary key, so the index is keyed the same way as the change-stream
    updates (which also use ``primary_key="_id"``).
    """
    logging.info("Adding yyets data to search engine")
    data = list(self.__get_yyets())
    # One upload only: a second, un-keyed upload would index every document
    # twice and leave the primary key to be inferred.
    self.yyets_index.add_documents(data, primary_key="_id")
def add_comment(self):
    """Bulk-load the documents produced by ``__get_comment`` into the comment index.

    The documents are uploaded exactly once, keyed on ``_id``. Uploading the
    same batch under two different primary keys (``commentID`` and ``_id``)
    is contradictory: an index has a single primary key.
    """
    logging.info("Adding comment data to search engine")
    data = list(self.__get_comment())
    # Keyed on "_id" like the yyets and douban loaders and the change-stream
    # updates, so deletes by "_id" address the same documents.
    self.comment_index.add_documents(data, primary_key="_id")
def add_douban(self):
    """Bulk-load the documents produced by ``__get_douban`` into the douban index.

    The documents are uploaded exactly once, with ``_id`` declared as the
    primary key.
    """
    logging.info("Adding douban data to search engine")
    data = list(self.__get_douban())
    # One upload only: an additional un-keyed upload would index the batch
    # twice and leave the primary key to be inferred.
    self.douban_index.add_documents(data, primary_key="_id")
def search_yyets(self, keyword: "str"):
    """Search the yyets index for ``keyword`` and return the ``hits`` list.

    The query is sent with ``matchingStrategy`` set to ``"all"``.
    """
    options = {"matchingStrategy": "all"}
    response = self.yyets_index.search(keyword, options)
    return response["hits"]
@@ -1423,46 +1424,53 @@ class SearchEngine(Base):
self.add_yyets()
self.add_comment()
self.add_douban()
logging.info(f"Imported data to search engine in {time.time() - t0:.2f}s")
logging.info(f"Import data to search engine in {time.time() - t0:.2f}s")
def __monitor(self, col, fun):
    """Mirror change-stream events of collection ``col`` into its search index.

    Args:
        col: Collection name. The target index is looked up on ``self`` as
            ``f"{col}_index"``.
        fun: Callable taking the changed document's ``_id`` and returning the
            list of documents to (re)index.

    A ``delete`` event removes the document from the index; every other
    event that identifies a document re-indexes what ``fun`` returns.
    A failure while handling one event is logged and does not stop the loop.
    """
    search_index = getattr(self, f"{col}_index")
    cursor = self.db[col].watch()
    for change in cursor:
        op_type = change["operationType"]
        document_key = change.get("documentKey")
        if document_key is None:
            # Collection-level events (e.g. drop, rename, invalidate) do not
            # identify a single document, so there is nothing to index.
            logging.warning("%s %s change stream event has no document key, skipped", col, op_type)
            continue

        _id = document_key["_id"]
        logging.info("%s %s change stream for %s", col, op_type, _id)
        try:
            if op_type == "delete":
                # Documents are indexed under the string form of their _id.
                search_index.delete_document(str(_id))
            else:
                search_index.add_documents(fun(_id), primary_key="_id")
        except Exception:
            # Boundary of a long-running watcher: one bad document must not
            # end the whole stream, but the failure must stay visible.
            logging.exception("Failed to sync %s %s change for %s", col, op_type, _id)
def monitor_yyets(self):
    """Keep the yyets search index in sync with the ``yyets`` collection.

    Delegates the change-stream loop to ``__monitor`` and only supplies the
    function that builds the index document for a changed ``_id``.
    """

    def get_data(_id) -> list:
        # The indexed document is the "data.info" sub-document, tagged with
        # the string form of the MongoDB _id and its origin.
        data = self.db.yyets.find_one({"_id": _id}, projection=self.yyets_projection)["data"]["info"]
        data["_id"] = str(_id)
        data["origin"] = "yyets"
        return [data]

    # No inline watch loop here: a second, never-ending loop ahead of this
    # call would keep the delegation (and delete handling) from ever running.
    self.__monitor("yyets", get_data)
def monitor_douban(self):
    """Keep the douban search index in sync with the ``douban`` collection.

    Delegates the change-stream loop to ``__monitor`` and only supplies the
    function that builds the index documents for a changed ``_id``.
    """

    def get_data(_id) -> list:
        # Same pipeline shape as the bulk import, restricted to one document.
        data = self.db.douban.aggregate(
            [
                {"$match": {"_id": _id}},
                {"$lookup": self.douban_lookup},
                {"$project": self.douban_projection},
            ]
        )
        return list(data)

    # No inline watch loop here: a second, never-ending loop ahead of this
    # call would keep the delegation (and delete handling) from ever running.
    self.__monitor("douban", get_data)
def monitor_comment(self):
    """Keep the comment search index in sync with the ``comment`` collection.

    Delegates the change-stream loop to ``__monitor`` and only supplies the
    function that builds the index documents for a changed ``_id``.
    """

    def get_data(_id) -> list:
        # Same pipeline shape as the bulk import, restricted to one document.
        data = self.db.comment.aggregate(
            [
                {"$match": {"_id": _id}},
                {"$lookup": self.comment_lookup},
                {"$project": self.comment_projection},
            ]
        )
        return list(data)

    # No inline watch loop here: a second, never-ending loop ahead of this
    # call would keep the delegation (and delete handling) from ever running.
    self.__monitor("comment", get_data)

BIN
yyetsweb/gift.gzip Normal file

Binary file not shown.

View File

@@ -297,8 +297,20 @@ class ResourceHandler(BaseHandler):
return data
def make_some_fun(self):
    """Return the pre-compressed "gift" page for eligible requests.

    A request is eligible when it carries no ``referer`` header and the
    ``GIFT`` environment variable is set to a non-empty value.

    Returns:
        The raw bytes of ``gift.gzip`` (with ``Content-Type`` and
        ``Content-Encoding`` response headers set), or ``None`` when the
        request is not eligible or the file cannot be read.
    """
    if self.request.headers.get("referer") or not os.getenv("GIFT"):
        return None

    try:
        with open("gift.gzip", "rb") as f:
            payload = f.read()
    except OSError:
        # The gift is optional: a missing or unreadable file must not break
        # the regular search that the caller falls back to.
        logging.exception("GIFT is enabled but gift.gzip could not be read")
        return None

    # Headers are set only once the payload is in hand, so a failed read
    # never leaves a gzip Content-Encoding on a non-gzip response.
    self.set_header("Content-Type", "text/html")
    self.set_header("Content-Encoding", "gzip")
    return payload
@run_on_executor()
def search_resource(self):
if gift := self.make_some_fun():
logging.warning("Good luck to %s!", self.get_real_ip())
return gift
kw = self.get_query_argument("keyword").lower()
search_type = self.get_query_argument("type", "default")
self.set_header("search-engine", "Meilisearch" if os.getenv("MEILISEARCH") else "MongoDB")