Files
YYeTsBot/yyetsweb/Mongo.py

998 lines
40 KiB
Python
Raw Normal View History

2021-06-17 10:39:47 +08:00
#!/usr/local/bin/python3
# coding: utf-8
# YYeTsBot - mongodb.py
# 6/16/21 21:18
#
__author__ = "Benny <benny.think@gmail.com>"
2021-08-15 12:28:19 +08:00
import base64
import contextlib
2021-08-15 12:28:19 +08:00
import json
2021-07-11 15:53:39 +08:00
import logging
2021-06-17 10:39:47 +08:00
import os
2021-07-11 15:53:39 +08:00
import pathlib
2021-08-15 12:28:19 +08:00
import random
2021-07-11 15:53:39 +08:00
import re
import sys
2021-06-17 10:39:47 +08:00
import time
2021-08-15 12:28:19 +08:00
import uuid
2021-07-11 15:53:39 +08:00
from datetime import date, timedelta
2021-06-17 10:39:47 +08:00
from http import HTTPStatus
2021-07-10 23:28:44 +08:00
from urllib.parse import unquote
2021-06-17 10:39:47 +08:00
2021-07-11 15:53:39 +08:00
import pymongo
2021-06-17 10:39:47 +08:00
import requests
2021-07-11 15:53:39 +08:00
from bs4 import BeautifulSoup
from bson.objectid import ObjectId
2021-06-17 10:39:47 +08:00
from passlib.handlers.pbkdf2 import pbkdf2_sha256
2021-07-13 20:46:38 +08:00
from retry import retry
2021-08-15 12:28:19 +08:00
from tqdm import tqdm
2021-06-17 10:39:47 +08:00
2021-07-11 15:53:39 +08:00
from database import (AnnouncementResource, BlacklistResource, CaptchaResource,
2021-08-15 12:28:19 +08:00
CategoryResource, CommentChildResource,
CommentNewestResource, CommentReactionResource,
2021-07-24 13:45:57 +08:00
CommentResource, DoubanReportResource, DoubanResource,
2021-08-15 12:28:19 +08:00
GrafanaQueryResource, LikeResource, MetricsResource,
NameResource, NotificationResource, OtherResource, Redis,
ResourceLatestResource, ResourceResource, TopResource,
UserEmailResource, UserResource)
from utils import send_mail, ts_date
lib_path = pathlib.Path(__file__).parent.parent.joinpath("yyetsbot").resolve().as_posix()
sys.path.append(lib_path)
2021-07-31 17:49:17 +08:00
from fansub import BD2020, XL720, NewzmzOnline, ZhuixinfanOnline, ZimuxiaOnline
2021-06-17 10:39:47 +08:00
# MongoDB host comes from the "mongo" env var, falling back to localhost.
mongo_host = os.getenv("mongo") or "localhost"
# Douban endpoints; cat=1002 restricts the search to movie/TV subjects.
DOUBAN_SEARCH = "https://www.douban.com/search?cat=1002&q={}"
DOUBAN_DETAIL = "https://movie.douban.com/subject/{}/"
2021-06-17 10:39:47 +08:00
class Mongo:
    """Owns a single MongoClient bound to the ``zimuzu`` database.

    All *MongoResource classes mix this in to get ``self.db``.
    """

    def __init__(self):
        # connect=False defers the real connection until first use,
        # keeping construction cheap and fork-safe.
        self.client = pymongo.MongoClient(host=mongo_host, connect=False,
                                          connectTimeoutMS=5000, serverSelectionTimeoutMS=5000)
        self.db = self.client["zimuzu"]

    def __del__(self):
        self.client.close()

    def is_admin(self, username: str) -> bool:
        """Return True if *username* belongs to the admin group.

        Fix: the original returned None (implicitly) for non-admins even though
        the signature promises ``bool``; identity checks like
        ``is_admin(u) is False`` would misbehave. Always return a real bool now.
        """
        data = self.db["users"].find_one({"username": username, "group": {"$in": ["admin"]}})
        return data is not None
class FakeMongoResource:
    """Empty stub class with no behavior of its own; presumably a no-op
    stand-in where a real Mongo-backed resource is not needed — confirm usage."""
    pass
class OtherMongoResource(OtherResource, Mongo):
    def reset_top(self):
        """Archive the current top/view statistics to ``history`` and reset counters.

        The archived records are stamped with the month of "yesterday"
        (``time.time() - 1 day``) — presumably this runs on the first day of a
        month so the stamp names the month that just ended; confirm scheduling.
        """
        # before resetting, save top data to history
        json_data = requests.get("http://127.0.0.1:8888/api/top").json()
        last_month = time.strftime("%Y-%m", time.localtime(time.time() - 3600 * 24))
        json_data["date"] = last_month
        json_data["type"] = "top"
        self.db["history"].insert_one(json_data)
        # save all the views data to history
        projection = {'_id': False, 'data.info.views': True, 'data.info.id': True}
        data = self.db['yyets'].find({}, projection).sort("data.info.views", pymongo.DESCENDING)
        # one document mapping resource id (as string key) -> view count
        result = {"date": last_month, "type": "detail"}
        for datum in data:
            rid = str(datum["data"]["info"]["id"])
            views = datum["data"]["info"]["views"]
            result[rid] = views
        self.db["history"].insert_one(result)
        # reset
        self.db["yyets"].update_many({}, {"$set": {"data.info.views": 0}})
class AnnouncementMongoResource(AnnouncementResource, Mongo):
    def get_announcement(self, page: int, size: int) -> dict:
        """Return one page of announcements, newest first.

        :param page: 1-based page number
        :param size: page size
        :return: {"data": [...], "count": total announcements}
        """
        condition = {}
        count = self.db["announcement"].count_documents(condition)
        # projection drops the sensitive "ip" field; _id is kept (the only
        # field MongoDB allows to mix into an exclusion projection)
        data = self.db["announcement"].find(condition, projection={"_id": True, "ip": False}) \
            .sort("_id", pymongo.DESCENDING).limit(size).skip((page - 1) * size)
        data = list(data)
        # expose ObjectId as a plain string under "id"
        for i in data:
            i["id"] = str(i["_id"])
            i.pop("_id")
        return {
            "data": data,
            "count": count,
        }

    def add_announcement(self, username, content, ip, browser):
        """Insert a new announcement with audit metadata (ip/browser/date)."""
        construct = {
            "username": username,
            "ip": ip,
            "date": ts_date(),
            "browser": browser,
            "content": content,
        }
        self.db["announcement"].insert_one(construct)
class BlacklistMongoResource(BlacklistResource):
    """Expose the Redis-backed block list (keys that carry an expiry)."""

    def get_black_list(self):
        """Map every Redis key that has a TTL to its count and remaining TTL.

        Keys whose ttl is -1 (no expiry set) are skipped.
        """
        snapshot = {}
        for redis_key in self.r.keys("*"):
            remaining = self.r.ttl(redis_key)
            if remaining == -1:
                continue
            snapshot[redis_key] = {"count": self.r.get(redis_key), "ttl": remaining}
        return snapshot
class CommentMongoResource(CommentResource, Mongo):
    """CRUD for resource comments: nested child comments, emoji reactions,
    reply notifications and soft deletion."""

    def __init__(self):
        super().__init__()
        # pagination defaults for the child comments embedded in a parent listing
        self.inner_page = 1
        self.inner_size = 5
        self.projection = {"ip": False, "parent_id": False}

    @staticmethod
    def convert_objectid(data):
        """Replace ObjectId ``_id`` with string ``id`` on comments and their children."""
        for item in data:
            item["id"] = str(item["_id"])
            item.pop("_id")
            for child in item.get("children", []):
                child["id"] = str(child["_id"])
                child.pop("_id")

    def find_children(self, parent_data):
        """Inline one page of non-deleted child comments into each parent.

        Sets ``children`` (list) and ``childrenCount`` (total, ignoring paging)
        on every item of *parent_data*, in place.
        """
        for item in parent_data:
            children_ids = item.get("children", [])
            condition = {"_id": {"$in": children_ids}, "deleted_at": {"$exists": False}, "type": "child"}
            children_count = self.db["comment"].count_documents(condition)
            children_data = self.db["comment"].find(condition, self.projection) \
                .sort("_id", pymongo.DESCENDING).limit(self.inner_size).skip((self.inner_page - 1) * self.inner_size)
            children_data = list(children_data)
            self.get_user_group(children_data)
            self.add_reactions(children_data)
            item["children"] = []
            if children_data:
                item["children"].extend(children_data)
                item["childrenCount"] = children_count
            else:
                item["childrenCount"] = 0

    def get_user_group(self, data):
        """Attach the author's user group to each comment (defaults to ["user"]).

        Fix: ``find_one`` returns None when the author no longer exists in the
        users collection, which made the original crash with AttributeError.
        Fall back to an empty dict so such comments get the default group.
        """
        for comment in data:
            username = comment["username"]
            user = self.db["users"].find_one({"username": username}) or {}
            group = user.get("group", ["user"])
            comment["group"] = group

    def add_reactions(self, data):
        """Attach emoji reactions as [{"verb": ..., "users": [...]}] to each comment."""
        for comment in data:
            cid = comment.get("id") or comment.get("_id")
            cid = str(cid)
            reactions = self.db["reactions"].find_one({"comment_id": cid},
                                                      projection={"_id": False, "comment_id": False}) or {}
            for verb, users in reactions.items():
                if users:
                    comment.setdefault("reactions", []).append({"verb": verb, "users": users})

    def get_comment(self, resource_id: int, page: int, size: int, **kwargs) -> dict:
        """Return one page of top-level comments for *resource_id*, newest first,
        with children, user groups and reactions inlined.

        kwargs may carry inner_page/inner_size for the embedded child listing.
        """
        self.inner_page = kwargs.get("inner_page", 1)
        self.inner_size = kwargs.get("inner_size", 5)
        condition = {"resource_id": resource_id, "deleted_at": {"$exists": False}, "type": {"$ne": "child"}}
        count = self.db["comment"].count_documents(condition)
        data = self.db["comment"].find(condition, self.projection) \
            .sort("_id", pymongo.DESCENDING).limit(size).skip((page - 1) * size)
        data = list(data)
        self.find_children(data)
        self.convert_objectid(data)
        self.get_user_group(data)
        self.add_reactions(data)
        return {
            "data": data,
            "count": count,
            "resource_id": resource_id
        }

    def add_comment(self, captcha: str, captcha_id: int, content: str, resource_id: int,
                    ip: str, username: str, browser: str, parent_comment_id=None) -> dict:
        """Validate captcha/resource/parent, insert the comment, then notify
        the parent author (in-app, plus email for verified addresses).

        :return: {"status_code": HTTPStatus value, "message": str}
        """
        returned = {"status_code": 0, "message": ""}
        verify_result = CaptchaResource().verify_code(captcha, captcha_id)
        if not verify_result["status"]:
            returned["status_code"] = HTTPStatus.BAD_REQUEST
            returned["message"] = verify_result["message"]
            return returned
        exists = self.db["yyets"].find_one({"data.info.id": resource_id})
        if not exists:
            returned["status_code"] = HTTPStatus.NOT_FOUND
            returned["message"] = "资源不存在"
            return returned
        if parent_comment_id:
            # when replying, "exists" is rebound to the parent comment document
            # (used below to find whom to notify)
            exists = self.db["comment"].find_one({"_id": ObjectId(parent_comment_id)})
            if not exists:
                returned["status_code"] = HTTPStatus.NOT_FOUND
                returned["message"] = "评论不存在"
                return returned
        basic_comment = {
            "username": username,
            "ip": ip,
            "date": ts_date(),
            "browser": browser,
            "content": content,
            "resource_id": resource_id
        }
        if parent_comment_id is None:
            basic_comment["type"] = "parent"
        else:
            basic_comment["type"] = "child"
        # every comment, parent or child, gets its own document
        inserted_id: str = self.db["comment"].insert_one(basic_comment).inserted_id
        if parent_comment_id is not None:
            # link child to parent in both directions
            self.db["comment"].find_one_and_update({"_id": ObjectId(parent_comment_id)},
                                                   {"$push": {"children": inserted_id}}
                                                   )
            self.db["comment"].find_one_and_update({"_id": ObjectId(inserted_id)},
                                                   {"$set": {"parent_id": ObjectId(parent_comment_id)}}
                                                   )
        returned["status_code"] = HTTPStatus.CREATED
        returned["message"] = "评论成功"
        # notification
        if parent_comment_id:
            # "exists" is the parent comment here, so notify its author
            self.db["notification"].find_one_and_update(
                {"username": exists["username"]},
                {"$push": {"unread": inserted_id}},
                upsert=True
            )
            # send email
            # TODO unsubscribe
            parent_comment = self.db["comment"].find_one({"_id": ObjectId(parent_comment_id)})
            if resource_id == 233:
                link = f"https://yyets.dmesg.app/discuss#{parent_comment_id}"
            else:
                link = f"https://yyets.dmesg.app/resource?id={resource_id}#{parent_comment_id}"
            user_info = self.db["users"].find_one({"username": parent_comment["username"], "email.verified": True})
            if user_info:
                subject = "[人人影视下载分享站] 你的评论有了新的回复"
                # strip the quoted-reply prefix before mailing
                pt_content = content.split("</reply>")[-1]
                body = f"{username} 您好,<br>你的评论 {parent_comment['content']} 有了新的回复:<br>{pt_content}" \
                       f"<br>你可以<a href='{link}'>点此链接</a>查看<br><br>请勿回复此邮件"
                send_mail(user_info["email"]["address"], subject, body)
        return returned

    def delete_comment(self, comment_id):
        """Soft-delete a comment and all of its children by stamping ``deleted_at``.

        :return: {"status_code": OK|NOT_FOUND, "message": "", "count": modified docs}
        """
        current_time = ts_date()
        count = self.db["comment"].update_one({"_id": ObjectId(comment_id), "deleted_at": {"$exists": False}},
                                              {"$set": {"deleted_at": current_time}}).modified_count
        # find any children and mark them deleted as well
        parent_data = self.db["comment"].find_one({"_id": ObjectId(comment_id)})
        if parent_data:
            child_ids = parent_data.get("children", [])
        else:
            child_ids = []
        count += self.db["comment"].update_many({"_id": {"$in": child_ids}, "deleted_at": {"$exists": False}},
                                                {"$set": {"deleted_at": current_time}}).modified_count
        returned = {"status_code": 0, "message": "", "count": -1}
        if count == 0:
            returned["status_code"] = HTTPStatus.NOT_FOUND
            returned["count"] = 0
        else:
            returned["status_code"] = HTTPStatus.OK
            returned["count"] = count
        return returned
2021-07-26 20:24:32 +08:00
2021-08-15 12:28:19 +08:00
class CommentReactionMongoResource(CommentReactionResource, Mongo):
    """Add or remove an emoji reaction on a comment."""

    def react_comment(self, username, data):
        """Apply *username*'s reaction described by *data*.

        *data* looks like {"comment_id": ..., "verb": "😊", "method": "POST"}.
        POST adds the user to the verb's user set, DELETE removes them; any
        other method yields BAD_REQUEST.
        """
        comment_id = data["comment_id"]
        verb = data["verb"]
        method = data["method"]
        if not self.db["comment"].find_one({"_id": ObjectId(comment_id)}):
            return {"status": False, "message": "Where is your comments?", "status_code": HTTPStatus.NOT_FOUND}

        if method == "POST":
            # $addToSet keeps the per-verb user list duplicate-free
            self.db["reactions"].update_one(
                {"comment_id": comment_id},
                {"$addToSet": {verb: username}},
                upsert=True,
            )
            code = HTTPStatus.CREATED
        elif method == "DELETE":
            self.db["reactions"].update_one(
                {"comment_id": comment_id},
                {"$pull": {verb: username}},
            )
            code = HTTPStatus.ACCEPTED
        else:
            code = HTTPStatus.BAD_REQUEST
        return {"status": True, "message": "success", "status_code": code}
2021-07-26 20:24:32 +08:00
2021-06-17 10:39:47 +08:00
class CommentChildMongoResource(CommentChildResource, CommentMongoResource, Mongo):
    """Paged listing of the child comments under one parent comment."""

    def __init__(self):
        super().__init__()
        self.page = 1
        self.size = 5
        self.projection = {"ip": False, "parent_id": False}

    def get_comment(self, parent_id: str, page: int, size: int) -> dict:
        """Return one page of non-deleted children of *parent_id*, newest first."""
        condition = {"parent_id": ObjectId(parent_id), "deleted_at": {"$exists": False}, "type": "child"}
        total = self.db["comment"].count_documents(condition)
        cursor = (self.db["comment"]
                  .find(condition, self.projection)
                  .sort("_id", pymongo.DESCENDING)
                  .limit(size)
                  .skip((page - 1) * size))
        comments = list(cursor)
        self.convert_objectid(comments)
        self.get_user_group(comments)
        return {"data": comments, "count": total}
2021-07-11 11:37:45 +08:00
class CommentNewestMongoResource(CommentNewestResource, CommentMongoResource, Mongo):
    """Site-wide newest comments, annotated with each resource's Chinese name."""

    def __init__(self):
        super().__init__()
        self.page = 1
        self.size = 5
        self.projection = {"ip": False, "parent_id": False, "children": False}
        self.condition = {"deleted_at": {"$exists": False}}

    def get_comment(self, page: int, size: int) -> dict:
        """Return one page of the newest non-deleted comments of any type.

        Each comment gains a ``cnname`` field (the resource's Chinese name).
        Fix: the original crashed with a TypeError when a comment referenced a
        resource id no longer present in ``yyets``; cnname now falls back to "".
        """
        condition = {"deleted_at": {"$exists": False}}
        count = self.db["comment"].count_documents(condition)
        data = self.db["comment"].find(condition, self.projection) \
            .sort("_id", pymongo.DESCENDING).limit(size).skip((page - 1) * size)
        data = list(data)
        self.convert_objectid(data)
        self.get_user_group(data)
        for i in data:
            # comments without a resource_id belong to the discussion board (233)
            resource_id = i.get("resource_id", 233)
            res = self.db["yyets"].find_one({"data.info.id": resource_id}) or {}
            i["cnname"] = res.get("data", {}).get("info", {}).get("cnname", "")
        return {
            "data": data,
            "count": count,
        }
2021-06-17 10:39:47 +08:00
class GrafanaQueryMongoResource(GrafanaQueryResource, Mongo):
    def get_grafana_data(self, date_series):
        """Return a cursor over metrics documents for the given date strings.

        Fix: the original annotated the return type as ``str`` although it
        returns a pymongo cursor (an iterable of dicts); the misleading
        annotation has been removed.

        :param date_series: list of "YYYY-MM-DD" date strings to fetch
        """
        condition = {"date": {"$in": date_series}}
        projection = {"_id": False}
        return self.db["metrics"].find(condition, projection)
class MetricsMongoResource(MetricsResource, Mongo):
    """Daily usage counters keyed by "YYYY-MM-DD" date strings."""

    def set_metrics(self, metrics_type: str):
        """Increment today's counter for *metrics_type*, creating the doc if absent."""
        today = time.strftime("%Y-%m-%d", time.localtime())
        self.db["metrics"].update_one(
            {"date": today},
            {"$inc": {metrics_type: 1}},
            upsert=True,
        )

    def get_metrics(self, from_date: str, to_date: str) -> dict:
        """Return all metrics documents between the two dates (inclusive), newest first.

        Dates are "YYYY-MM-DD" strings; the range is expanded day by day and
        matched with ``$in``.
        """
        begin = date(*(int(part) for part in from_date.split("-")))
        end = date(*(int(part) for part in to_date.split("-")))
        span = (end - begin).days + 1
        wanted = [str(begin + timedelta(days=offset)) for offset in range(span)]
        cursor = self.db["metrics"].find(
            {"date": {"$in": wanted}}, {"_id": False}
        ).sort("date", pymongo.DESCENDING)
        return dict(metrics=list(cursor))
class NameMongoResource(NameResource, Mongo):
    """Dump resource names, readable-concatenated or as raw sub-documents."""

    def get_names(self, is_readable: [str, bool]) -> dict:
        """Return {"data": [...]}.

        Truthy *is_readable*: each entry is one string
        "area + channel: cnname enname aliasname" built server-side via $concat.
        Otherwise the raw name fields are returned per resource.
        """
        if is_readable:
            pipeline = [
                {
                    "$project": {
                        "name": {
                            "$concat": [
                                "$data.info.area",
                                "$data.info.channel_cn",
                                ": ",
                                "$data.info.cnname",
                                " ",
                                "$data.info.enname",
                                " ",
                                "$data.info.aliasname",
                            ]
                        },
                        "_id": False,
                    }
                }
            ]
            cursor = self.db["yyets"].aggregate(pipeline)
        else:
            fields = {
                "_id": False,
                "data.info.cnname": True,
                "data.info.enname": True,
                "data.info.aliasname": True,
                "data.info.channel_cn": True,
            }
            cursor = self.db["yyets"].find({}, fields)
        names = []
        for doc in cursor:
            names.extend(doc.values())
        return dict(data=names)
class ResourceMongoResource(ResourceResource, Mongo):
    # shared Redis connection, created once at class definition time
    redis = Redis().r

    def fansub_search(self, class_name: str, kw: str):
        """Search an external fansub site (class named *class_name*) for *kw*.

        :return: list of preview dicts, or [] when nothing is found.
        NOTE(review): ``result.pop("class")`` raises KeyError if the preview
        dict lacks that key — assumes search_preview always includes it.
        """
        class_ = globals().get(class_name)
        result = class_().search_preview(kw)
        result.pop("class")
        if result:
            return list(result.values())
        else:
            return []

    def get_resource_data(self, resource_id: int, username: str) -> dict:
        """Fetch one resource document, atomically bumping its view counter.

        When *username* is given, an ``is_like`` flag is added based on the
        user's favourites.
        NOTE(review): if the resource id does not exist, ``data`` is None and
        the is_like assignment would raise — presumably callers validate ids.
        """
        data = self.db["yyets"].find_one_and_update(
            {"data.info.id": resource_id},
            {'$inc': {'data.info.views': 1}},
            {'_id': False})
        if username:
            user_like_data = self.db["users"].find_one({"username": username})
            if user_like_data and resource_id in user_like_data.get("like", []):
                data["is_like"] = True
            else:
                data["is_like"] = False
        return data

    def search_resource(self, keyword: str) -> dict:
        """Case-insensitive substring search over cn/en/alias names.

        Falls back to live fansub-site searches (under "extra") when the local
        database has no match.
        """
        projection = {'_id': False,
                      'data.info': True,
                      }
        data = self.db["yyets"].find({
            "$or": [
                {"data.info.cnname": {'$regex': f'.*{keyword}.*', "$options": "-i"}},
                {"data.info.enname": {'$regex': f'.*{keyword}.*', "$options": "-i"}},
                {"data.info.aliasname": {'$regex': f'.*{keyword}.*', "$options": "-i"}},
            ]},
            projection
        )
        data = list(data)
        returned = {}
        if data:
            returned = dict(data=data)
            returned["extra"] = []
        else:
            # try fansub sites one after another until one yields results
            extra = self.fansub_search(ZimuxiaOnline.__name__, keyword) or \
                    self.fansub_search(NewzmzOnline.__name__, keyword) or \
                    self.fansub_search(ZhuixinfanOnline.__name__, keyword) or \
                    self.fansub_search(XL720.__name__, keyword) or \
                    self.fansub_search(BD2020.__name__, keyword)
            returned["data"] = []
            returned["extra"] = extra
        return returned

    def patch_resource(self, new_data: dict):
        """Merge user-submitted episode links into an existing resource.

        *new_data* carries resource_id, season_num, formats and items; links
        are appended to the matching season under the first submitted format.
        """
        rid = new_data["resource_id"]
        new_data.pop("resource_id")
        old_data = self.db["yyets"].find_one(
            {"data.info.id": rid},
        )
        new_data["season_cn"] = self.convert_season(new_data["season_num"])
        # 1. totally empty resource:
        if len(old_data["data"]["list"]) == 0:
            new_data["season_cn"] = self.convert_season(new_data["season_num"])
            old_data["data"]["list"].append(new_data)
        else:
            for season in old_data["data"]["list"]:
                # season_num may be stored as int or str; accept either form
                if new_data["season_num"] in [season["season_num"], int(season["season_num"])]:
                    user_format = new_data["formats"][0]
                    for u in new_data["items"][user_format]:
                        season["items"][user_format].append(u)
        self.db["yyets"].find_one_and_replace(
            {"data.info.id": rid},
            old_data
        )

    def add_resource(self, new_data: dict):
        """Insert a brand-new resource under a randomly chosen unused id."""
        rid = self.get_appropriate_id()
        new_data["data"]["info"]["id"] = rid
        self.db["yyets"].insert_one(new_data)
        return {"status": True, "message": "success", "id": rid}

    def delete_resource(self, data: dict):
        """Delete a whole resource, or — when data["meta"] is present — only the
        episode entries matching episode/name/size/dateline exactly."""
        rid = data["resource_id"]
        meta = data.get("meta")
        if meta:
            db_data = self.db["yyets"].find_one({"data.info.id": rid})
            for season in db_data["data"]["list"]:
                for episode in season["items"].values():
                    for v in episode:
                        if v["episode"] == meta["episode"] and v["name"] == meta["name"] and \
                                v["size"] == meta["size"] and v["dateline"] == meta["dateline"]:
                            episode.remove(v)
            # replace it
            self.db["yyets"].find_one_and_replace({"data.info.id": rid}, db_data)
        else:
            self.db["yyets"].delete_one({"data.info.id": rid})

    def get_appropriate_id(self):
        """Pick a random id in [50000, 80000] not already taken (recursive retry)."""
        col = self.db["yyets"]
        random_id = random.randint(50000, 80000)
        data = col.find_one({"data.info.id": random_id}, projection={"_id": True})
        if data:
            return self.get_appropriate_id()
        else:
            return random_id

    @staticmethod
    def convert_season(number: [int, str]):
        """Render a season number for display; 0 (or "0") means the main feature."""
        pass  # NOTE(review): dead statement — safe to delete
        if number in (0, "0"):
            return "正片"
        else:
            return f"{number}"
2021-06-17 10:39:47 +08:00
class TopMongoResource(TopResource, Mongo):
    """Rankings: most-favourited resources and most-viewed resources per region."""

    projection = {'_id': False, 'data.info': True}

    def get_most(self) -> list:
        """Return up to 15 resources ranked by how many users favourited them."""
        like_projection = {"_id": False, "like": True}
        cursor = self.db["users"].find({}, like_projection)
        tally = {}
        for user_doc in cursor:
            for rid in user_doc.get("like", []):
                tally[rid] = tally.get(rid, 0) + 1
        ranked = sorted(tally, key=tally.get, reverse=True)
        top_docs = self.db["yyets"].find({"data.info.id": {"$in": ranked}}, self.projection).limit(15)
        return list(top_docs)

    def get_top_resource(self) -> dict:
        """Return the 15 most-viewed resources overall and per area (US/JP/KR/UK).

        The "class" key maps area abbreviations to display names for the UI.
        """
        area_dict = dict(ALL={"$regex": ".*"}, US="美国", JP="日本", KR="韩国", UK="英国")
        all_data = {}
        for abbr, area in area_dict.items():
            # id 233 appears to be the discussion-board pseudo resource
            # (see the comment link logic); it is excluded from rankings
            cursor = self.db["yyets"] \
                .find({"data.info.area": area, "data.info.id": {"$ne": 233}}, self.projection) \
                .sort("data.info.views", pymongo.DESCENDING) \
                .limit(15)
            all_data[abbr] = list(cursor)
        area_dict["ALL"] = "全部"
        all_data["class"] = area_dict
        return all_data
2021-08-15 12:28:19 +08:00
class LikeMongoResource(LikeResource, Mongo):
    """A user's favourite ("like") resources."""

    projection = {'_id': False, 'data.info': True}

    def get_user_like(self, username: str) -> list:
        """Return the user's favourite resources, most viewed first.

        Fix: ``find_one`` returns None for an unknown username and the original
        crashed with AttributeError; an unknown user now yields [].
        """
        user_doc = self.db["users"].find_one({"username": username}) or {}
        like_list = user_doc.get("like", [])
        data = self.db["yyets"].find({"data.info.id": {"$in": like_list}}, self.projection) \
            .sort("data.info.views", pymongo.DESCENDING)
        return list(data)

    def add_remove_fav(self, resource_id: int, username: str) -> dict:
        """Toggle *resource_id* in the user's favourites.

        Fix: guarded against an unknown username (same None crash as above);
        the update then simply matches no document.

        :return: {"status_code": OK (removed) | CREATED (added), "message": ...}
        """
        returned = {"status_code": 0, "message": ""}
        user_doc = self.db["users"].find_one({"username": username}) or {}
        like_list: list = user_doc.get("like", [])
        if resource_id in like_list:
            returned["status_code"] = HTTPStatus.OK
            returned["message"] = "已取消收藏"
            like_list.remove(resource_id)
        else:
            returned["status_code"] = HTTPStatus.CREATED
            returned["message"] = "已添加收藏"
            like_list.append(resource_id)
        value = dict(like=like_list)
        self.db["users"].update_one({"username": username}, {'$set': value})
        return returned
2021-06-17 10:39:47 +08:00
class UserMongoResource(UserResource, Mongo):
    """Login/registration and per-user profile handling."""

    def login_user(self, username: str, password: str, captcha: str, captcha_id: str, ip: str, browser: str) -> dict:
        """Log a user in; unknown usernames are transparently registered.

        Order of checks: captcha, account lock, then pbkdf2-sha256 password.
        :return: dict with status_code/message plus username/group.
        NOTE(review): for a brand-new registration ``data`` is still {}, so the
        returned username is None and group defaults to ["user"] — confirm the
        caller tolerates this.
        """
        # verify captcha in the first place.
        redis = Redis().r
        correct_captcha = redis.get(captcha_id)
        if correct_captcha is None:
            return {"status_code": HTTPStatus.BAD_REQUEST, "message": "验证码已过期", "status": False}
        elif correct_captcha.lower() == captcha.lower():
            # single use: expire the captcha right after a successful match
            redis.expire(captcha_id, 0)
        else:
            return {"status_code": HTTPStatus.FORBIDDEN, "message": "验证码错误", "status": False}
        # check user account is locked.
        data = self.db["users"].find_one({"username": username}) or {}
        if data.get("status", {}).get("disable"):
            return {"status_code": HTTPStatus.FORBIDDEN,
                    "status": False,
                    "message": data.get("status", {}).get("reason")}
        returned_value = {"status_code": 0, "message": ""}
        if data:
            # try to login
            stored_password = data["password"]
            if pbkdf2_sha256.verify(password, stored_password):
                returned_value["status_code"] = HTTPStatus.OK
            else:
                returned_value["status_code"] = HTTPStatus.FORBIDDEN
                returned_value["message"] = "用户名或密码错误"
        else:
            # register
            hash_value = pbkdf2_sha256.hash(password)
            try:
                self.db["users"].insert_one(dict(username=username, password=hash_value,
                                                 date=ts_date(), ip=ip, browser=browser)
                                            )
                returned_value["status_code"] = HTTPStatus.CREATED
            except Exception as e:
                returned_value["status_code"] = HTTPStatus.INTERNAL_SERVER_ERROR
                returned_value["message"] = str(e)
        returned_value["username"] = data.get("username")
        returned_value["group"] = data.get("group", ["user"])
        return returned_value

    def get_user_info(self, username: str) -> dict:
        """Return the user's document minus _id/password; group defaults to ["user"].

        NOTE(review): raises AttributeError for an unknown username (find_one
        returns None) — presumably only called for authenticated users.
        """
        projection = {"_id": False, "password": False}
        data = self.db["users"].find_one({"username": username}, projection)
        data.update(group=data.get("group", ["user"]))
        return data

    def update_user_last(self, username: str, now_ip: str) -> None:
        """Record the user's last-seen date and IP address."""
        self.db["users"].update_one({"username": username},
                                    {"$set": {"lastDate": (ts_date()), "lastIP": now_ip}}
                                    )

    def update_user_info(self, username: str, data: dict) -> dict:
        """Update whitelisted profile fields (currently only "email").

        A new email is stored unverified and a 5-digit verification code is
        mailed out; resends are rate-limited to one per 30 minutes per email.
        """
        redis = Redis().r
        valid_fields = ["email"]
        valid_data = {}
        for field in valid_fields:
            if data.get(field):
                valid_data[field] = data[field]
        # very loose sanity check: some character before and after an "@"
        if valid_data.get("email") and not re.findall(r"\S@\S", valid_data.get("email")):
            return {"status_code": HTTPStatus.BAD_REQUEST, "status": False, "message": "email format error "}
        elif valid_data.get("email"):
            # rate limit
            user_email = valid_data.get("email")
            timeout_key = f"timeout-{user_email}"
            if redis.get(timeout_key):
                return {"status_code": HTTPStatus.TOO_MANY_REQUESTS,
                        "status": False,
                        "message": f"try again in {redis.ttl(timeout_key)}s"}
            verify_code = random.randint(10000, 99999)
            valid_data["email"] = {"verified": False, "address": user_email}
            # send email confirm
            subject = "[人人影视下载分享站] 请验证你的邮箱"
            body = f"{username} 您好,<br>请输入如下验证码完成你的邮箱认证。验证码有效期为24小时。<br>" \
                   f"如果您未有此请求,请忽略此邮件。<br><br>验证码: {verify_code}"
            # resend window 30 min; the code itself lives 24h in a Redis hash
            redis.set(timeout_key, username, ex=1800)
            redis.hset(user_email, mapping={"code": verify_code, "wrong": 0})
            redis.expire(user_email, 24 * 3600)
            send_mail(user_email, subject, body)
        self.db["users"].update_one(
            {"username": username},
            {"$set": valid_data}
        )
        return {"status_code": HTTPStatus.CREATED, "status": True, "message": "success"}
2021-07-10 23:28:44 +08:00
class DoubanMongoResource(DoubanResource, Mongo):
    """Douban metadata (poster, rating, cast, ...) per resource, cached in MongoDB."""

    def get_douban_data(self, rid: int) -> dict:
        """Return cached-or-crawled douban data; any failure yields a null poster."""
        with contextlib.suppress(Exception):
            return self.find_douban(rid)
        return {"posterData": None}

    def get_douban_image(self, rid: int) -> bytes:
        """Return the raw poster image bytes for a resource."""
        db_data = self.get_douban_data(rid)
        return db_data["posterData"]

    @retry(IndexError, tries=3, delay=5)
    def find_douban(self, resource_id: int):
        """Serve douban info from the cache, or crawl the douban search and
        detail pages and cache the result (retrying on IndexError, which the
        scraping raises when expected markup is missing).
        """
        session = requests.Session()
        ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
        session.headers.update({"User-Agent": ua})
        douban_col = self.db["douban"]
        yyets_col = self.db["yyets"]
        data = douban_col.find_one({"resourceId": resource_id}, {"_id": False, "raw": False})
        if data:
            logging.info("Existing data for %s", resource_id)
            return data
        # data not found, craw from douban
        projection = {"data.info.cnname": True, "data.info.enname": True, "data.info.aliasname": True}
        names = yyets_col.find_one({"data.info.id": resource_id}, projection=projection)
        if names is None:
            return {}
        cname = names["data"]["info"]["cnname"]
        logging.info("cnname for douban is %s", cname)
        search_html = session.get(DOUBAN_SEARCH.format(cname)).text
        logging.info("Analysis search html...length %s", len(search_html))
        soup = BeautifulSoup(search_html, 'html.parser')
        douban_item = soup.find_all("div", class_="content")
        # take the first search hit and extract its douban subject id
        fwd_link = unquote(douban_item[0].a["href"])
        douban_id = re.findall(r"https://movie.douban.com/subject/(\d*)/&query=", fwd_link)[0]
        final_data = self.get_craw_data(cname, douban_id, resource_id, search_html, session)
        # cache a copy including raw HTML, but keep the bulky "raw" field out
        # of the returned document
        douban_col.insert_one(final_data.copy())
        final_data.pop("raw")
        return final_data

    @staticmethod
    def get_craw_data(cname, douban_id, resource_id, search_html, session):
        """Scrape one douban detail page into the document stored in ``douban``."""
        detail_link = DOUBAN_DETAIL.format(douban_id)
        detail_html = session.get(detail_link).text
        logging.info("Analysis detail html...%s", detail_link)
        soup = BeautifulSoup(detail_html, 'html.parser')
        directors = [i.text for i in (soup.find_all("a", rel="v:directedBy"))]
        # every scraped field defaults to "" so missing markup cannot abort the crawl
        release_date = poster_image_link = rating = year_text = intro = writers = episode_count = episode_duration = ""
        with contextlib.suppress(IndexError):
            episode_duration = soup.find_all("span", property="v:runtime")[0].text
        for i in soup.find_all("span", class_="pl"):
            if i.text == "编剧":
                writers = re.sub(r"\s", "", list(i.next_siblings)[1].text).split("/")
            if i.text == "集数:":
                episode_count = str(i.nextSibling)
            if i.text == "单集片长:" and not episode_duration:
                episode_duration = str(i.nextSibling)
        actors = [i.text for i in soup.find_all("a", rel="v:starring")]
        genre = [i.text for i in soup.find_all("span", property="v:genre")]
        with contextlib.suppress(IndexError):
            release_date = soup.find_all("span", property="v:initialReleaseDate")[0].text
        with contextlib.suppress(IndexError):
            poster_image_link = soup.find_all("div", id="mainpic")[0].a.img["src"]
        with contextlib.suppress(IndexError):
            rating = soup.find_all("strong", class_="ll rating_num")[0].text
        with contextlib.suppress(IndexError):
            year_text = re.sub(r"[()]", "", soup.find_all("span", class_="year")[0].text)
        with contextlib.suppress(IndexError):
            intro = re.sub(r"\s", "", soup.find_all("span", property="v:summary")[0].text)
        final_data = {
            "name": cname,
            # raw HTML kept for debugging/re-parsing; stripped before returning
            "raw": {
                "search_url": DOUBAN_SEARCH.format(cname),
                "detail_url": detail_link,
                "search_html": search_html,
                "detail_html": detail_html
            },
            "doubanId": int(douban_id),
            "doubanLink": detail_link,
            "posterLink": poster_image_link,
            "posterData": session.get(poster_image_link).content,
            "resourceId": resource_id,
            "rating": rating,
            "actors": actors,
            "directors": directors,
            "genre": genre,
            "releaseDate": release_date,
            "episodeCount": episode_count,
            "episodeDuration": episode_duration,
            "writers": writers,
            "year": year_text,
            "introduction": intro
        }
        return final_data
2021-07-18 11:51:39 +08:00
class DoubanReportMongoResource(DoubanReportResource, Mongo):
    """User reports about wrong douban matches."""

    def get_error(self) -> dict:
        """Return every report document (without _id)."""
        reports = self.db["douban_error"].find(projection={"_id": False})
        return dict(data=list(reports))

    def report_error(self, captcha: str, captcha_id: int, content: str, resource_id: int) -> dict:
        """File a report for *resource_id* after captcha validation.

        On captcha failure returns the captcha error; otherwise appends the
        report text and returns {"count": matched docs}.
        """
        returned = {"status_code": 0, "message": ""}
        verify_result = CaptchaResource().verify_code(captcha, captcha_id)
        if not verify_result["status"]:
            returned["status_code"] = HTTPStatus.BAD_REQUEST
            returned["message"] = verify_result["message"]
            return returned
        count = self.db["douban_error"].update_one(
            {"resource_id": resource_id},
            {"$push": {"content": content}},
            upsert=True,
        ).matched_count
        return dict(count=count)
2021-08-15 12:28:19 +08:00
class NotificationMongoResource(NotificationResource, Mongo):
    """Per-user reply notifications kept as read/unread comment-id lists."""

    def get_notification(self, username, page, size):
        """Return one page of notifications for *username*.

        *size* is shared between the unread and read lists: unread items are
        served first and the remainder of the page budget goes to read items.
        NOTE(review): ``size - len(unread)`` can go negative, which makes the
        read slice empty — presumably intended ("unread first"); confirm.
        """
        notify = self.db["notification"].find_one({"username": username}, projection={"_id": False})
        if not notify:
            return {}
        # size is shared
        unread = notify.get("unread", [])
        id_list = []
        for item in unread[(page - 1) * size:size * page]:
            id_list.append(item)
        notify["unread_item"] = self.get_content(id_list)
        size = size - len(unread)
        read = notify.get("read", [])
        id_list = []
        for item in read[(page - 1) * size:size * page]:
            id_list.append(item)
        notify["read_item"] = self.get_content(id_list)
        # replace the raw id lists with resolved items and counts
        notify.pop("unread", None)
        notify.pop("read", None)
        notify["unread_count"] = len(unread)
        notify["read_count"] = len(read)
        return notify

    def get_content(self, id_list):
        """Resolve comment ids into documents, newest first, each annotated with
        the content of the comment it replies to.

        NOTE(review): the reply-target id is recovered from a double-quoted
        token inside the comment content — assumes the composer always embeds
        it; confirm against the frontend comment format.
        """
        comments = self.db["comment"].find({"_id": {"$in": id_list}},
                                           projection={"ip": False, "parent_id": False}
                                           ).sort("_id", pymongo.DESCENDING)
        comments = list(comments)
        for comment in comments:
            comment["id"] = str(comment["_id"])
            comment.pop("_id")
            reply_to_id = re.findall(r'"(.*)"', comment["content"])[0]
            rtc = self.db["comment"].find_one({"_id": ObjectId(reply_to_id)},
                                              projection={"content": True, "_id": False})
            comment["reply_to_content"] = rtc["content"]
        return comments

    def update_notification(self, username, verb, comment_id):
        """Move *comment_id* between lists: verb "read" marks it read, anything
        else marks it unread again."""
        if verb == "read":
            v1, v2 = "read", "unread"
        else:
            v1, v2 = "unread", "read"
        self.db["notification"].find_one_and_update(
            {"username": username},
            {
                "$push": {v1: ObjectId(comment_id)},
                "$pull": {v2: ObjectId(comment_id)}
            }
        )
        return {}
class UserEmailMongoResource(UserEmailResource, Mongo):
    def verify_email(self, username, code):
        """Check a mailed verification code for the user's pending email.

        Locks the account after 10 wrong attempts; on success marks the email
        verified and clears the Redis code + rate-limit keys.
        NOTE(review): the "attempts remaining" figure is computed before the
        wrong-counter increment, so it over-reports by one — confirm intent.
        """
        r = Redis().r
        email = self.db["users"].find_one({"username": username})["email"]["address"]
        # hash holds {"code": ..., "wrong": ...}; assumes the Redis client
        # decodes responses to str — TODO confirm in the Redis wrapper
        verify_data = r.hgetall(email)
        wrong_count = int(verify_data["wrong"])
        MAX = 10
        if wrong_count >= MAX:
            # too many failures: disable the account outright
            self.db["users"].update_one({"username": username},
                                        {"$set": {"status": {"disable": True, "reason": "verify email crack"}}}
                                        )
            return {"status": False, "status_code": HTTPStatus.FORBIDDEN, "message": "Account locked. Please stay away"}
        correct_code = verify_data["code"]
        if correct_code == code:
            # success: burn the code and lift the resend rate limit
            r.expire(email, 0)
            r.expire(f"timeout-{email}", 0)
            self.db["users"].update_one({"username": username},
                                        {"$set": {"email.verified": True}}
                                        )
            return {"status": True, "status_code": HTTPStatus.CREATED, "message": "success"}
        else:
            r.hset(email, "wrong", wrong_count + 1)
            return {"status": False,
                    "status_code": HTTPStatus.FORBIDDEN,
                    "message": f"verification code is incorrect. You have {MAX - wrong_count} attempts remaining"}
class CategoryMongoResource(CategoryResource, Mongo):
    """Browse resources filtered by info fields, optionally with douban data."""

    def get_category(self, query: dict):
        """Return {"data": [...info dicts...], "count": total matches}.

        *query* carries page/size/douban plus arbitrary filters; each remaining
        key K becomes a condition on ``data.info.K``. When douban is truthy,
        cached douban metadata is embedded with the poster base64-encoded.
        """
        page, size, douban = query["page"], query["size"], query["douban"]
        query.pop("page")
        query.pop("size")
        query.pop("douban")
        filters = {f"data.info.{key}": value for key, value in query.items()}
        logging.info("Query dict %s", filters)
        projection = {"_id": False, "data.list": False}
        cursor = self.db["yyets"].find(filters, projection=projection).limit(size).skip((page - 1) * size)
        count = self.db["yyets"].count_documents(filters)
        results = []
        for item in cursor:
            if douban:
                douban_data = self.db["douban"].find_one(
                    {"resourceId": item["data"]["info"]["id"]}, projection=projection)
                if douban_data:
                    # poster bytes are base64-encoded so the payload stays JSON-safe
                    douban_data["posterData"] = base64.b64encode(douban_data["posterData"]).decode("u8")
                    item["data"]["info"]["douban"] = douban_data
                else:
                    item["data"]["info"]["douban"] = {}
            results.append(item["data"]["info"])
        return dict(data=results, count=count)
class ResourceLatestMongoResource(ResourceLatestResource, Mongo):
    """The 100 most recently published episodes, cached in Redis."""

    @staticmethod
    def get_latest_resource() -> dict:
        """Return the latest-episode list, preferring the Redis cache.

        On a cache miss the list is rebuilt from MongoDB and written back
        (no TTL — refresh_latest_resource keeps it current).
        """
        redis = Redis().r
        key = "latest-resource"
        latest = redis.get(key)
        if latest:
            logging.info("Cache hit for latest resource")
            latest = json.loads(latest)
            latest["data"] = latest["data"][:100]
        else:
            logging.warning("Cache miss for latest resource")
            latest = ResourceLatestMongoResource().query_db()
            redis.set(key, json.dumps(latest, ensure_ascii=False))
        return latest

    def query_db(self) -> dict:
        """Scan every resource and return the 100 newest episodes by dateline.

        Fix: ``Collection.count()`` was deprecated in PyMongo 3.7 and removed
        in 4.x; use ``count_documents({})`` for the tqdm total instead.
        """
        col = self.db["yyets"]
        projection = {"_id": False, "status": False, "info": False}
        episode_data = {}
        for res in tqdm(col.find(projection=projection), total=col.count_documents({})):
            for season in res["data"]["list"]:
                for item in season["items"].values():
                    for single in item:
                        ts = single["dateline"]
                        res_name = res["data"]["info"]["cnname"]
                        name = "{}-{}".format(res_name, single["name"])
                        size = single["size"]
                        episode_data[name] = {"timestamp": ts, "size": size,
                                              "resource_id": res["data"]["info"]["id"],
                                              "res_name": res_name, "date": ts_date(int(ts))}
        sorted_res: list = sorted(episode_data.items(), key=lambda x: x[1]["timestamp"], reverse=True)
        limited_res = dict(sorted_res[:100])
        # flatten {name: attrs} into a list of dicts carrying a "name" key
        ok = []
        for k, v in limited_res.items():
            t = {"name": k}
            t.update(v)
            ok.append(t)
        return dict(data=ok)

    def refresh_latest_resource(self):
        """Rebuild the latest-resource list and overwrite the Redis cache."""
        redis = Redis().r
        logging.info("Getting new resources...")
        latest = self.query_db()
        redis.set("latest-resource", json.dumps(latest, ensure_ascii=False))
        logging.info("latest-resource data refreshed.")