refactor code

Benny
2023-03-19 17:35:39 +01:00
committed by GitHub
parent 4310b7d429
commit 0979b8a4f5
43 changed files with 3288 additions and 3385 deletions

Submodule YYeTsFE updated: 5592638dd4...0b0b3669fc


@@ -8,7 +8,7 @@ pymongo==4.3.3
tornado==6.2
captcha==0.4
passlib==1.7.4
fakeredis==2.10.0
fakeredis==2.10.1
pytz==2022.7.1
filetype==1.2.0
requests[socks]


@@ -9,21 +9,19 @@ import os
import sys
from shutil import rmtree
from setuptools import find_packages, setup, Command
from setuptools import Command, setup
# Package meta-data.
NAME = 'yyets'
DESCRIPTION = 'https://yyets.dmesg.app/ wrapper'
URL = 'https://github.com/tgbot-collection/YYeTsBot'
EMAIL = 'benny.think@gmail.com'
AUTHOR = 'BennyThink'
REQUIRES_PYTHON = '>=3.6.0'
VERSION = '1.0.0'
NAME = "yyets"
DESCRIPTION = "https://yyets.dmesg.app/ wrapper"
URL = "https://github.com/tgbot-collection/YYeTsBot"
EMAIL = "benny.think@gmail.com"
AUTHOR = "BennyThink"
REQUIRES_PYTHON = ">=3.6.0"
VERSION = "1.0.0"
# What packages are required for this module to be executed?
REQUIRED = [
"requests"
]
REQUIRED = ["requests"]
# What packages are optional?
EXTRAS = {
@@ -40,8 +38,8 @@ here = os.path.abspath(os.path.dirname(__file__))
# Import the README and use it as the long-description.
# Note: this will only work if 'README.md' is present in your MANIFEST.in file!
try:
with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f:
long_description = '\n' + f.read()
with io.open(os.path.join(here, "README.md"), encoding="utf-8") as f:
long_description = "\n" + f.read()
except FileNotFoundError:
long_description = DESCRIPTION
@@ -49,22 +47,22 @@ except FileNotFoundError:
about = {}
if not VERSION:
project_slug = NAME.lower().replace("-", "_").replace(" ", "_")
with open(os.path.join(here, project_slug, '__version__.py')) as f:
with open(os.path.join(here, project_slug, "__version__.py")) as f:
exec(f.read(), about)
else:
about['__version__'] = VERSION
about["__version__"] = VERSION
class UploadCommand(Command):
"""Support setup.py upload."""
description = 'Build and publish the package.'
description = "Build and publish the package."
user_options = []
@staticmethod
def status(s):
"""Prints things in bold."""
print('\033[1m{0}\033[0m'.format(s))
print("\033[1m{0}\033[0m".format(s))
def initialize_options(self):
pass
@@ -74,20 +72,20 @@ class UploadCommand(Command):
def run(self):
try:
self.status('Removing previous builds…')
rmtree(os.path.join(here, 'dist'))
self.status("Removing previous builds…")
rmtree(os.path.join(here, "dist"))
except OSError:
pass
self.status('Building Source and Wheel (universal) distribution…')
os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable))
self.status("Building Source and Wheel (universal) distribution…")
os.system("{0} setup.py sdist bdist_wheel --universal".format(sys.executable))
self.status('Uploading the package to PyPI via Twine…')
os.system('twine upload dist/*')
self.status("Uploading the package to PyPI via Twine…")
os.system("twine upload dist/*")
self.status('Pushing git tags…')
os.system('git tag v{0}'.format(about['__version__']))
os.system('git push --tags')
self.status("Pushing git tags…")
os.system("git tag v{0}".format(about["__version__"]))
os.system("git push --tags")
sys.exit()
@@ -95,37 +93,36 @@ class UploadCommand(Command):
# Where the magic happens:
setup(
name=NAME,
version=about['__version__'],
version=about["__version__"],
description=DESCRIPTION,
long_description=long_description,
long_description_content_type='text/markdown',
long_description_content_type="text/markdown",
author=AUTHOR,
author_email=EMAIL,
python_requires=REQUIRES_PYTHON,
url=URL,
# packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]),
# If your package is a single module, use this instead of 'packages':
packages=['yyets'],
packages=["yyets"],
# entry_points={
# 'console_scripts': ['mycli=mymodule:cli'],
# },
install_requires=REQUIRED,
extras_require=EXTRAS,
include_package_data=True,
license='MIT',
license="MIT",
classifiers=[
# Trove classifiers
# Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
'License :: OSI Approved :: MIT License',
'Programming Language :: Python',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: Implementation :: CPython',
'Programming Language :: Python :: Implementation :: PyPy'
"License :: OSI Approved :: MIT License",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
],
# $ setup.py publish support.
cmdclass={
'upload': UploadCommand,
"upload": UploadCommand,
},
)
)

File diff suppressed because it is too large.


@@ -1,69 +0,0 @@
#!/usr/local/bin/python3
# coding: utf-8
# YYeTsBot - SQLite.py
# 6/17/21 12:53
#
__author__ = "Benny <benny.think@gmail.com>"
import json
import logging
import sqlite3
from database import ResourceResource
logging.warning("\n\n%s\n### SQLite adapter is immature! Only search and view resource is available for now. ###\n%s\n",
"#" * 87, "#" * 87)
class SQLite:
def __init__(self):
self.con = sqlite3.connect("yyets.sqlite", check_same_thread=False)
self.cur = self.con.cursor()
def __del__(self):
self.con.close()
class FakeSQLiteResource:
pass
class ResourceSQLiteResource(ResourceResource, SQLite):
def get_resource_data(self, resource_id: int, username=None) -> dict:
self.cur.execute("SELECT data FROM yyets WHERE id=?", (resource_id,))
data = self.cur.fetchone()
return json.loads(data[0])
def search_resource(self, keyword: str) -> dict:
Query = """
SELECT id, cnname, enname, aliasname FROM yyets WHERE
cnname LIKE ? or enname LIKE ? or aliasname LIKE ?;
"""
keyword = f"%{keyword}%"
self.cur.execute(Query, (keyword, keyword, keyword))
data = self.cur.fetchall()
final_data = []
for item in data:
single = {
"data": {
"info": {
"id": item[0],
"cnname": item[1],
"enname": item[2],
"aliasname": item[3],
}
}
}
final_data.append(single)
return dict(data=list(final_data))
if __name__ == '__main__':
r = SQLite()
print(globals())
# r.get_resource_data(80000)
# a = r.search_resource("NIGERUHA")
# print(json.dumps(a, ensure_ascii=False))
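
A quick usage sketch for the adapter removed above (the import path follows the "YYeTsBot - SQLite.py" header comment and is an assumption; a populated yyets.sqlite next to the script is also assumed):

from SQLite import ResourceSQLiteResource  # assumed module name per the file header

r = ResourceSQLiteResource()
results = r.search_resource("Westworld")   # LIKE match on cnname/enname/aliasname
for item in results["data"]:
    info = item["data"]["info"]
    print(info["id"], info["cnname"])
    detail = r.get_resource_data(info["id"])  # full JSON blob for that resource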


@@ -1,67 +0,0 @@
#!/usr/local/bin/python3
# coding: utf-8
# YYeTsBot - add_year.py
# 4/8/21 18:39
#
__author__ = "Benny <benny.think@gmail.com>"
import logging
import re
import time
from tqdm import tqdm
from common import Mongo
logging.basicConfig(level=logging.INFO)
def ts_year(ts: str) -> int:
return int(time.strftime("%Y", time.localtime(int(ts))))
def extract_year(name: str) -> int:
try:
r = int(re.findall(r"\.(19\d{2}|20\d{2})\.", name)[0])
except:
r = None
return r
col = Mongo().client["zimuzu"]["yyets"]
data = col.find()
for datum in tqdm(data):
list_data = datum["data"]["list"]
translate_year = []
filename_year = []
for single in list_data:
dl = single["items"].values()
for i in dl:
for j in i:
if d := ts_year(j["dateline"]):
translate_year.append(d)
if d := extract_year(j["name"]):
filename_year.append(d)
translate_year = list(set(translate_year))
filename_year = list(set(filename_year)) # more accurate
final_year = []
if filename_year:
final_year = filename_year.copy()
elif translate_year:
final_year = translate_year
_id = datum["data"]["info"]["id"]
name = datum["data"]["info"]["cnname"]
should_write = True
for y in final_year:
if y <= 1900:
final_year.remove(y)
logging.warning("%s is %s, popping %s", name, final_year, y)
col.update_one({"data.info.id": _id}, {"$set": {"data.info.year": final_year}})
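
One caveat in the loop above: removing items from final_year while iterating over it skips elements, so some sub-1900 years can survive. A sketch of the same filter without in-place mutation:

# filter first, then log what was dropped (same intent, no mid-iteration removal)
kept = [y for y in final_year if y > 1900]
for y in final_year:
    if y <= 1900:
        logging.warning("%s is %s, popping %s", name, kept, y)
final_year = kept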

Binary file not shown.


@@ -1,23 +0,0 @@
#!/usr/local/bin/python3
# coding: utf-8
# YYeTsBot - ban_user.py
# 3/26/22 10:26
#
__author__ = "Benny <benny.think@gmail.com>"
from tqdm import tqdm
from common import Mongo
client = Mongo()
user_col = client.db["users"]
with open("ban_user.txt", "r") as f:
for line in tqdm(f, desc="Banning user..."):
user, reason = line.split(maxsplit=1)
ban = {"disable": True, "reason": reason}
user_col.update_one({"username": user}, {"$set": {"status": ban}})
print("Done!")


@@ -1,39 +0,0 @@
#!/usr/local/bin/python3
# coding: utf-8
# YYeTsBot - change_datetime.py
# 6/15/21 14:15
#
__author__ = "Benny <benny.think@gmail.com>"
import time
from common import Mongo
client = Mongo().client
comment = client["zimuzu"]["comment"] # date
users = client["zimuzu"]["users"] # date
all_comments = list(comment.find())
all_users = list(users.find())
for item in all_comments:
object_id = item["_id"]
old_date = time.strptime(item["date"], "%a %b %d %H:%M:%S %Y")
new_date = time.strftime("%Y-%m-%d %H:%M:%S", old_date)
condition = {"_id": object_id}
update = {"$set": {"date": new_date}}
comment.find_one_and_update(condition, update)
for item in all_users:
# unique for username
object_id = item["_id"]
old_date = time.strptime(item["date"], "%a %b %d %H:%M:%S %Y")
new_date = time.strftime("%Y-%m-%d %H:%M:%S", old_date)
condition = {"_id": object_id}
update = {"$set": {"date": new_date}}
users.find_one_and_update(condition, update)
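
The conversion both loops perform, as a standalone sketch:

import time

# normalize the ctime-style string stored in the db to "%Y-%m-%d %H:%M:%S"
old = "Sun Mar 19 17:35:39 2023"
parsed = time.strptime(old, "%a %b %d %H:%M:%S %Y")
print(time.strftime("%Y-%m-%d %H:%M:%S", parsed))  # 2023-03-19 17:35:39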


@@ -14,8 +14,12 @@ import pymongo
class Mongo:
def __init__(self):
self.client = pymongo.MongoClient(host=os.getenv("MONGO", "localhost"), connect=False,
connectTimeoutMS=5000, serverSelectionTimeoutMS=5000)
self.client = pymongo.MongoClient(
host=os.getenv("MONGO", "localhost"),
connect=False,
connectTimeoutMS=5000,
serverSelectionTimeoutMS=5000,
)
self.db = self.client["zimuzu"]
def __del__(self):


@@ -1,36 +0,0 @@
#!/usr/local/bin/python3
# coding: utf-8
# YYeTsBot - douban_data.py
# 7/24/21 19:28
#
__author__ = "Benny <benny.think@gmail.com>"
import logging
import pathlib
import sys
logging.basicConfig(level=logging.INFO)
lib_path = pathlib.Path(__file__).parent.parent.resolve().as_posix()
sys.path.append(lib_path)
from Mongo import DoubanMongoResource
from tqdm import tqdm
m = DoubanMongoResource()
m.db["douban"].update_many({}, {"$unset": {"raw": ""}})
logging.info("raw data deleted.")
# only writers are wrong
# wrong_field = ["actors", "directors", "genre", "writers"]
wrong_field = ["writers"]
# String 2 "string" 4 array
for field in wrong_field:
incorrect_data = m.db["douban"].find({field: {"$not": {"$type": 4}}})
for datum in tqdm(incorrect_data):
logging.info("fixing %s", datum)
new_field = datum[field].split()
m.db["douban"].update_one({"_id": datum["_id"]}, {"$set": {field: new_field}})
logging.info("finish")


@@ -16,16 +16,16 @@ import requests
lib_path = pathlib.Path(__file__).parent.parent.resolve().as_posix()
sys.path.append(lib_path)
from Mongo import DoubanMongoResource
from databases.douban import Douban
parser = argparse.ArgumentParser(description='豆瓣数据修复')
parser.add_argument('resource_id', metavar='r', type=int, help='resource id')
parser.add_argument('douban_id', metavar='d', type=int, help='douban id')
parser = argparse.ArgumentParser(description="豆瓣数据修复")
parser.add_argument("resource_id", metavar="r", type=int, help="resource id")
parser.add_argument("douban_id", metavar="d", type=int, help="douban id")
args = parser.parse_args()
resource_id = args.resource_id
douban_id = args.douban_id
douban = DoubanMongoResource()
douban = Douban()
session = requests.Session()
ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
session.headers.update({"User-Agent": ua})


@@ -1,46 +0,0 @@
#!/usr/local/bin/python3
# coding: utf-8
# YYeTsBot - douban.py
# 7/11/21 10:17
#
__author__ = "Benny <benny.think@gmail.com>"
import contextlib
import logging
import pathlib
import sys
import requests
from tqdm import tqdm
logging.basicConfig(level=logging.INFO)
lib_path = pathlib.Path(__file__).parent.parent.resolve().as_posix()
sys.path.append(lib_path)
from Mongo import DoubanMongoResource
def sync_douban():
douban = DoubanMongoResource()
session = requests.Session()
ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
session.headers.update({"User-Agent": ua})
yyets_data = douban.db["yyets"].find()
douban_data = douban.db["douban"].find()
id1 = [i["data"]["info"]["id"] for i in yyets_data]
id2 = [i["resourceId"] for i in douban_data]
rids = list(set(id1).difference(id2))
logging.info("resource id complete %d", len(rids))
for rid in tqdm(rids):
with contextlib.suppress(Exception):
d = douban.find_douban(rid)
logging.info("Processed %s, length %d", rid, len(d))
logging.info("ALL FINISH!")
if __name__ == '__main__':
sync_douban()


@@ -1,28 +0,0 @@
#!/usr/local/bin/python3
# coding: utf-8
# YYeTsBot - format_order.py
# 2/9/21 16:24
#
__author__ = "Benny <benny.think@gmail.com>"
import pymongo
client = pymongo.MongoClient(host="mongo")
db = client["zimuzu"]
col = db["yyets"]
all_data = col.find().sort("data.info.id")
for resource in all_data:
for index in range(len(resource["data"]["list"])):
season = resource["data"]["list"][index]
if season["formats"][0] == "APP":
order = season["formats"][1:]
order.append("APP")
rid = resource["data"]["info"]["id"]
set_value = {"$set": {f"data.list.{index}.formats": order}}
print(f"{rid}-{index}->{set_value}")
col.find_one_and_update({"data.info.id": rid}, set_value)
client.close()


@@ -33,9 +33,13 @@ date_series = generate_date_series("2021-02-01", "2021-03-14")
inserted = []
for date in date_series:
inserted.append({"date": date,
"access": random.randint(1, 50),
"search": random.randint(1, 50),
"resource": random.randint(1, 50)})
inserted.append(
{
"date": date,
"access": random.randint(1, 50),
"search": random.randint(1, 50),
"resource": random.randint(1, 50),
}
)
col.insert_many(inserted)


@@ -14,9 +14,10 @@ import openpyxl
web_path = pathlib.Path(__file__).parent.parent.resolve().as_posix()
sys.path.append(web_path)
from Mongo import Mongo
from tqdm import tqdm
from yyetsweb.utils import ts_date
from common.utils import ts_date
from databases.base import Mongo
wb = openpyxl.open("aliyun.xlsx")
@@ -37,7 +38,7 @@ template = {
"browser": "cli",
"content": "",
"resource_id": 234,
"type": "parent"
"type": "parent",
}
col = Mongo().db["comment"]
share_doc = {
@@ -54,10 +55,10 @@ share_doc = {
"area": "",
"show_type": "",
"expire": "1610401225",
"views": 0
"views": 0,
},
"list": []
}
"list": [],
},
}
Mongo().db["yyets"].update_one({"data.info.id": 234}, {"$set": share_doc}, upsert=True)


@@ -0,0 +1,9 @@
#!/usr/bin/env python3
# coding: utf-8
# YYeTsBot - __init__.py
# 2023-03-17 18:57
from common.utils import setup_logger
setup_logger()


@@ -22,10 +22,10 @@ import pymysql
import pymysql.cursors
from tqdm import tqdm
from utils import setup_logger
from common.utils import setup_logger
setup_logger()
data_path = pathlib.Path(__file__).parent.joinpath("templates", "dump")
data_path = pathlib.Path(__file__).parent.parent.joinpath("templates", "dump")
data_path.mkdir(exist_ok=True)
sqlite_file = data_path.joinpath("yyets.db")
@@ -47,17 +47,52 @@ def MongoDB():
def read_resource():
logging.info("Reading resource from mongo")
client = MongoDB()
data = client["zimuzu"]["yyets"].find(projection={"_id": False})
data = client["zimuzu"]["yyets"].aggregate(
[
{
"$project": {
"data.info.id": 1,
"data.info.cnname": 1,
"data.info.enname": 1,
"data.info.aliasname": 1,
"data.info.views": 1,
"data.info.area": 1,
"fullDocument": "$data",
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"$data.info",
{"data": "$fullDocument"},
]
}
}
},
]
)
return data
def read_comment():
logging.info("Reding comment from mongo")
logging.info("Reading comment from mongo")
client = MongoDB()
data = client["zimuzu"]["comment"].find(
projection={"_id": False, "username": False, "ip": False, "browser": False}
res = client["zimuzu"]["comment"].aggregate(
[
{
"$project": {
"_id": 0,
"content": 1,
"date": 1,
"resource_id": 1,
"browser": "browser",
"username": "username",
}
}
]
)
return data
return res
def prepare_mysql():
@@ -72,17 +107,14 @@ def prepare_mysql():
aliasname varchar(256) null,
area varchar(32),
views int null,
data longtext null,
douban longtext null,
image blob null
data longtext null
) charset utf8mb4;
"""
comment_sql = """
create table comment
(
content longtext null,
date varchar(256) null,
id int null,
content longtext null,
resource_id varchar(256) null,
browser varchar(256) null,
username varchar(256) null
@@ -111,17 +143,15 @@ def prepare_sqlite():
aliasname varchar(256) null,
area varchar(32),
views int null,
data longtext null,
douban longtext null,
image blob null
data longtext null
);
"""
comment_sql = """
create table comment
(
date varchar(256) null,
content longtext null,
date varchar(256) null,
id int null,
resource_id varchar(256) null,
browser varchar(256) null,
username varchar(256) null
@@ -142,22 +172,13 @@ def dump_resource():
client = MongoDB()
db = client["zimuzu"]
for each in tqdm(res, total=db["yyets"].count_documents({})):
data = each["data"]["info"]
resource_id = data["id"]
cnname = data["cnname"]
enname = data["enname"]
aliasname = data["aliasname"]
views = data["views"]
area = data["area"]
data = json.dumps(each, ensure_ascii=False)
batch_data.append(
(resource_id, cnname, enname, aliasname, area, views, data, "", "")
)
line = list(each.values())
line[-1] = json.dumps(line[-1], ensure_ascii=False)
batch_data.append(line)
mb.append(each)
if len(batch_data) == CHUNK_SIZE:
sql1 = "insert into yyets values (%s, %s, %s, %s, %s, %s, %s, %s,%s)"
sql2 = "insert into yyets values (?, ?, ?, ?, ?,?,?,?,?)"
sql1 = "insert into yyets values (%s, %s, %s, %s, %s, %s, %s)"
sql2 = "insert into yyets values (?, ?, ?, ?, ?,?,?)"
insert_func(batch_data, mb, sql1, sql2, "yyets")
batch_data = []
mb = []
@@ -187,18 +208,11 @@ def dump_comment():
mb = []
client = MongoDB()
for each in tqdm(res, total=client["zimuzu"]["comment"].count_documents({})):
content = each["content"]
date = each["date"]
id = each.get("id", 0)
resource_id = each["resource_id"]
browser = "Fake Browser"
username = "Anonymous"
batch_data.append((content, date, id, resource_id, browser, username))
each.update(browser=browser, username=username)
batch_data.append(list(each.values()))
mb.append(each)
if len(batch_data) == CHUNK_SIZE:
sql1 = "insert into comment values (%s, %s, %s, %s, %s, %s)"
sql2 = "insert into comment values ( ?, ?, ?,?, ?,?)"
sql1 = "insert into comment values (%s, %s, %s, %s,%s)"
sql2 = "insert into comment values ( ?, ?, ?, ?,?)"
insert_func(batch_data, mb, sql1, sql2, "comment")
batch_data = []
mb = []
@@ -256,4 +270,6 @@ def entry_dump():
if __name__ == "__main__":
t0 = time.time()
entry_dump()
logging.info("Total time used: %.2fs" % (time.time() - t0))
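
To make the new export shape concrete, here is a sketch of what the $project/$replaceRoot/$mergeObjects pipeline emits per resource (field values invented):

import json

# data.info is flattened to the top level; the whole original `data` survives under "data"
doc = {"data": {"info": {"id": 10004, "cnname": "西部世界", "enname": "Westworld",
                         "aliasname": "WW", "views": 999, "area": "美国"},
                "list": []}}
flat = {**doc["data"]["info"], "data": doc["data"]}  # roughly the pipeline output

# dump_resource() then serializes the trailing blob, one value per table column:
line = list(flat.values())
line[-1] = json.dumps(line[-1], ensure_ascii=False)
print(line)  # seven values for the seven-column yyets table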


@@ -1,5 +1,6 @@
#!/usr/bin/env python3
# coding: utf-8
import contextlib
import logging
import os
import random
@@ -9,11 +10,10 @@ from copy import deepcopy
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
from Mongo import Mongo
from utils import setup_logger
setup_logger()
from databases.base import Mongo
from databases.douban import Douban
class BaseSync:
@@ -65,7 +65,9 @@ class BaseSync:
class Zhuixinfan(BaseSync):
def run(self):
zhuixinfan = "http://www.fanxinzhui.com/rr/{}"
start = (self.sync.find_one({"name": "zhuixinfan"}) or {}).get("resource_id", os.getenv("ZHUIXINFAN_START", 20))
start = (self.sync.find_one({"name": "zhuixinfan"}) or {}).get(
"resource_id", os.getenv("ZHUIXINFAN_START", 20)
)
end = os.getenv("ZHUIXINFAN_END", 2500)
for i in range(start, end):
url = zhuixinfan.format(i)
@@ -121,16 +123,27 @@ class Zhuixinfan(BaseSync):
for item in links:
content = item["href"]
if "ed2k" in content:
resource["files"].append({"way": "1", "way_cn": "电驴", "address": content, "passwd": ""})
resource["files"].append(
{"way": "1", "way_cn": "电驴", "address": content, "passwd": ""}
)
elif "magnet" in content:
resource["files"].append({"way": "2", "way_cn": "磁力", "address": content, "passwd": ""})
resource["files"].append(
{"way": "2", "way_cn": "磁力", "address": content, "passwd": ""}
)
elif "pan.baidu" in content:
baidu_password = res.span.a.nextSibling.nextSibling.text
resource["files"].append(
{"way": "13", "way_cn": "百度网盘", "address": content, "passwd": baidu_password}
{
"way": "13",
"way_cn": "百度网盘",
"address": content,
"passwd": baidu_password,
}
)
elif "weiyun" in content:
resource["files"].append({"way": "14", "way_cn": "微云", "address": content, "passwd": ""})
resource["files"].append(
{"way": "14", "way_cn": "微云", "address": content, "passwd": ""}
)
else:
logging.debug("Unknown link: %s", content)
@@ -148,10 +161,15 @@ class Zhuixinfan(BaseSync):
self.yyets.update_one(already_cond, {"$set": {"data.info.source": source}})
elif exists:
logging.info("Updating new data.info.id: %s", source)
self.yyets.update_one({"data.info.source": source}, {"$set": {"data.list": data["data"]["list"]}})
self.yyets.update_one(
{"data.info.source": source},
{"$set": {"data.list": data["data"]["list"]}},
)
else:
last_id = 90000
last = self.yyets.find_one({"data.info.id": {"$gte": last_id}}, sort=[("data.info.id", -1)])
last = self.yyets.find_one(
{"data.info.id": {"$gte": last_id}}, sort=[("data.info.id", -1)]
)
if last:
last_id = last["data"]["info"]["id"] + 1
logging.info("Inserting data.info.id: %s", last_id)
@@ -195,13 +213,19 @@ class YYSub(BaseSync):
structure["data"]["info"]["enname"] = data["enname"]
structure["data"]["info"]["aliasname"] = data["aliasname"]
structure["data"]["info"]["channel"] = data["channel"]
structure["data"]["info"]["channel_cn"] = data["channel_cn"] or channel_cn
structure["data"]["info"]["channel_cn"] = (
data["channel_cn"] or channel_cn
)
structure["data"]["info"]["area"] = data["area"]
structure["data"]["list"] = []
structure["data"]["info"]["source"] = f"https://www.yysub.net/resource/{i}"
structure["data"]["info"][
"source"
] = f"https://www.yysub.net/resource/{i}"
self.insert_data(structure.copy())
self.sync.update_one({"name": "yysub"}, {"$set": {"resource_id": end}}, upsert=True)
self.sync.update_one(
{"name": "yysub"}, {"$set": {"resource_id": end}}, upsert=True
)
logging.info("YYsub Finished")
def insert_data(self, data):
@@ -209,6 +233,38 @@ class YYSub(BaseSync):
self.yyets.update_one({"data.info.id": rid}, {"$set": data}, upsert=True)
def sync_douban():
douban = Douban()
session = requests.Session()
ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4280.88 Safari/537.36"
session.headers.update({"User-Agent": ua})
yyets_data = douban.db["yyets"].aggregate(
[
{"$group": {"_id": None, "ids": {"$push": "$data.info.id"}}},
{"$project": {"_id": 0, "ids": 1}},
]
)
douban_data = douban.db["douban"].aggregate(
[
{"$group": {"_id": None, "ids": {"$push": "$resourceId"}}},
{"$project": {"_id": 0, "ids": 1}},
]
)
id1 = next(yyets_data)["ids"]
id2 = next(douban_data)["ids"]
rids = list(set(id1).difference(id2))
rids.remove(233)
logging.info("resource id complete %d", len(rids))
for rid in tqdm(rids):
with contextlib.suppress(Exception):
d = douban.find_douban(rid)
logging.info("Processed %s, length %d", rid, len(d))
logging.info("ALL FINISH!")
if __name__ == "__main__":
a = Zhuixinfan()
# a.build_data(open("1.html").read(), "https://www.zhuixinfan.com/resource/1.html")
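
The rewritten sync_douban builds each id list with a single $group/$push aggregation and diffs them in Python; the same computation with invented ids:

# each aggregation yields one document like {"ids": [...]}; the backlog is the set difference
yyets_ids = [233, 10001, 10002, 10003]    # next(yyets_data)["ids"]
douban_ids = [10001]                      # next(douban_data)["ids"]
rids = list(set(yyets_ids).difference(douban_ids))
rids.remove(233)                          # 233 is the site discussion pseudo-resource, skip it
print(sorted(rids))                       # [10002, 10003]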


@@ -34,7 +34,7 @@ def setup_logger():
def ts_date(ts=None):
# all the time save in db should be CST
# Let's always set the timezone to CST
timestamp = ts or time.time()
return datetime.fromtimestamp(timestamp, pytz.timezone("Asia/Shanghai")).strftime("%Y-%m-%d %H:%M:%S")
@@ -45,7 +45,7 @@ def _format_addr(s):
def generate_body(context):
template = pathlib.Path(__file__).parent.joinpath("templates", "email_template.html")
template = pathlib.Path(__file__).parent.parent.joinpath("templates", "email_template.html")
with open(template) as f:
return Template(f.read()).render(**context)
@@ -84,7 +84,12 @@ def check_spam(ip, ua, author, content) -> int:
akismet = Akismet(token, blog="https://yyets.dmesg.app/")
return akismet.check(
ip, ua, comment_author=author, blog_lang="zh_cn", comment_type="comment", comment_content=content
ip,
ua,
comment_author=author,
blog_lang="zh_cn",
comment_type="comment",
comment_content=content,
)
return 0
@@ -101,16 +106,24 @@ class Cloudflare:
return self.session.get(self.endpoint).json()["result"]["expression"]
def ban_new_ip(self, ip):
logging.info("Blacklisting IP %s", ip)
logging.warning("Adding %s to cloudflare managed challenge list", ip)
expr = self.get_old_expr()
if ip not in expr:
body = {"id": self.filter_id, "paused": False, "expression": f"{expr} or (ip.src eq {ip})"}
body = {
"id": self.filter_id,
"paused": False,
"expression": f"{expr} or (ip.src eq {ip})",
}
resp = self.session.put(self.endpoint, json=body)
logging.info(resp.json())
def clear_fw(self):
logging.info("Clearing firewall rules")
body = {"id": self.filter_id, "paused": False, "expression": "(ip.src eq 192.168.2.1)"}
body = {
"id": self.filter_id,
"paused": False,
"expression": "(ip.src eq 192.168.2.1)",
}
self.session.put(self.endpoint, json=body)
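
ban_new_ip maintains a single, ever-growing Cloudflare filter expression; roughly how it accumulates (IPs invented):

expr = "(ip.src eq 192.168.2.1)"          # the placeholder clear_fw() resets to
for ip in ["203.0.113.7", "198.51.100.9"]:
    if ip not in expr:                    # same dedup check as ban_new_ip
        expr = f"{expr} or (ip.src eq {ip})"
print(expr)
# (ip.src eq 192.168.2.1) or (ip.src eq 203.0.113.7) or (ip.src eq 198.51.100.9)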


@@ -1,239 +0,0 @@
#!/usr/local/bin/python3
# coding: utf-8
# YYeTsBot - base_db.py
# 6/16/21 20:31
#
__author__ = "Benny <benny.think@gmail.com>"
import base64
import json
import logging
import os
import random
import re
import string
import fakeredis
import redis
from captcha.image import ImageCaptcha
from utils import setup_logger
setup_logger()
captcha_ex = 60 * 10
predefined_str = re.sub(r"[1l0oOI]", "", string.ascii_letters + string.digits)
class Redis:
def __init__(self):
self.r = redis.StrictRedis(host=os.getenv("REDIS", "localhost"), decode_responses=True)
try:
self.r.ping()
except redis.exceptions.ConnectionError:
# we can't connect to redis, either generated by pyinstaller(_MEIPASS), or forget to run it.
logging.info("%s Disable redis for standalone exe! %s", "#" * 10, "#" * 10)
self.r = fakeredis.FakeStrictRedis()
def __del__(self):
self.r.close()
@classmethod
def cache(cls, timeout: int):
def func(fun):
def inner(*args, **kwargs):
func_name = fun.__name__
cache_value = cls().r.get(func_name)
if cache_value:
logging.info("Retrieving %s data from redis", func_name)
return json.loads(cache_value)
else:
logging.info("Cache expired. Executing %s", func_name)
res = fun(*args, **kwargs)
cls().r.set(func_name, json.dumps(res), ex=timeout)
return res
return inner
return func
class OtherResource:
def reset_top(self):
pass
class UserResource:
def login_user(self, username: str, password: str, captcha: str, captcha_id: str, ip: str, browser: str) -> dict:
pass
def get_user_info(self, username: str) -> dict:
pass
def update_user_last(self, username: str, now_ip: str) -> None:
pass
def update_user_info(self, username: str, data: dict) -> dict:
pass
class TopResource:
def get_most(self) -> list:
pass
def get_top_resource(self) -> dict:
pass
class LikeResource:
def get_user_like(self, username: str) -> list:
pass
def add_remove_fav(self, resource_id: int, username: str) -> str:
pass
class NameResource:
def get_names(self, is_readable: [str, bool]) -> dict:
pass
class CommentResource:
def get_comment(self, resource_id: int, page: int, size: int, **kwargs) -> dict:
pass
def add_comment(
self,
captcha: str,
captcha_id: int,
content: str,
resource_id: int,
ip: str,
username: str,
browser: str,
comment_id=None,
) -> dict:
pass
def delete_comment(self, comment_id: str):
pass
class CommentReactionResource:
def react_comment(self, username, data):
pass
class CommentChildResource:
def get_comment(self, parent_id: str, page: int, size: int) -> dict:
pass
class CommentNewestResource:
def get_comment(self, page: int, size: int, keyword=None) -> dict:
pass
class CaptchaResource:
redis = Redis()
def get_captcha(self, captcha_id):
chars = "".join([random.choice(predefined_str) for _ in range(4)])
image = ImageCaptcha()
data = image.generate(chars)
self.redis.r.set(captcha_id, chars, ex=captcha_ex)
return f"data:image/png;base64,{base64.b64encode(data.getvalue()).decode('ascii')}"
def verify_code(self, user_input, captcha_id) -> dict:
correct_code = self.redis.r.get(captcha_id)
if not correct_code:
return {"status": False, "message": "验证码已过期"}
if user_input.lower() == correct_code.lower():
self.redis.r.delete(correct_code)
return {"status": True, "message": "验证通过"}
else:
return {"status": False, "message": "验证码错误"}
class MetricsResource:
def set_metrics(self, metrics_type: str):
pass
def get_metrics(self, from_date: str, to_date: str) -> dict:
pass
class ResourceResource:
def get_resource_data(self, resource_id: int, username: str) -> dict:
pass
def search_resource(self, keyword: str, search_type: str) -> dict:
pass
def patch_resource(self, data: dict):
pass
def add_resource(self, data: dict):
pass
def delete_resource(self, data: dict):
pass
class GrafanaQueryResource:
def get_grafana_data(self, date_series) -> str:
pass
class BlacklistResource(Redis):
def get_black_list(self):
pass
class AnnouncementResource:
def get_announcement(self, page: int, size: int) -> dict:
pass
def add_announcement(self, username, content, ip, browser):
pass
class DoubanResource:
def get_douban_data(self, rid: int) -> dict:
pass
def get_douban_image(self, rid: int) -> bytes:
pass
class DoubanReportResource:
def report_error(self, captcha: str, captcha_id: int, content: str, resource_id: int) -> dict:
pass
def get_error(self) -> dict:
pass
class NotificationResource:
def get_notification(self, username, page, size):
pass
def update_notification(self, username, verb, comment_id):
pass
class UserEmailResource:
def verify_email(self, username, code):
pass
class CategoryResource:
def get_category(self, query: dict):
pass
class ResourceLatestResource:
@staticmethod
def get_latest_resource() -> dict:
pass
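
A usage sketch for the Redis.cache decorator defined in this removed module (the decorated function is invented). Note the cache key is just the function name, so it only suits functions whose output does not depend on their arguments:

@Redis.cache(timeout=3600)
def get_top_resource():                    # invented example
    return {"data": ["expensive result"]}

get_top_resource()  # miss: runs the function, stores JSON under key "get_top_resource"
get_top_resource()  # hit within an hour: returns json.loads of the cached value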


@@ -0,0 +1,33 @@
#!/usr/bin/env python3
# coding: utf-8
# YYeTsBot - __init__.py
# 2023-03-17 18:57
import logging
import pathlib
import sys
from common.utils import Cloudflare, setup_logger
cf = Cloudflare()
setup_logger()
DOUBAN_SEARCH = "https://www.douban.com/search?cat=1002&q={}"
DOUBAN_DETAIL = "https://movie.douban.com/subject/{}/"
lib_path = (
pathlib.Path(__file__)
.parent.parent.parent.joinpath("yyetsbot")
.resolve()
.as_posix()
)
sys.path.append(lib_path)
from fansub import BD2020, XL720, NewzmzOnline, ZhuixinfanOnline, ZimuxiaOnline
logging.info(
"Loading fansub...%s",
(BD2020, XL720, NewzmzOnline, ZhuixinfanOnline, ZimuxiaOnline),
)

yyetsweb/databases/base.py (new file, 271 lines)

@@ -0,0 +1,271 @@
#!/usr/bin/env python3
# coding: utf-8
import json
import logging
import os
import time
import fakeredis
import meilisearch
import pymongo
import redis
faker_redis = fakeredis.FakeStrictRedis()
class Mongo:
def __init__(self):
self.client = pymongo.MongoClient(
host=os.getenv("MONGO", "localhost"),
connect=False,
connectTimeoutMS=5000,
serverSelectionTimeoutMS=5000,
)
self.db = self.client["zimuzu"]
super().__init__()
def __del__(self):
self.client.close()
def is_admin(self, username: str) -> bool:
data = self.db["users"].find_one(
{"username": username, "group": {"$in": ["admin"]}}
)
if data:
return True
def is_user_blocked(self, username: str) -> str:
r = self.db["users"].find_one({"username": username, "status.disable": True})
if r:
return r["status"]["reason"]
def is_old_user(self, username: str) -> bool:
return bool(self.db["users"].find_one({"username": username, "oldUser": True}))
class Redis:
def __init__(self):
try:
self.r = redis.StrictRedis(
host=os.getenv("REDIS", "localhost"), decode_responses=True
)
self.r.ping()
except redis.exceptions.ConnectionError:
logging.warning("%s Using fakeredis now... %s", "#" * 10, "#" * 10)
self.r = faker_redis
super().__init__()
def __del__(self):
self.r.close()
@classmethod
def cache(cls, timeout: int):
def func(fun):
def inner(*args, **kwargs):
func_name = fun.__name__
cache_value = cls().r.get(func_name)
if cache_value:
logging.info("Retrieving %s data from redis", func_name)
return json.loads(cache_value)
else:
logging.info("Cache expired. Executing %s", func_name)
res = fun(*args, **kwargs)
cls().r.set(func_name, json.dumps(res), ex=timeout)
return res
return inner
return func
class SearchEngine(Mongo):
yyets_projection = {
"data.info.cnname": 1,
"data.info.enname": 1,
"data.info.aliasname": 1,
"data.info.area": 1,
"data.info.id": 1,
"data.info.channel_cn": 1,
"data.info.channel": 1,
"_id": {"$toString": "$_id"},
"origin": "yyets",
}
douban_projection = {
"_id": {"$toString": "$_id"},
"id": "$resourceId",
"cnname": {"$first": "$resource.data.info.cnname"},
"enname": {"$first": "$resource.data.info.enname"},
"aliasname": {"$first": "$resource.data.info.aliasname"},
"area": {"$first": "$resource.data.info.area"},
"channel_cn": {"$first": "$resource.data.info.channel_cn"},
"channel": {"$first": "$resource.data.info.channel"},
"origin": "yyets",
"actors": 1,
"directors": 1,
"genres": 1,
"writers": 1,
"introduction": 1,
}
douban_lookup = {
"from": "yyets",
"localField": "resourceId",
"foreignField": "data.info.id",
"as": "resource",
}
comment_projection = {
"username": 1,
"date": 1,
"comment": "$content",
"commentID": {"$toString": "$_id"},
"origin": "comment",
"hasAvatar": {"$toBool": "$avatar"},
"resourceID": "$resource_id",
"resourceName": {"$first": "$resource.data.info.cnname"},
"_id": {"$toString": "$_id"},
}
comment_lookup = {
"from": "yyets",
"localField": "resource_id",
"foreignField": "data.info.id",
"as": "resource",
}
def __init__(self):
self.search_client = meilisearch.Client(os.getenv("MEILISEARCH"), "masterKey")
self.yyets_index = self.search_client.index("yyets")
self.comment_index = self.search_client.index("comment")
self.douban_index = self.search_client.index("douban")
super().__init__()
def __del__(self):
pass
def __get_yyets(self):
return self.db["yyets"].aggregate(
[
{"$project": self.yyets_projection},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
{"origin": "yyets"},
"$data.info",
{"_id": "$_id"},
]
}
}
},
]
)
def __get_comment(self):
return self.db["comment"].aggregate(
[
{"$lookup": self.comment_lookup},
{"$project": self.comment_projection},
]
)
def __get_douban(self):
return self.db["douban"].aggregate(
[
{"$lookup": self.douban_lookup},
{"$project": self.douban_projection},
]
)
def add_yyets(self):
logging.info("Adding yyets data to search engine")
data = list(self.__get_yyets())
self.yyets_index.add_documents(data, primary_key="_id")
def add_comment(self):
logging.info("Adding comment data to search engine")
data = list(self.__get_comment())
self.comment_index.add_documents(data, primary_key="_id")
def add_douban(self):
logging.info("Adding douban data to search engine")
data = list(self.__get_douban())
self.douban_index.add_documents(data, primary_key="_id")
def search_yyets(self, keyword: "str"):
return self.yyets_index.search(keyword, {"matchingStrategy": "all"})["hits"]
def search_comment(self, keyword: "str"):
return self.comment_index.search(keyword, {"matchingStrategy": "all"})["hits"]
def search_douban(self, keyword: "str"):
return self.douban_index.search(keyword, {"matchingStrategy": "all"})["hits"]
def run_import(self):
t0 = time.time()
self.add_yyets()
self.add_comment()
self.add_douban()
logging.info(f"Import data to search engine in {time.time() - t0:.2f}s")
def __monitor(self, col, fun):
cursor = self.db[col].watch()
for change in cursor:
op_type = change["operationType"]
_id = change["documentKey"]["_id"]
search_index = getattr(self, f"{col}_index")
logging.info("%s %s change stream for %s", col, op_type, _id)
if op_type == "delete":
search_index.delete_document(_id)
else:
data = fun(_id)
search_index.add_documents(data, primary_key="_id")
def monitor_yyets(self):
def get_data(_id) -> list:
data = self.db.yyets.find_one(
{"_id": _id}, projection=self.yyets_projection
)["data"]["info"]
data["_id"] = str(_id)
data["origin"] = "yyets"
return [data]
self.__monitor("yyets", get_data)
def monitor_douban(self):
def get_data(_id) -> list:
data = self.db.douban.aggregate(
[
{"$match": {"_id": _id}},
{"$lookup": self.douban_lookup},
{"$project": self.douban_projection},
]
)
return list(data)
self.__monitor("douban", get_data)
def monitor_comment(self):
def get_data(_id) -> list:
data = self.db.comment.aggregate(
[
{"$match": {"_id": _id}},
{"$lookup": self.comment_lookup},
{"$project": self.comment_projection},
]
)
return list(data)
self.__monitor("comment", get_data)
class DBDump:
pass
class Index:
pass
class NotFound:
pass
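
A sketch of driving the new SearchEngine (assumes MEILISEARCH points at a reachable instance; the monitor_* change streams additionally require MongoDB to run as a replica set):

engine = SearchEngine()
engine.run_import()                        # bulk-load the yyets, comment and douban indexes
for hit in engine.search_yyets("Westworld"):
    print(hit["id"], hit["cnname"])
# engine.monitor_yyets()                   # blocking; tails the change stream to stay in sync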


@@ -0,0 +1,484 @@
#!/usr/bin/env python3
# coding: utf-8
import contextlib
import os
import re
from http import HTTPStatus
import pymongo
from bson import ObjectId
from common.utils import check_spam, send_mail, ts_date
from databases.base import Mongo
from databases.other import Captcha, SpamProcess
class Comment(Mongo):
def __init__(self):
super().__init__()
self.inner_page = 1
self.inner_size = 5
self.projection = {"ip": False, "parent_id": False}
@staticmethod
def convert_objectid(data):
# change _id to id, remove _id
for item in data:
item["id"] = str(item["_id"])
item.pop("_id")
for child in item.get("children", []):
with contextlib.suppress(Exception):
child["id"] = str(child["_id"])
child.pop("_id")
def find_children(self, parent_data):
for item in parent_data:
children_ids = item.get("children", [])
condition = {
"_id": {"$in": children_ids},
"deleted_at": {"$exists": False},
"type": "child",
}
children_count = self.db["comment"].count_documents(condition)
children_data = (
self.db["comment"]
.find(condition, self.projection)
.sort("_id", pymongo.DESCENDING)
.limit(self.inner_size)
.skip((self.inner_page - 1) * self.inner_size)
)
children_data = list(children_data)
self.get_user_group(children_data)
self.add_reactions(children_data)
item["children"] = []
if children_data:
item["children"].extend(children_data)
item["childrenCount"] = children_count
else:
item["childrenCount"] = 0
def get_user_group(self, data):
whitelist = os.getenv("whitelist", "").split(",")
for comment in data:
username = comment["username"]
user = self.db["users"].find_one({"username": username}) or {}
group = user.get("group", ["user"])
comment["group"] = group
comment["hasAvatar"] = bool(user.get("avatar"))
if username in whitelist:
comment["group"].append("publisher")
def add_reactions(self, data):
for comment in data:
cid = comment.get("id") or comment.get("_id")
cid = str(cid)
reactions = (
self.db["reactions"].find_one(
{"comment_id": cid}, projection={"_id": False, "comment_id": False}
)
or {}
)
for verb, users in reactions.items():
if users:
comment.setdefault("reactions", []).append(
{"verb": verb, "users": users}
)
def get_comment(self, resource_id: int, page: int, size: int, **kwargs) -> dict:
self.inner_page = kwargs.get("inner_page", 1)
self.inner_size = kwargs.get("inner_size", 5)
comment_id = kwargs.get("comment_id")
condition = {
"resource_id": resource_id,
"deleted_at": {"$exists": False},
"type": {"$ne": "child"},
}
if comment_id:
# look up by a specific comment id
condition = {
"deleted_at": {"$exists": False},
"$or": [
# a child comment id brings out the whole parent comment
{"children": {"$in": [ObjectId(comment_id)]}},
# a parent comment id matches the parent and excludes child records
{"_id": ObjectId(comment_id), "type": {"$ne": "child"}},
],
}
count = self.db["comment"].count_documents(condition)
data = (
self.db["comment"]
.find(condition, self.projection)
.sort("_id", pymongo.DESCENDING)
.limit(size)
.skip((page - 1) * size)
)
data = list(data)
self.find_children(data)
self.convert_objectid(data)
self.get_user_group(data)
self.add_reactions(data)
return {"data": data, "count": count, "resource_id": resource_id}
def add_comment(
self,
captcha: str,
captcha_id: int,
content: str,
resource_id: int,
ip: str,
username: str,
browser: str,
parent_comment_id=None,
) -> dict:
user_data = self.db["users"].find_one({"username": username})
# old users are allowed to comment without email verification
if (
not self.is_old_user(username)
and user_data.get("email", {}).get("verified", False) is False
):
return {
"status_code": HTTPStatus.TEMPORARY_REDIRECT,
"message": "你需要验证邮箱才能评论,请到个人中心进行验证",
}
returned = {"status_code": 0, "message": ""}
# check if this user is blocked
reason = self.is_user_blocked(username)
if reason:
return {"status_code": HTTPStatus.FORBIDDEN, "message": reason}
if check_spam(ip, browser, username, content) != 0:
document = {
"username": username,
"ip": ip,
"date": ts_date(),
"browser": browser,
"content": content,
"resource_id": resource_id,
}
inserted_id = self.db["spam"].insert_one(document).inserted_id
document["_id"] = str(inserted_id)
SpamProcess.request_approval(document)
return {
"status_code": HTTPStatus.FORBIDDEN,
"message": f"possible spam, reference id: {inserted_id}",
}
user_group = user_data.get("group", [])
if not user_group:
# admins don't have to verify the captcha
verify_result = Captcha().verify_code(captcha, captcha_id)
if os.getenv("PYTHON_DEV"):
pass
elif not verify_result["status"]:
returned["status_code"] = HTTPStatus.BAD_REQUEST
returned["message"] = verify_result["message"]
return returned
exists = self.db["yyets"].find_one({"data.info.id": resource_id})
if not exists:
returned["status_code"] = HTTPStatus.NOT_FOUND
returned["message"] = "资源不存在"
return returned
if parent_comment_id:
exists = self.db["comment"].find_one({"_id": ObjectId(parent_comment_id)})
if not exists:
returned["status_code"] = HTTPStatus.NOT_FOUND
returned["message"] = "评论不存在"
return returned
basic_comment = {
"username": username,
"ip": ip,
"date": ts_date(),
"browser": browser,
"content": content,
"resource_id": resource_id,
}
if parent_comment_id is None:
basic_comment["type"] = "parent"
else:
basic_comment["type"] = "child"
# every comment, parent or child, gets its own new document
inserted_id: str = self.db["comment"].insert_one(basic_comment).inserted_id
if parent_comment_id is not None:
# a reply also appends its id to the parent comment's children
self.db["comment"].find_one_and_update(
{"_id": ObjectId(parent_comment_id)},
{"$push": {"children": inserted_id}},
)
self.db["comment"].find_one_and_update(
{"_id": ObjectId(inserted_id)},
{"$set": {"parent_id": ObjectId(parent_comment_id)}},
)
returned["status_code"] = HTTPStatus.CREATED
returned["message"] = "评论成功"
# notification
if parent_comment_id:
# find username
self.db["notification"].find_one_and_update(
{"username": exists["username"]},
{"$push": {"unread": inserted_id}},
upsert=True,
)
# send email
parent_comment = self.db["comment"].find_one(
{"_id": ObjectId(parent_comment_id)}
)
if resource_id == 233:
link = f"https://yyets.dmesg.app/discuss#{parent_comment_id}"
else:
link = f"https://yyets.dmesg.app/resource?id={resource_id}#{parent_comment_id}"
user_info = self.db["users"].find_one(
{"username": parent_comment["username"], "email.verified": True}
)
if user_info:
subject = "[人人影视下载分享站] 你的评论有了新的回复"
pt_content = content.split("</reply>")[-1]
text = (
f"你的评论 {parent_comment['content']} 有了新的回复:<br>{pt_content}"
f"<br>你可以<a href='{link}'>点此链接</a>查看<br><br>请勿回复此邮件"
)
context = {"username": username, "text": text}
send_mail(user_info["email"]["address"], subject, context)
return returned
def delete_comment(self, comment_id):
current_time = ts_date()
count = (
self.db["comment"]
.update_one(
{"_id": ObjectId(comment_id), "deleted_at": {"$exists": False}},
{"$set": {"deleted_at": current_time}},
)
.modified_count
)
# find the child comments and mark them all deleted too
parent_data = self.db["comment"].find_one({"_id": ObjectId(comment_id)})
if parent_data:
child_ids = parent_data.get("children", [])
else:
child_ids = []
count += (
self.db["comment"]
.update_many(
{"_id": {"$in": child_ids}, "deleted_at": {"$exists": False}},
{"$set": {"deleted_at": current_time}},
)
.modified_count
)
returned = {"status_code": 0, "message": "", "count": -1}
if count == 0:
returned["status_code"] = HTTPStatus.NOT_FOUND
returned["count"] = 0
else:
returned["status_code"] = HTTPStatus.OK
returned["count"] = count
return returned
class CommentReaction(Mongo):
def react_comment(self, username, data):
# {"comment_id":"da23","😊":["user1","user2"]}
comment_id = data["comment_id"]
verb = data["verb"]
method = data["method"]
if not self.db["comment"].find_one({"_id": ObjectId(comment_id)}):
return {
"status": False,
"message": "Where is your comments?",
"status_code": HTTPStatus.NOT_FOUND,
}
if method == "POST":
self.db["reactions"].update_one(
{"comment_id": comment_id}, {"$addToSet": {verb: username}}, upsert=True
)
code = HTTPStatus.CREATED
elif method == "DELETE":
self.db["reactions"].update_one(
{"comment_id": comment_id}, {"$pull": {verb: username}}
)
code = HTTPStatus.ACCEPTED
else:
code = HTTPStatus.BAD_REQUEST
return {"status": True, "message": "success", "status_code": code}
class CommentChild(Comment, Mongo):
def __init__(self):
super().__init__()
self.page = 1
self.size = 5
self.projection = {"ip": False, "parent_id": False}
def get_comment(self, parent_id: str, page: int, size: int) -> dict:
condition = {
"parent_id": ObjectId(parent_id),
"deleted_at": {"$exists": False},
"type": "child",
}
count = self.db["comment"].count_documents(condition)
data = (
self.db["comment"]
.find(condition, self.projection)
.sort("_id", pymongo.DESCENDING)
.limit(size)
.skip((page - 1) * size)
)
data = list(data)
self.convert_objectid(data)
self.get_user_group(data)
return {
"data": data,
"count": count,
}
class CommentNewest(Comment, Mongo):
def __init__(self):
super().__init__()
self.page = 1
self.size = 5
self.projection = {"ip": False, "parent_id": False, "children": False}
self.condition: "dict" = {"deleted_at": {"$exists": False}}
def get_comment(self, page: int, size: int, keyword="") -> dict:
# ID, time, username, user group, resource name, resource id
count = self.db["comment"].count_documents(self.condition)
data = (
self.db["comment"]
.find(self.condition, self.projection)
.sort("_id", pymongo.DESCENDING)
.limit(size)
.skip((page - 1) * size)
)
data = list(data)
self.convert_objectid(data)
self.get_user_group(data)
self.extra_info(data)
return {
"data": data,
"count": count,
}
def extra_info(self, data):
for i in data:
resource_id = i.get("resource_id", 233)
res = self.db["yyets"].find_one({"data.info.id": resource_id})
if res:
i["cnname"] = res["data"]["info"]["cnname"]
class CommentSearch(CommentNewest):
def get_comment(self, page: int, size: int, keyword="") -> dict:
self.projection.pop("children")
self.condition.update(content={"$regex": f".*{keyword}.*", "$options": "i"})
data = list(
self.db["comment"]
.find(self.condition, self.projection)
.sort("_id", pymongo.DESCENDING)
.limit(size)
.skip((page - 1) * size)
)
self.convert_objectid(data)
self.get_user_group(data)
self.extra_info(data)
self.fill_children(data)
# final step - remove children
for i in data:
i.pop("children", None)
return {
"data": data,
}
def fill_children(self, data):
for item in data:
child_id: "list" = item.get("children", [])
children = list(
self.db["comment"]
.find({"_id": {"$in": child_id}}, self.projection)
.sort("_id", pymongo.DESCENDING)
)
self.convert_objectid(children)
self.get_user_group(children)
self.extra_info(children)
data.extend(children)
class Notification(Mongo):
def get_notification(self, username, page, size):
# .sort("_id", pymongo.DESCENDING).limit(size).skip((page - 1) * size)
notify = self.db["notification"].find_one(
{"username": username}, projection={"_id": False}
)
if not notify:
return {
"username": username,
"unread_item": [],
"read_item": [],
"unread_count": 0,
"read_count": 0,
}
# size is shared
unread = notify.get("unread", [])
id_list = []
for item in unread[(page - 1) * size : size * page]:
id_list.append(item)
notify["unread_item"] = self.get_content(id_list)
size = size - len(unread)
read = notify.get("read", [])
id_list = []
for item in read[(page - 1) * size : size * page]:
id_list.append(item)
notify["read_item"] = self.get_content(id_list)
notify.pop("unread", None)
notify.pop("read", None)
notify["unread_count"] = len(unread)
notify["read_count"] = len(read)
return notify
def get_content(self, id_list):
comments = (
self.db["comment"]
.find(
{"_id": {"$in": id_list}}, projection={"ip": False, "parent_id": False}
)
.sort("_id", pymongo.DESCENDING)
)
comments = list(comments)
for comment in comments:
comment["id"] = str(comment["_id"])
comment.pop("_id")
reply_to_id = re.findall(r'"(.*)"', comment["content"])[0]
rtc = self.db["comment"].find_one(
{"_id": ObjectId(reply_to_id)},
projection={"content": True, "_id": False},
)
comment["reply_to_content"] = rtc["content"]
return comments
def update_notification(self, username, verb, comment_id):
if verb == "read":
v1, v2 = "read", "unread"
else:
v1, v2 = "unread", "read"
self.db["notification"].find_one_and_update(
{"username": username},
{"$push": {v1: ObjectId(comment_id)}, "$pull": {v2: ObjectId(comment_id)}},
)
return {}
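
How a handler would page through the comment tree above (resource id invented; assumes the zimuzu database is reachable):

comments = Comment()
result = comments.get_comment(10004, page=1, size=5, inner_page=1, inner_size=3)
print(result["count"])                     # total top-level comments for the resource
for c in result["data"]:
    print(c["id"], c["username"], c["childrenCount"])  # children capped at inner_size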


@@ -0,0 +1,161 @@
#!/usr/bin/env python3
# coding: utf-8
import contextlib
import logging
import re
from http import HTTPStatus
from urllib.parse import unquote
import requests
from bs4 import BeautifulSoup
from retry import retry
from databases import DOUBAN_DETAIL, DOUBAN_SEARCH
from databases.base import Mongo
from databases.other import Captcha
class Douban(Mongo):
def get_douban_data(self, rid: int) -> dict:
with contextlib.suppress(Exception):
return self.find_douban(rid)
return {"posterData": None}
def get_douban_image(self, rid: int) -> bytes:
db_data = self.get_douban_data(rid)
return db_data["posterData"]
@retry(IndexError, tries=3, delay=5)
def find_douban(self, resource_id: int):
session = requests.Session()
ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
session.headers.update({"User-Agent": ua})
douban_col = self.db["douban"]
yyets_col = self.db["yyets"]
data = douban_col.find_one(
{"resourceId": resource_id}, {"_id": False, "raw": False}
)
if data:
logging.info("Existing data for %s", resource_id)
return data
# data not found, craw from douban
projection = {
"data.info.cnname": True,
"data.info.enname": True,
"data.info.aliasname": True,
}
names = yyets_col.find_one({"data.info.id": resource_id}, projection=projection)
if names is None:
return {}
cname = names["data"]["info"]["cnname"]
logging.info("cnname for douban is %s", cname)
search_html = session.get(DOUBAN_SEARCH.format(cname)).text
logging.info("Analysis search html...length %s", len(search_html))
soup = BeautifulSoup(search_html, "html.parser")
douban_item = soup.find_all("div", class_="content")
fwd_link = unquote(douban_item[0].a["href"])
douban_id = re.findall(
r"https://movie\.douban\.com/subject/(\d*)/.*", fwd_link
)[0]
final_data = self.get_craw_data(
cname, douban_id, resource_id, search_html, session
)
douban_col.insert_one(final_data.copy())
final_data.pop("raw")
return final_data
@staticmethod
def get_craw_data(cname, douban_id, resource_id, search_html, session):
detail_link = DOUBAN_DETAIL.format(douban_id)
detail_html = session.get(detail_link).text
logging.info("Analysis detail html...%s", detail_link)
soup = BeautifulSoup(detail_html, "html.parser")
directors = [i.text for i in (soup.find_all("a", rel="v:directedBy"))]
release_date = (
poster_image_link
) = rating = year_text = intro = writers = episode_count = episode_duration = ""
with contextlib.suppress(IndexError):
episode_duration = soup.find_all("span", property="v:runtime")[0].text
for i in soup.find_all("span", class_="pl"):
if i.text == "编剧":
writers = re.sub(r"\s", "", list(i.next_siblings)[1].text).split("/")
if i.text == "集数:":
episode_count = str(i.nextSibling)
if i.text == "单集片长:" and not episode_duration:
episode_duration = str(i.nextSibling)
actors = [i.text for i in soup.find_all("a", rel="v:starring")]
genre = [i.text for i in soup.find_all("span", property="v:genre")]
with contextlib.suppress(IndexError):
release_date = soup.find_all("span", property="v:initialReleaseDate")[
0
].text
with contextlib.suppress(IndexError):
poster_image_link = soup.find_all("div", id="mainpic")[0].a.img["src"]
with contextlib.suppress(IndexError):
rating = soup.find_all("strong", class_="ll rating_num")[0].text
with contextlib.suppress(IndexError):
year_text = re.sub(
r"[()]", "", soup.find_all("span", class_="year")[0].text
)
with contextlib.suppress(IndexError):
intro = re.sub(
r"\s", "", soup.find_all("span", property="v:summary")[0].text
)
final_data = {
"name": cname,
"raw": {
"search_url": DOUBAN_SEARCH.format(cname),
"detail_url": detail_link,
"search_html": search_html,
"detail_html": detail_html,
},
"doubanId": int(douban_id),
"doubanLink": detail_link,
"posterLink": poster_image_link,
"posterData": session.get(poster_image_link).content,
"resourceId": resource_id,
"rating": rating,
"actors": actors,
"directors": directors,
"genre": genre,
"releaseDate": release_date,
"episodeCount": episode_count,
"episodeDuration": episode_duration,
"writers": writers,
"year": year_text,
"introduction": intro,
}
return final_data
class DoubanReport(Mongo):
def get_error(self) -> dict:
return dict(data=list(self.db["douban_error"].find(projection={"_id": False})))
def report_error(
self, captcha: str, captcha_id: int, content: str, resource_id: int
) -> dict:
returned = {"status_code": 0, "message": ""}
verify_result = Captcha().verify_code(captcha, captcha_id)
if not verify_result["status"]:
returned["status_code"] = HTTPStatus.BAD_REQUEST
returned["message"] = verify_result["message"]
return returned
count = (
self.db["douban_error"]
.update_one(
{"resource_id": resource_id},
{"$push": {"content": content}},
upsert=True,
)
.matched_count
)
return dict(count=count)
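
Usage sketch for the crawler above (resource id invented): the first call scrapes douban.com and caches the parsed document, so later calls are pure MongoDB reads:

douban = Douban()
meta = douban.get_douban_data(10004)       # {"posterData": None} if the crawl fails
print(meta.get("rating"), meta.get("doubanLink"))
poster = douban.get_douban_image(10004)    # raw poster bytes from "posterData"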


@@ -0,0 +1,40 @@
#!/usr/bin/env python3
# coding: utf-8
import time
from datetime import date, timedelta
import pymongo
from databases.base import Mongo
class GrafanaQuery(Mongo):
def get_grafana_data(self, date_series) -> str:
condition = {"date": {"$in": date_series}}
projection = {"_id": False}
return self.db["metrics"].find(condition, projection)
class Metrics(Mongo):
def set_metrics(self, metrics_type: str):
today = time.strftime("%Y-%m-%d", time.localtime())
self.db["metrics"].update_one(
{"date": today}, {"$inc": {metrics_type: 1}}, upsert=True
)
def get_metrics(self, from_date: str, to_date: str) -> dict:
start_int = [int(i) for i in from_date.split("-")]
end_int = [int(i) for i in to_date.split("-")]
sdate = date(*start_int) # start date
edate = date(*end_int) # end date
date_range = [
str(sdate + timedelta(days=x)) for x in range((edate - sdate).days + 1)
]
condition = {"date": {"$in": date_range}}
result = (
self.db["metrics"]
.find(condition, {"_id": False})
.sort("date", pymongo.DESCENDING)
)
return dict(metrics=list(result))
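
The inclusive date series get_metrics builds, in isolation:

from datetime import date, timedelta

sdate, edate = date(2021, 2, 1), date(2021, 2, 4)
series = [str(sdate + timedelta(days=x)) for x in range((edate - sdate).days + 1)]
print(series)  # ['2021-02-01', '2021-02-02', '2021-02-03', '2021-02-04']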


@@ -0,0 +1,56 @@
#!/usr/bin/env python3
# coding: utf-8
from common.utils import ts_date
from databases.base import Mongo
class OAuthRegister(Mongo):
def add_user(self, username, ip, browser, uid, source: "str"):
uid = str(uid)
# username = "Benny"
user = self.db["users"].find_one({"uid": uid, "source": source})
if user and user.get("password"):
# user who registered directly with a password
return {"status": "fail", "message": "第三方登录失败,用户名已存在"}
elif user:
# existing oauth user
return {"status": "success", "message": "欢迎回来,即将跳转首页", "username": username}
else:
# first oauth login; assumed to always succeed
# TODO: GitHub usernames can change while the uid stays fixed; may need a unique index
self.db["users"].insert_one(
{
"username": username,
"date": ts_date(),
"ip": ip,
"browser": browser,
"oldUser": True,
"source": source,
"uid": uid,
}
)
return {
"status": "success",
"message": "第三方登录成功,即将跳转首页",
"username": username,
}
class GitHubOAuth2Login(OAuthRegister):
pass
class GoogleOAuth2Login(OAuthRegister):
pass
class TwitterOAuth2Login(OAuthRegister):
pass
class MSOAuth2Login(OAuthRegister):
pass
class FacebookAuth2Login(OAuthRegister):
pass

yyetsweb/databases/other.py (new file, 199 lines)

@@ -0,0 +1,199 @@
#!/usr/bin/env python3
# coding: utf-8
import base64
import json
import logging
import os
import random
import re
import string
import time
import pymongo
import requests
from bson import ObjectId
from captcha.image import ImageCaptcha
from common.utils import ts_date
from databases import cf
from databases.base import Mongo, Redis
captcha_ex = 60 * 10
predefined_str = re.sub(r"[1l0oOI]", "", string.ascii_letters + string.digits)
class Announcement(Mongo):
def get_announcement(self, page: int, size: int) -> dict:
condition = {}
count = self.db["announcement"].count_documents(condition)
data = (
self.db["announcement"]
.find(condition, projection={"_id": True, "ip": False})
.sort("_id", pymongo.DESCENDING)
.limit(size)
.skip((page - 1) * size)
)
data = list(data)
for i in data:
i["id"] = str(i["_id"])
i.pop("_id")
return {
"data": data,
"count": count,
}
def add_announcement(self, username, content, ip, browser):
construct = {
"username": username,
"ip": ip,
"date": ts_date(),
"browser": browser,
"content": content,
}
self.db["announcement"].insert_one(construct)
class Blacklist(Redis):
def get_black_list(self):
keys = self.r.keys("*")
result = {}
for key in keys:
count = self.r.get(key)
ttl = self.r.ttl(key)
if ttl != -1:
result[key] = dict(count=count, ttl=ttl)
return result
class Category(Mongo):
def get_category(self, query: dict):
page, size, douban = query["page"], query["size"], query["douban"]
query.pop("page")
query.pop("size")
query.pop("douban")
query_dict = {}
for key, value in query.items():
query_dict[f"data.info.{key}"] = value
logging.info("Query dict %s", query_dict)
projection = {"_id": False, "data.list": False}
data = (
self.db["yyets"]
.find(query_dict, projection=projection)
.limit(size)
.skip((page - 1) * size)
)
count = self.db["yyets"].count_documents(query_dict)
f = []
for item in data:
if douban:
douban_data = self.db["douban"].find_one(
{"resourceId": item["data"]["info"]["id"]}, projection=projection
)
if douban_data:
douban_data["posterData"] = base64.b64encode(
douban_data["posterData"]
).decode("u8")
item["data"]["info"]["douban"] = douban_data
else:
item["data"]["info"]["douban"] = {}
f.append(item["data"]["info"])
return dict(data=f, count=count)
class SpamProcess(Mongo):
def ban_spam(self, obj_id: "str"):
obj_id = ObjectId(obj_id)
logging.info("Deleting spam %s", obj_id)
spam = self.db["spam"].find_one({"_id": obj_id})
username = spam["username"]
self.db["spam"].delete_many({"username": username})
# self.db["comment"].delete_many({"username": username})
cf.ban_new_ip(spam["ip"])
return {"status": True}
def restore_spam(self, obj_id: "str"):
obj_id = ObjectId(obj_id)
spam = self.db["spam"].find_one({"_id": obj_id}, projection={"_id": False})
logging.info("Restoring spam %s", spam)
self.db["comment"].insert_one(spam)
self.db["spam"].delete_one({"_id": obj_id})
return {"status": True}
@staticmethod
def request_approval(document: "dict"):
token = os.getenv("TOKEN")
owner = os.getenv("OWNER")
obj_id = document["_id"]
data = {
"text": json.dumps(document, ensure_ascii=False, indent=4),
"chat_id": owner,
"reply_markup": {
"inline_keyboard": [
[
{"text": "approve", "callback_data": f"approve{obj_id}"},
{"text": "ban", "callback_data": f"ban{obj_id}"},
]
]
},
}
api = f"https://api.telegram.org/bot{token}/sendMessage"
resp = requests.post(api, json=data).json()
logging.info("Telegram response: %s", resp)
class Other(Mongo):
def reset_top(self):
# before resetting, save top data to history
json_data = requests.get("http://127.0.0.1:8888/api/top").json()
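        # this job runs on the 1st of each month (cron "0 0 1 * *"), so one day back is last month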
last_month = time.strftime("%Y-%m", time.localtime(time.time() - 3600 * 24))
json_data["date"] = last_month
json_data["type"] = "top"
self.db["history"].insert_one(json_data)
# save all the views data to history
projection = {"_id": False, "data.info.views": True, "data.info.id": True}
data = (
self.db["yyets"]
.find({}, projection)
.sort("data.info.views", pymongo.DESCENDING)
)
result = {"date": last_month, "type": "detail"}
for datum in data:
rid = str(datum["data"]["info"]["id"])
views = datum["data"]["info"]["views"]
result[rid] = views
self.db["history"].insert_one(result)
# reset
self.db["yyets"].update_many({}, {"$set": {"data.info.views": 0}})
def import_ban_user(self):
usernames = self.db["users"].find(
{"status.disable": True}, projection={"username": True}
)
r = Redis().r
r.delete("user_blacklist")
logging.info("Importing ban users to redis...%s", usernames)
for username in [u["username"] for u in usernames]:
r.hset("user_blacklist", username, 100)
r.close()
class Captcha(Redis):
def get_captcha(self, captcha_id):
chars = "".join([random.choice(predefined_str) for _ in range(4)])
image = ImageCaptcha()
data = image.generate(chars)
self.r.set(captcha_id, chars, ex=captcha_ex)
return (
f"data:image/png;base64,{base64.b64encode(data.getvalue()).decode('ascii')}"
)
def verify_code(self, user_input, captcha_id) -> dict:
correct_code = self.r.get(captcha_id)
if not correct_code:
return {"status": False, "message": "验证码已过期"}
        if user_input.lower() == correct_code.lower():
            # delete by key: the captcha is stored under captcha_id, not under the code value
            self.r.delete(captcha_id)
            return {"status": True, "message": "验证通过"}
else:
return {"status": False, "message": "验证码错误"}

View File

@@ -0,0 +1,342 @@
#!/usr/bin/env python3
# coding: utf-8
import contextlib
import json
import logging
import os
import random
from typing import Union
import pymongo
import requests
import zhconv
from tqdm import tqdm
from common.utils import ts_date
from databases.base import Mongo, Redis, SearchEngine
from databases.comment import CommentSearch
class Resource(SearchEngine):
def fansub_search(self, class_name: str, kw: str):
class_ = globals().get(class_name)
        result = class_().search_preview(kw)
        # "class" is metadata, not a search hit; drop it defensively since it may be absent
        result.pop("class", None)
        if result:
            return list(result.values())
        else:
            return []
def get_resource_data(self, resource_id: int, username: str) -> dict:
data: "dict" = self.db["yyets"].find_one_and_update(
{"data.info.id": resource_id},
{"$inc": {"data.info.views": 1}},
{"_id": False},
)
if not data:
return {}
if username:
user_like_data = self.db["users"].find_one({"username": username})
if user_like_data and resource_id in user_like_data.get("like", []):
data["is_like"] = True
else:
data["is_like"] = False
return data
def search_resource(self, keyword: str, search_type: "str") -> dict:
if os.getenv("MEILISEARCH"):
return self.meili_search(keyword, search_type)
else:
return self.mongodb_search(keyword)
def meili_search(self, keyword: "str", search_type: "str") -> dict:
returned = {"data": [], "comment": [], "extra": []}
if search_type == "default":
yyets = self.search_yyets(keyword)
comment = self.search_comment(keyword)
returned["data"] = yyets
returned["comment"] = comment
return returned
elif search_type == "douban":
douban = self.search_douban(keyword)
returned["data"] = douban
return returned
elif search_type == "fansub":
# TODO disable fansub for now
# fansub = self.search_extra(keyword)
# returned["extra"] = fansub
return returned
else:
return returned
def mongodb_search(self, keyword: str) -> dict:
# convert any text to zh-hans - only for traditional search with MongoDB
keyword = zhconv.convert(keyword, "zh-hans")
zimuzu_data = []
returned = {"data": [], "extra": [], "comment": []}
projection = {"_id": False, "data.info": True}
resource_data = self.db["yyets"].find(
{
"$or": [
{"data.info.cnname": {"$regex": f".*{keyword}.*", "$options": "i"}},
{"data.info.enname": {"$regex": f".*{keyword}.*", "$options": "i"}},
{
"data.info.aliasname": {
"$regex": f".*{keyword}.*",
"$options": "i",
}
},
]
},
projection,
)
for item in resource_data:
item["data"]["info"]["origin"] = "yyets"
zimuzu_data.append(item["data"]["info"])
# get comment
r = CommentSearch().get_comment(1, 2**10, keyword)
c_search = []
for c in r.get("data", []):
comment_rid = c["resource_id"]
d = self.db["yyets"].find_one(
{"data.info.id": comment_rid}, projection={"data.info": True}
)
if d:
c_search.append(
{
"username": c["username"],
"date": c["date"],
"comment": c["content"],
"commentID": c["id"],
"resourceID": comment_rid,
"resourceName": d["data"]["info"]["cnname"],
"origin": "comment",
"hasAvatar": c["hasAvatar"],
}
)
# zimuzu -> comment -> extra
if zimuzu_data:
returned["data"] = zimuzu_data
elif not c_search:
# only returned when no data found
returned["extra"] = self.search_extra(keyword)
# comment data will always be returned
returned["comment"] = c_search
return returned
def search_extra(self, keyword: "str") -> list:
order = os.getenv(
"ORDER", "YYeTsOffline,ZimuxiaOnline,NewzmzOnline,ZhuixinfanOnline"
).split(",")
order.pop(0)
extra = []
with contextlib.suppress(requests.exceptions.RequestException):
for name in order:
extra = self.fansub_search(name, keyword)
if extra:
break
return extra
def patch_resource(self, new_data: dict):
rid = new_data["resource_id"]
new_data.pop("resource_id")
old_data = self.db["yyets"].find_one(
{"data.info.id": rid},
)
new_data["season_cn"] = self.convert_season(new_data["season_num"])
        # 1. totally empty resource:
        if len(old_data["data"]["list"]) == 0:
            old_data["data"]["list"].append(new_data)
else:
for season in old_data["data"]["list"]:
if new_data["season_num"] in [
season["season_num"],
int(season["season_num"]),
]:
user_format = new_data["formats"][0]
for u in new_data["items"][user_format]:
season["items"][user_format].append(u)
self.db["yyets"].find_one_and_replace({"data.info.id": rid}, old_data)
def add_resource(self, new_data: dict):
rid = self.get_appropriate_id()
new_data["data"]["info"]["id"] = rid
self.db["yyets"].insert_one(new_data)
return {"status": True, "message": "success", "id": rid}
def delete_resource(self, data: dict):
rid = data["resource_id"]
meta = data.get("meta")
if meta:
db_data = self.db["yyets"].find_one({"data.info.id": rid})
for season in db_data["data"]["list"]:
for episode in season["items"].values():
for v in episode:
if (
v["episode"] == meta["episode"]
and v["name"] == meta["name"]
and v["size"] == meta["size"]
and v["dateline"] == meta["dateline"]
):
episode.remove(v)
# replace it
self.db["yyets"].find_one_and_replace({"data.info.id": rid}, db_data)
else:
self.db["yyets"].delete_one({"data.info.id": rid})
def get_appropriate_id(self):
col = self.db["yyets"]
random_id = random.randint(50000, 80000)
data = col.find_one({"data.info.id": random_id}, projection={"_id": True})
if data:
return self.get_appropriate_id()
else:
return random_id
    @staticmethod
    def convert_season(number: Union[int, str]):
        if number in (0, "0"):
            return "正片"
        else:
            return f"{number}"
class Top(Mongo):
projection = {"_id": False, "data.info": True}
def get_most(self) -> list:
projection = {"_id": False, "like": True}
data = self.db["users"].find({}, projection)
most_like = {}
for item in data:
for _id in item.get("like", []):
most_like[_id] = most_like.get(_id, 0) + 1
        most = sorted(most_like, key=most_like.get, reverse=True)
most_like_data = (
self.db["yyets"]
.find({"data.info.id": {"$in": most}}, self.projection)
.limit(15)
)
return list(most_like_data)
def get_top_resource(self) -> dict:
area_dict = dict(ALL={"$regex": ".*"}, US="美国", JP="日本", KR="韩国", UK="英国")
all_data = {"ALL": "全部"}
for abbr, area in area_dict.items():
data = (
self.db["yyets"]
.find(
{"data.info.area": area, "data.info.id": {"$ne": 233}},
self.projection,
)
.sort("data.info.views", pymongo.DESCENDING)
.limit(15)
)
all_data[abbr] = list(data)
all_data["class"] = area_dict
return all_data
class ResourceLatest(Mongo):
@staticmethod
def get_latest_resource() -> dict:
redis = Redis().r
key = "latest-resource"
latest = redis.get(key)
if latest:
logging.info("Cache hit for latest resource")
latest = json.loads(latest)
latest["data"] = latest["data"][:100]
else:
logging.warning("Cache miss for latest resource")
latest = ResourceLatest().query_db()
redis.set(key, json.dumps(latest, ensure_ascii=False))
return latest
def query_db(self) -> dict:
col = self.db["yyets"]
projection = {"_id": False, "status": False, "info": False}
episode_data = {}
for res in tqdm(col.find(projection=projection), total=col.count_documents({})):
for season in res["data"].get("list", []):
for item in season["items"].values():
for single in item:
ts = single["dateline"]
res_name = res["data"]["info"]["cnname"]
name = "{}-{}".format(res_name, single["name"])
size = single["size"]
episode_data[name] = {
"timestamp": ts,
"size": size,
"resource_id": res["data"]["info"]["id"],
"res_name": res_name,
"date": ts_date(int(ts)),
}
sorted_res: list = sorted(
episode_data.items(), key=lambda x: x[1]["timestamp"], reverse=True
)
limited_res = dict(sorted_res[:100])
ok = []
for k, v in limited_res.items():
t = {"name": k}
t.update(v)
ok.append(t)
return dict(data=ok)
def refresh_latest_resource(self):
redis = Redis().r
logging.info("Getting new resources...")
latest = self.query_db()
redis.set("latest-resource", json.dumps(latest, ensure_ascii=False))
logging.info("latest-resource data refreshed.")
class Name(Mongo):
    def get_names(self, is_readable: Union[str, bool]) -> dict:
if is_readable:
aggregation = [
{
"$project": {
"name": {
"$concat": [
"$data.info.area",
"$data.info.channel_cn",
": ",
"$data.info.cnname",
" ",
"$data.info.enname",
" ",
"$data.info.aliasname",
]
},
"_id": False,
}
}
]
query_cursor = self.db["yyets"].aggregate(aggregation)
else:
projection = {
"_id": False,
"data.info.cnname": True,
"data.info.enname": True,
"data.info.aliasname": True,
"data.info.channel_cn": True,
}
query_cursor = self.db["yyets"].find({}, projection)
data = []
for i in query_cursor:
data.extend(i.values())
return dict(data=data)
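One side note on Resource.get_appropriate_id above: it retries recursively until a free id turns up, which in a pathological case could hit Python's recursion limit. An equivalent iterative sketch, not part of the commit:

def get_appropriate_id_iterative(col):
    # draw random ids until one is unused; same behavior as the recursive version
    while True:
        random_id = random.randint(50000, 80000)
        if not col.find_one({"data.info.id": random_id}, projection={"_id": True}):
            return random_id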

yyetsweb/databases/user.py Normal file
View File

@@ -0,0 +1,232 @@
#!/usr/bin/env python3
# coding: utf-8
import os
import random
import re
from http import HTTPStatus
import filetype
import pymongo
from passlib.handlers.pbkdf2 import pbkdf2_sha256
from common.utils import send_mail, ts_date
from databases.base import Mongo, Redis
class Like(Mongo):
projection = {"_id": False, "data.info": True}
def get_user_like(self, username: str) -> list:
like_list = self.db["users"].find_one({"username": username}).get("like", [])
data = (
self.db["yyets"]
.find({"data.info.id": {"$in": like_list}}, self.projection)
.sort("data.info.views", pymongo.DESCENDING)
)
return list(data)
def add_remove_fav(self, resource_id: int, username: str) -> dict:
returned = {"status_code": 0, "message": ""}
like_list: list = (
self.db["users"].find_one({"username": username}).get("like", [])
)
if resource_id in like_list:
returned["status_code"] = HTTPStatus.OK
returned["message"] = "已取消收藏"
like_list.remove(resource_id)
else:
returned["status_code"] = HTTPStatus.CREATED
returned["message"] = "已添加收藏"
like_list.append(resource_id)
value = dict(like=like_list)
self.db["users"].update_one({"username": username}, {"$set": value})
return returned
class User(Mongo, Redis):
def login_user(
self,
username: str,
password: str,
captcha: str,
captcha_id: str,
ip: str,
browser: str,
) -> dict:
# verify captcha in the first place.
correct_captcha = self.r.get(captcha_id)
if correct_captcha is None:
return {
"status_code": HTTPStatus.BAD_REQUEST,
"message": "验证码已过期",
"status": False,
}
elif correct_captcha.lower() == captcha.lower():
self.r.expire(captcha_id, 0)
else:
return {
"status_code": HTTPStatus.FORBIDDEN,
"message": "验证码错误",
"status": False,
}
# check user account is locked.
data = self.db["users"].find_one({"username": username}) or {}
if data.get("status", {}).get("disable"):
return {
"status_code": HTTPStatus.FORBIDDEN,
"status": False,
"message": data.get("status", {}).get("reason"),
}
returned_value = {"status_code": 0, "message": ""}
if data:
stored_password = data["password"]
if pbkdf2_sha256.verify(password, stored_password):
returned_value["status_code"] = HTTPStatus.OK
else:
returned_value["status_code"] = HTTPStatus.FORBIDDEN
returned_value["message"] = "用户名或密码错误"
else:
if os.getenv("DISABLE_REGISTER"):
return {"status_code": HTTPStatus.BAD_REQUEST, "message": "本站已经暂停注册"}
# register
hash_value = pbkdf2_sha256.hash(password)
try:
self.db["users"].insert_one(
dict(
username=username,
password=hash_value,
date=ts_date(),
ip=ip,
browser=browser,
)
)
returned_value["status_code"] = HTTPStatus.CREATED
except Exception as e:
returned_value["status_code"] = HTTPStatus.INTERNAL_SERVER_ERROR
returned_value["message"] = str(e)
returned_value["username"] = data.get("username")
returned_value["group"] = data.get("group", ["user"])
return returned_value
def get_user_info(self, username: str) -> dict:
projection = {"_id": False, "password": False}
data = self.db["users"].find_one({"username": username}, projection)
data.update(group=data.get("group", ["user"]))
data["hasAvatar"] = bool(data.pop("avatar", None))
return data
def update_user_last(self, username: str, now_ip: str) -> None:
self.db["users"].update_one(
{"username": username},
{"$set": {"lastDate": (ts_date()), "lastIP": now_ip}},
)
def update_user_info(self, username: str, data: dict) -> dict:
redis = Redis().r
valid_fields = ["email"]
valid_data = {}
for field in valid_fields:
if data.get(field):
valid_data[field] = data[field]
email_regex = r"@gmail\.com|@outlook\.com|@qq\.com|@163\.com"
if valid_data.get("email") and not re.findall(
email_regex, valid_data.get("email"), re.IGNORECASE
):
return {
"status_code": HTTPStatus.BAD_REQUEST,
"status": False,
"message": "不支持的邮箱",
}
elif valid_data.get("email"):
# rate limit
user_email = valid_data.get("email")
timeout_key = f"timeout-{username}"
if redis.get(timeout_key):
return {
"status_code": HTTPStatus.TOO_MANY_REQUESTS,
"status": False,
"message": f"验证次数过多,请于{redis.ttl(timeout_key)}秒后尝试",
}
verify_code = random.randint(10000, 99999)
valid_data["email"] = {"verified": False, "address": user_email}
# send email confirm
subject = "[人人影视下载分享站] 请验证你的邮箱"
text = (
f"请输入如下验证码完成你的邮箱认证。验证码有效期为24小时。<br>"
f"如果您未有此请求,请忽略此邮件。<br><br>验证码: {verify_code}"
)
context = {"username": username, "text": text}
send_mail(user_email, subject, context)
            # only set the rate-limit cache after the mail is sent successfully
redis.set(timeout_key, username, ex=1800)
redis.hset(user_email, mapping={"code": verify_code, "wrong": 0})
redis.expire(user_email, 24 * 3600)
self.db["users"].update_one({"username": username}, {"$set": valid_data})
return {
"status_code": HTTPStatus.CREATED,
"status": True,
"message": "邮件已经成功发送",
}
class UserAvatar(User, Mongo):
def add_avatar(self, username, avatar):
self.db["users"].update_one(
{"username": username}, {"$set": {"avatar": avatar}}
)
return {"status_code": HTTPStatus.CREATED, "message": "头像上传成功"}
def get_avatar(self, username):
user = self.db["users"].find_one({"username": username})
img = user.get("avatar", b"")
mime = filetype.guess_mime(img)
return {"image": img, "content_type": mime}
class UserEmail(Mongo):
def verify_email(self, username, code):
r = Redis().r
email = self.db["users"].find_one({"username": username})["email"]["address"]
verify_data = r.hgetall(email)
wrong_count = int(verify_data["wrong"])
MAX = 10
if wrong_count >= MAX:
self.db["users"].update_one(
{"username": username},
{"$set": {"status": {"disable": True, "reason": "verify email crack"}}},
)
return {
"status": False,
"status_code": HTTPStatus.FORBIDDEN,
"message": "账户已被封锁",
}
correct_code = verify_data["code"]
if correct_code == code:
r.expire(email, 0)
r.expire(f"timeout-{email}", 0)
self.db["users"].update_one(
{"username": username}, {"$set": {"email.verified": True}}
)
return {
"status": True,
"status_code": HTTPStatus.CREATED,
"message": "邮箱已经验证成功",
}
else:
r.hset(email, "wrong", wrong_count + 1)
return {
"status": False,
"status_code": HTTPStatus.FORBIDDEN,
"message": f"验证码不正确。你还可以尝试 {MAX - wrong_count}",
}
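login_user above delegates password storage to passlib's pbkdf2_sha256, which salts and encodes everything into a single string. A minimal standalone sketch of that primitive:

from passlib.handlers.pbkdf2 import pbkdf2_sha256

hashed = pbkdf2_sha256.hash("hunter2")          # salted hash, safe to store as-is
assert pbkdf2_sha256.verify("hunter2", hashed)  # True for the right password
assert not pbkdf2_sha256.verify("wrong", hashed)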

Binary file not shown.

File diff suppressed because it is too large

View File

@@ -0,0 +1,11 @@
#!/usr/bin/env python3
# coding: utf-8
import json
from tornado import escape
from common.utils import Cloudflare, setup_logger
setup_logger()
cf = Cloudflare()
escape.json_encode = lambda value: json.dumps(value, ensure_ascii=False)

yyetsweb/handlers/base.py Normal file
View File

@@ -0,0 +1,120 @@
#!/usr/bin/env python3
# coding: utf-8
import contextlib
import importlib
import json
import logging
import pathlib
from concurrent.futures import ThreadPoolExecutor
from http import HTTPStatus
from pathlib import Path
from tornado import gen, web
from tornado.concurrent import run_on_executor
from databases.base import Redis
from handlers import cf
index = (
pathlib.Path(__file__).parent.parent.joinpath("templates", "index.html").as_posix()
)
filename = Path(__file__).name.split(".")[0]
class BaseHandler(web.RequestHandler):
key = "user_blacklist"
filename = filename
executor = ThreadPoolExecutor(200)
def __init__(self, application, request, **kwargs):
super().__init__(application, request, **kwargs)
self.json = {}
with contextlib.suppress(ValueError):
self.json: dict = json.loads(self.request.body)
class_name = self.__class__.__name__.split("Handler")[0]
module = importlib.import_module(f"databases.{self.filename}")
self.instance = getattr(module, class_name)()
self.r = Redis().r
def prepare(self):
if self.check_request():
self.set_status(HTTPStatus.FORBIDDEN)
self.finish()
def data_received(self, chunk):
pass
def check_request(self):
ban = self.__ip_check()
user = self.__user_check()
result = ban or user
if result:
self.ban()
return result
def get_real_ip(self):
x_real = self.request.headers.get("X-Real-IP")
remote_ip = self.request.remote_ip
logging.debug("X-Real-IP:%s, Remote-IP:%s", x_real, remote_ip)
return x_real or remote_ip
def ban(self):
ip = self.get_real_ip()
self.r.incr(ip)
count = int(self.r.get(ip))
        # ban rule: 120s for the first 10 hits, then (count - 10) * 600 seconds per hit
        if count <= 10:
            ex = 120
        else:
            ex = (count - 10) * 600
if count >= 30:
cf.ban_new_ip(ip)
self.r.set(ip, count, ex)
user = self.get_current_user()
if user:
self.r.hincrby(self.key, user)
def get_current_user(self) -> str:
username = self.get_secure_cookie("username") or b""
return username.decode("u8")
def __user_check(self):
count = self.r.hget(self.key, self.get_current_user()) or 0
count = int(count)
if count >= 20:
return True
def __ip_check(self):
d = self.r.get(self.get_real_ip()) or 0
if int(d) >= 10:
return True
def write_error(self, status_code, **kwargs):
if status_code in [
HTTPStatus.FORBIDDEN,
HTTPStatus.UNAUTHORIZED,
HTTPStatus.NOT_FOUND,
HTTPStatus.INTERNAL_SERVER_ERROR,
]:
self.write(str(kwargs.get("exc_info")))
class IndexHandler(BaseHandler):
@run_on_executor()
def send_index(self):
with open(index, encoding="u8") as f:
html = f.read()
return html
@gen.coroutine
def get(self):
resp = yield self.send_index()
self.write(resp)
class NotFoundHandler(BaseHandler):
def get(self):
# for react app
self.render(index)
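To make the escalation in BaseHandler.ban concrete, a small sketch mirroring the rule above:

def ban_seconds(count: int) -> int:
    # 120s for the first 10 hits, then 10 minutes per extra hit
    return 120 if count <= 10 else (count - 10) * 600

assert ban_seconds(5) == 120
assert ban_seconds(11) == 600
assert ban_seconds(30) == 12000  # at 30 hits the IP is also banned via Cloudflare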

View File

@@ -0,0 +1,224 @@
#!/usr/bin/env python3
# coding: utf-8
import re
from http import HTTPStatus
from pathlib import Path
from tornado import gen, web
from tornado.concurrent import run_on_executor
from handlers.base import BaseHandler
filename = Path(__file__).name.split(".")[0]
class CommentHandler(BaseHandler):
filename = filename
@staticmethod
def hide_phone(data: list):
for item in data:
if item["username"].isdigit() and len(item["username"]) == 11:
item["username"] = re.sub(
r"(\d{3})\d{4}(\d{4})", r"\g<1>****\g<2>", item["username"]
)
return data
@run_on_executor()
def get_comment(self):
resource_id = int(self.get_argument("resource_id", "0"))
size = int(self.get_argument("size", "5"))
page = int(self.get_argument("page", "1"))
inner_size = int(self.get_argument("inner_size", "3"))
inner_page = int(self.get_argument("inner_page", "1"))
comment_id = self.get_argument("comment_id", None)
if not resource_id:
self.set_status(HTTPStatus.BAD_REQUEST)
return {"status": False, "message": "请提供resource id"}
comment_data = self.instance.get_comment(
resource_id,
page,
size,
inner_size=inner_size,
inner_page=inner_page,
comment_id=comment_id,
)
        self.hide_phone(comment_data["data"])
return comment_data
@run_on_executor()
def add_comment(self):
payload = self.json
captcha = payload["captcha"]
captcha_id = payload["id"]
content = payload["content"]
resource_id = payload["resource_id"]
comment_id = payload.get("comment_id")
real_ip = self.get_real_ip()
username = self.get_current_user()
browser = self.request.headers["user-agent"]
result = self.instance.add_comment(
captcha,
captcha_id,
content,
resource_id,
real_ip,
username,
browser,
comment_id,
)
self.set_status(result["status_code"])
return result
@run_on_executor()
def delete_comment(self):
# need resource_id & id
# payload = {"id": "obj_id"}
payload = self.json
username = self.get_current_user()
comment_id = payload["comment_id"]
if self.instance.is_admin(username):
result = self.instance.delete_comment(comment_id)
self.set_status(result["status_code"])
return result
else:
self.set_status(HTTPStatus.UNAUTHORIZED)
return {"count": 0, "message": "You're unauthorized to delete comment."}
@gen.coroutine
def get(self):
resp = yield self.get_comment()
self.write(resp)
@gen.coroutine
@web.authenticated
def post(self):
resp = yield self.add_comment()
self.write(resp)
@gen.coroutine
@web.authenticated
def delete(self):
resp = yield self.delete_comment()
self.write(resp)
class CommentReactionHandler(BaseHandler):
filename = filename
@run_on_executor()
def comment_reaction(self):
self.json.update(method=self.request.method)
username = self.get_current_user()
result = self.instance.react_comment(username, self.json)
self.set_status(result.get("status_code"))
return result
@gen.coroutine
@web.authenticated
def post(self):
resp = yield self.comment_reaction()
self.write(resp)
@gen.coroutine
@web.authenticated
def delete(self):
resp = yield self.comment_reaction()
self.write(resp)
class CommentChildHandler(CommentHandler):
filename = filename
@run_on_executor()
def get_comment(self):
parent_id = self.get_argument("parent_id", "0")
size = int(self.get_argument("size", "3"))
page = int(self.get_argument("page", "1"))
if not parent_id:
self.set_status(HTTPStatus.BAD_REQUEST)
return {"status": False, "message": "请提供 parent_id"}
comment_data = self.instance.get_comment(parent_id, page, size)
        self.hide_phone(comment_data["data"])
return comment_data
@gen.coroutine
def get(self):
resp = yield self.get_comment()
self.write(resp)
class CommentNewestHandler(CommentHandler):
filename = filename
@run_on_executor()
def get_comment(self):
size = int(self.get_argument("size", "5"))
page = int(self.get_argument("page", "1"))
comment_data = self.instance.get_comment(page, size)
        self.hide_phone(comment_data["data"])
return comment_data
@gen.coroutine
def get(self):
resp = yield self.get_comment()
self.write(resp)
class CommentSearchHandler(CommentHandler):
filename = filename
@run_on_executor()
def search_comment(self):
size = int(self.get_argument("size", "5"))
page = int(self.get_argument("page", "1"))
keyword = self.get_argument("keyword", "")
comment_data = self.instance.get_comment(page, size, keyword)
        self.hide_phone(comment_data["data"])
return comment_data
@gen.coroutine
def get(self):
resp = yield self.search_comment()
self.write(resp)
class NotificationHandler(BaseHandler):
filename = filename
@run_on_executor()
def get_notification(self):
username = self.get_current_user()
size = int(self.get_argument("size", "5"))
page = int(self.get_argument("page", "1"))
return self.instance.get_notification(username, page, size)
@run_on_executor()
def update_notification(self):
username = self.get_current_user()
verb = self.json["verb"]
comment_id = self.json["comment_id"]
if verb not in ["read", "unread"]:
self.set_status(HTTPStatus.BAD_REQUEST)
return {"status": False, "message": "verb: read or unread"}
self.set_status(HTTPStatus.CREATED)
return self.instance.update_notification(username, verb, comment_id)
@gen.coroutine
@web.authenticated
def get(self):
resp = yield self.get_notification()
self.write(resp)
@gen.coroutine
@web.authenticated
def patch(self):
resp = yield self.update_notification()
self.write(resp)

View File

@@ -0,0 +1,72 @@
#!/usr/bin/env python3
# coding: utf-8
from http import HTTPStatus
from pathlib import Path
import filetype
from tornado import gen
from tornado.concurrent import run_on_executor
from handlers.base import BaseHandler
filename = Path(__file__).name.split(".")[0]
class DoubanHandler(BaseHandler):
filename = filename
@run_on_executor()
def douban_data(self):
rid = self.get_query_argument("resource_id")
data = self.instance.get_douban_data(int(rid))
data.pop("posterData")
return data
def get_image(self) -> bytes:
rid = self.get_query_argument("resource_id")
return self.instance.get_douban_image(int(rid))
@gen.coroutine
def get(self):
_type = self.get_query_argument("type", None)
if _type == "image":
data = self.get_image()
self.set_header("content-type", filetype.guess_mime(data))
self.write(data)
else:
resp = yield self.douban_data()
self.write(resp)
class DoubanReportHandler(BaseHandler):
class_name = "DoubanReportResource"
@run_on_executor()
def get_error(self):
return self.instance.get_error()
@run_on_executor()
def report_error(self):
data = self.json
user_captcha = data["captcha_id"]
captcha_id = data["id"]
content = data["content"]
resource_id = data["resource_id"]
        returned = self.instance.report_error(
            user_captcha, captcha_id, content, resource_id
        )
        status_code = returned.get("status_code", HTTPStatus.CREATED)
        self.set_status(status_code)
        # return the first result instead of reporting the same error twice
        return returned
@gen.coroutine
def post(self):
resp = yield self.report_error()
self.write(resp)
@gen.coroutine
def get(self):
resp = yield self.get_error()
self.write(resp)

View File

@@ -0,0 +1,128 @@
#!/usr/bin/env python3
# coding: utf-8
import json
import time
from datetime import date, timedelta
from http import HTTPStatus
from pathlib import Path
from tornado import gen
from tornado.concurrent import run_on_executor
from handlers.base import BaseHandler
filename = Path(__file__).name.split(".")[0]
class MetricsHandler(BaseHandler):
filename = filename
@run_on_executor()
def set_metrics(self):
payload = self.json
metrics_type = payload["type"]
self.instance.set_metrics(metrics_type)
self.set_status(HTTPStatus.CREATED)
return {}
@run_on_executor()
def get_metrics(self):
if not self.instance.is_admin(self.get_current_user()):
self.set_status(HTTPStatus.NOT_FOUND)
return ""
        # default to the latest 7 days; the from/to query parameters select a different range
from_date = self.get_query_argument("from", None)
to_date = self.get_query_argument("to", None)
if to_date is None:
to_date = time.strftime("%Y-%m-%d", time.localtime())
if from_date is None:
from_date = time.strftime(
"%Y-%m-%d", time.localtime(time.time() - 3600 * 24 * 7)
)
return self.instance.get_metrics(from_date, to_date)
@gen.coroutine
def get(self):
resp = yield self.get_metrics()
self.write(resp)
@gen.coroutine
def post(self):
resp = yield self.set_metrics()
self.write(resp)
class GrafanaIndexHandler(BaseHandler):
def get(self):
self.write({})
class GrafanaSearchHandler(BaseHandler):
def post(self):
data = [
"resource",
"top",
"home",
"search",
"extra",
"discuss",
"multiDownload",
"download",
"user",
"share",
"me",
"database",
"help",
"backOld",
"favorite",
"unFavorite",
"comment",
]
self.write(json.dumps(data))
class GrafanaQueryHandler(BaseHandler):
filename = filename
@staticmethod
def generate_date_series(start: str, end: str) -> list:
start_int = [int(i) for i in start.split("-")]
end_int = [int(i) for i in end.split("-")]
sdate = date(*start_int) # start date
edate = date(*end_int) # end date
delta = edate - sdate # as timedelta
days = []
for i in range(delta.days + 1):
day = sdate + timedelta(days=i)
days.append(day.strftime("%Y-%m-%d"))
return days
@staticmethod
def time_str_int(text):
return time.mktime(time.strptime(text, "%Y-%m-%d"))
def post(self):
payload = self.json
start = payload["range"]["from"].split("T")[0]
end = payload["range"]["to"].split("T")[0]
date_series = self.generate_date_series(start, end)
targets = [i["target"] for i in payload["targets"] if i["target"]]
grafana_data = []
for target in targets:
data_points = []
result = self.instance.get_grafana_data(date_series)
i: dict
for i in result:
datum = (
[i[target], self.time_str_int(i["date"]) * 1000]
if i.get(target)
else []
)
data_points.append(datum)
temp = {"target": target, "datapoints": data_points}
grafana_data.append(temp)
self.write(json.dumps(grafana_data))
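The three Grafana handlers speak a SimpleJSON-style datasource contract: /search returns the available targets, and /query turns a time range plus targets into datapoint pairs. An illustrative request body for GrafanaQueryHandler.post (values are made up):

query = {
    "range": {"from": "2023-03-01T00:00:00Z", "to": "2023-03-07T00:00:00Z"},
    "targets": [{"target": "search"}, {"target": "home"}],
}
# response shape: [{"target": "search", "datapoints": [[count, unix_ms], ...]}, ...]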

yyetsweb/handlers/oauth.py Normal file
View File

@@ -0,0 +1,174 @@
#!/usr/bin/env python3
# coding: utf-8
import logging
import os
from pathlib import Path
from urllib.parse import urlencode
import requests
from tornado.auth import GoogleOAuth2Mixin, OAuth2Mixin, TwitterMixin
from handlers.base import BaseHandler
filename = Path(__file__).name.split(".")[0]
class OAuth2Handler(BaseHandler, OAuth2Mixin):
filename = filename
_OAUTH_AUTHORIZE_URL = ""
_OAUTH_ACCESS_TOKEN_URL = ""
_OAUTH_API_REQUEST_URL = ""
def add_oauth_user(self, username, unique, source):
logging.info("User %s login with %s now...", username, source)
ip = self.get_real_ip()
browser = self.request.headers["user-agent"]
result = self.instance.add_user(username, ip, browser, unique, source)
if result["status"] == "success":
self.set_secure_cookie("username", username, 365)
self.redirect("/login?" + urlencode(result))
def get_authenticated_user(
self, client_id: str, client_secret: str, code: str, extra_fields: dict = None
):
args = {"code": code, "client_id": client_id, "client_secret": client_secret}
if extra_fields:
args.update(extra_fields)
return requests.post(
self._OAUTH_ACCESS_TOKEN_URL,
headers={"Accept": "application/json"},
data=args,
).json()
def oauth2_sync_request(self, access_token, extra_fields=None):
return requests.get(
self._OAUTH_API_REQUEST_URL,
headers={"Authorization": f"Bearer {access_token}"},
params=extra_fields,
).json()
def get_secret(self, settings_key):
settings = self.settings.get(settings_key)
client_id = settings.get("key")
client_secret = settings.get("secret")
redirect_uri = os.getenv("DOMAIN") + self.request.path
return client_id, client_secret, redirect_uri
class GitHubOAuth2LoginHandler(OAuth2Handler):
_OAUTH_AUTHORIZE_URL = "https://github.com/login/oauth/authorize"
_OAUTH_ACCESS_TOKEN_URL = "https://github.com/login/oauth/access_token"
_OAUTH_API_REQUEST_URL = "https://api.github.com/user"
def get(self):
client_id, client_secret, redirect_uri = self.get_secret("github_oauth")
code = self.get_argument("code", None)
if code:
access = self.get_authenticated_user(client_id, client_secret, code)
resp = self.oauth2_sync_request(access["access_token"])
username = resp["login"]
db_id = resp["id"]
self.add_oauth_user(username, db_id, "GitHub")
else:
self.authorize_redirect(
redirect_uri=redirect_uri,
client_id=client_id,
scope=[],
response_type="code",
)
class MSOAuth2LoginHandler(OAuth2Handler):
_OAUTH_AUTHORIZE_URL = (
"https://login.microsoftonline.com/common/oauth2/v2.0/authorize"
)
_OAUTH_ACCESS_TOKEN_URL = (
"https://login.microsoftonline.com/common/oauth2/v2.0/token"
)
_OAUTH_API_REQUEST_URL = "https://graph.microsoft.com/v1.0/me"
def get(self):
client_id, client_secret, redirect_uri = self.get_secret("ms_oauth")
code = self.get_argument("code", None)
if code:
access = self.get_authenticated_user(
client_id,
client_secret,
code,
{"grant_type": "authorization_code", "redirect_uri": redirect_uri},
)
resp = self.oauth2_sync_request(access["access_token"])
email = resp["userPrincipalName"]
uid = resp["id"]
self.add_oauth_user(email, uid, "Microsoft")
else:
self.authorize_redirect(
redirect_uri=redirect_uri,
client_id=client_id,
scope=["https://graph.microsoft.com/User.Read"],
response_type="code",
)
class GoogleOAuth2LoginHandler(GoogleOAuth2Mixin, OAuth2Handler):
async def get(self):
redirect_uri = os.getenv("DOMAIN") + self.request.path
code = self.get_argument("code", None)
if code:
access = await self.get_authenticated_user(
redirect_uri=redirect_uri, code=code
)
user = await self.oauth2_request(
"https://www.googleapis.com/oauth2/v1/userinfo",
access_token=access["access_token"],
)
email = user["email"]
# Google's email can't be changed
self.add_oauth_user(email, email, "Google")
else:
self.authorize_redirect(
redirect_uri=redirect_uri,
client_id=self.settings["google_oauth"]["key"],
scope=["email"],
response_type="code",
extra_params={"approval_prompt": "auto"},
)
class TwitterOAuth2LoginHandler(TwitterMixin, OAuth2Handler):
async def get(self):
if self.get_argument("oauth_token", None):
user = await self.get_authenticated_user()
username = user["username"]
id_str = user["id_str"]
self.add_oauth_user(username, id_str, "Twitter")
else:
await self.authorize_redirect(extra_params={"x_auth_access_type": "read"})
class FacebookAuth2LoginHandler(OAuth2Handler):
_OAUTH_AUTHORIZE_URL = "https://www.facebook.com/v16.0/dialog/oauth"
_OAUTH_ACCESS_TOKEN_URL = "https://graph.facebook.com/oauth/access_token"
_OAUTH_API_REQUEST_URL = "https://graph.facebook.com/me"
def get(self):
client_id, client_secret, redirect_uri = self.get_secret("fb_oauth")
code = self.get_argument("code", None)
if code:
access = self.get_authenticated_user(
client_id, client_secret, code, {"redirect_uri": redirect_uri}
)
resp = self.oauth2_sync_request(
access["access_token"], {"fields": "name,id"}
)
            # Facebook doesn't allow fetching the email except for business accounts
uid = resp["id"]
email = "{}_{}".format(resp["name"], uid)
self.add_oauth_user(email, uid, "Facebook")
else:
self.authorize_redirect(
redirect_uri=redirect_uri,
client_id=client_id,
)

yyetsweb/handlers/other.py Normal file
View File

@@ -0,0 +1,197 @@
#!/usr/bin/env python3
# coding: utf-8
import logging
import os
import pathlib
import time
from hashlib import sha1
from http import HTTPStatus
from pathlib import Path
from tornado import gen, web
from tornado.concurrent import run_on_executor
from common.utils import ts_date
from databases.base import Redis
from handlers.base import BaseHandler
filename = Path(__file__).name.split(".")[0]
class AnnouncementHandler(BaseHandler):
filename = filename
@run_on_executor()
def get_announcement(self):
size = int(self.get_argument("size", "5"))
page = int(self.get_argument("page", "1"))
return self.instance.get_announcement(page, size)
@run_on_executor()
def add_announcement(self):
username = self.get_current_user()
if not self.instance.is_admin(username):
self.set_status(HTTPStatus.FORBIDDEN)
return {"message": "只有管理员可以设置公告"}
payload = self.json
content = payload["content"]
real_ip = self.get_real_ip()
browser = self.request.headers["user-agent"]
self.instance.add_announcement(username, content, real_ip, browser)
self.set_status(HTTPStatus.CREATED)
return {"message": "添加成功"}
@gen.coroutine
def get(self):
resp = yield self.get_announcement()
self.write(resp)
@gen.coroutine
@web.authenticated
def post(self):
resp = yield self.add_announcement()
self.write(resp)
class DBDumpHandler(BaseHandler):
@staticmethod
def sizeof_fmt(num: int, suffix="B"):
for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]:
if abs(num) < 1024.0:
return "%3.1f%s%s" % (num, unit, suffix)
num /= 1024.0
return "%.1f%s%s" % (num, "Yi", suffix)
@staticmethod
def checksum(file_path) -> str:
sha = sha1()
try:
with open(file_path, "rb") as f:
sha.update(f.read())
checksum = sha.hexdigest()
except Exception as e:
checksum = str(e)
return checksum
@run_on_executor()
@Redis.cache(3600)
def get_hash(self):
file_list = [
"templates/dump/yyets_mongo.gz",
"templates/dump/yyets_mysql.zip",
"templates/dump/yyets_sqlite.zip",
]
result = {}
for fp in file_list:
checksum = self.checksum(fp)
creation = ts_date(os.stat(fp).st_ctime)
size = self.sizeof_fmt(os.stat(fp).st_size)
result[Path(fp).name] = {
"checksum": checksum,
"date": creation,
"size": size,
}
return result
@gen.coroutine
def get(self):
resp = yield self.get_hash()
self.write(resp)
class CategoryHandler(BaseHandler):
filename = filename
@run_on_executor()
def get_data(self):
self.json = {k: self.get_argument(k) for k in self.request.arguments}
self.json["size"] = int(self.json.get("size", 15))
self.json["page"] = int(self.json.get("page", 1))
self.json["douban"] = self.json.get("douban", False)
return self.instance.get_category(self.json)
@gen.coroutine
def get(self):
resp = yield self.get_data()
self.write(resp)
class CaptchaHandler(BaseHandler):
filename = filename
@run_on_executor()
def verify_captcha(self):
data = self.json
captcha_id = data.get("id", None)
userinput = data.get("captcha", None)
        if captcha_id is None or userinput is None:
            self.set_status(HTTPStatus.BAD_REQUEST)
            return "Please supply both id and captcha parameters."
returned = self.instance.verify_code(userinput, captcha_id)
status_code = returned.get("status")
if not status_code:
self.set_status(HTTPStatus.FORBIDDEN)
return returned
@run_on_executor()
def captcha(self):
request_id = self.get_argument("id", None)
if request_id is None:
self.set_status(HTTPStatus.BAD_REQUEST)
return "Please supply id parameter."
return self.instance.get_captcha(request_id)
@gen.coroutine
def get(self):
resp = yield self.captcha()
self.write(resp)
@gen.coroutine
def post(self):
resp = yield self.verify_captcha()
self.write(resp)
class BlacklistHandler(BaseHandler):
filename = filename
@run_on_executor()
def get_black_list(self):
return self.instance.get_black_list()
@gen.coroutine
def get(self):
resp = yield self.get_black_list()
self.write(resp)
class SpamProcessHandler(BaseHandler):
filename = filename
def process(self, method):
obj_id = self.json.get("obj_id")
token = self.json.get("token")
ua = self.request.headers["user-agent"]
ip = self.get_real_ip()
logging.info("Authentication %s(%s) for spam API now...", ua, ip)
if token == os.getenv("TOKEN"):
return getattr(self.instance, method)(obj_id)
else:
self.set_status(HTTPStatus.FORBIDDEN)
return {
"status": False,
"message": "This token is not allowed to access this API",
}
@gen.coroutine
def post(self):
self.write(self.process("restore_spam"))
@gen.coroutine
def delete(self):
self.write(self.process("ban_spam"))

View File

@@ -0,0 +1,105 @@
#!/usr/bin/env python3
# coding: utf-8
import logging
import os
from http import HTTPStatus
from pathlib import Path
from tornado import gen
from tornado.concurrent import run_on_executor
from handlers import cf
from handlers.base import BaseHandler
filename = Path(__file__).name.split(".")[0]
class ResourceHandler(BaseHandler):
filename = filename
@run_on_executor()
def get_resource_data(self):
resource_id = int(self.get_query_argument("id"))
username = self.get_current_user()
if str(resource_id) in os.getenv("HIDDEN_RESOURCE", "").split(","):
self.set_status(HTTPStatus.NOT_FOUND)
return {"status": 0, "info": "资源已隐藏"}
data = self.instance.get_resource_data(resource_id, username)
if not data:
self.ban()
self.set_status(HTTPStatus.NOT_FOUND)
data = {}
return data
@run_on_executor()
def search_resource(self):
referer = self.request.headers.get("referer")
if not referer:
cf.ban_new_ip(self.get_real_ip())
kw = self.get_query_argument("keyword").lower()
search_type = self.get_query_argument("type", "default")
self.set_header(
"search-engine", "Meilisearch" if os.getenv("MEILISEARCH") else "MongoDB"
)
return self.instance.search_resource(kw, search_type)
@gen.coroutine
def get(self):
if self.get_query_argument("id", None):
resp = yield self.get_resource_data()
elif self.get_query_argument("keyword", None):
resp = yield self.search_resource()
else:
resp = "error"
self.write(resp)
class ResourceLatestHandler(BaseHandler):
filename = filename
@run_on_executor()
def get_latest(self):
size = int(self.get_query_argument("size", "100"))
result = self.instance.get_latest_resource()
result["data"] = result["data"][:size]
return result
@gen.coroutine
def get(self):
resp = yield self.get_latest()
self.write(resp)
class TopHandler(BaseHandler):
filename = filename
def get_user_like(self) -> list:
username = self.get_current_user()
return self.instance.get_user_like(username)
def get_most(self) -> list:
return self.instance.get_most()
@run_on_executor()
def get_top_resource(self):
return self.instance.get_top_resource()
@gen.coroutine
def get(self):
resp = yield self.get_top_resource()
self.write(resp)
class NameHandler(BaseHandler):
filename = filename
@run_on_executor()
def get_names(self):
is_readable = self.get_query_argument("human", None)
return self.instance.get_names(is_readable)
@gen.coroutine
def get(self):
resp = yield self.get_names()
self.write(resp)

yyetsweb/handlers/user.py Normal file
View File

@@ -0,0 +1,170 @@
#!/usr/bin/env python3
# coding: utf-8
from http import HTTPStatus
from pathlib import Path
from tornado import gen, web
from tornado.concurrent import run_on_executor
from handlers.base import BaseHandler
filename = Path(__file__).name.split(".")[0]
class UserHandler(BaseHandler):
filename = filename
def set_login(self, username):
self.set_secure_cookie("username", username, 365)
@run_on_executor()
def login(self):
data = self.json
username = data["username"]
password = data["password"]
captcha = data.get("captcha")
captcha_id = data.get("captcha_id", "")
ip = self.get_real_ip()
browser = self.request.headers["user-agent"]
response = self.instance.login_user(
username, password, captcha, captcha_id, ip, browser
)
if response["status_code"] in (HTTPStatus.CREATED, HTTPStatus.OK):
self.set_login(username)
else:
self.set_status(response["status_code"])
return response
@run_on_executor()
def update_info(self):
result = self.instance.update_user_info(self.current_user, self.json)
self.set_status(result.get("status_code", HTTPStatus.IM_A_TEAPOT))
return result
@run_on_executor()
def get_user_info(self) -> dict:
username = self.get_current_user()
if username:
data = self.instance.get_user_info(username)
else:
# self.set_status(HTTPStatus.UNAUTHORIZED)
self.clear_cookie("username")
data = {"message": "Please try to login"}
return data
@gen.coroutine
def post(self):
resp = yield self.login()
self.write(resp)
@gen.coroutine
def get(self):
resp = yield self.get_user_info()
self.write(resp)
        # every time we receive a GET request to this API, update lastDate and lastIP
username = self.get_current_user()
if username:
now_ip = self.get_real_ip()
self.instance.update_user_last(username, now_ip)
@gen.coroutine
@web.authenticated
def patch(self):
resp = yield self.update_info()
self.write(resp)
class UserAvatarHandler(BaseHandler):
filename = filename
@run_on_executor()
def update_avatar(self):
username = self.get_current_user()
if not username:
self.set_status(HTTPStatus.UNAUTHORIZED)
self.clear_cookie("username")
return {"message": "Please try to login"}
file = self.request.files["image"][0]["body"]
if len(file) > 10 * 1024 * 1024:
self.set_status(HTTPStatus.REQUEST_ENTITY_TOO_LARGE)
return {"message": "图片大小不可以超过10MB"}
return self.instance.add_avatar(username, file)
@run_on_executor()
def get_avatar(self, username):
data = self.instance.get_avatar(username)
if data["image"]:
self.set_header("Content-Type", data["content_type"])
return data["image"]
self.set_status(HTTPStatus.NOT_FOUND)
return b""
@gen.coroutine
def post(self, _):
resp = yield self.update_avatar()
self.write(resp)
@gen.coroutine
def get(self, username):
resp = yield self.get_avatar(username)
self.write(resp)
@gen.coroutine
def head(self, username):
resp = yield self.get_avatar(username)
self.write(resp)
class LikeHandler(BaseHandler):
filename = filename
@run_on_executor()
def like_data(self):
username = self.get_current_user()
return {"LIKE": self.instance.get_user_like(username)}
@gen.coroutine
@web.authenticated
def get(self):
resp = yield self.like_data()
self.write(resp)
@run_on_executor()
def add_remove_fav(self):
data = self.json
resource_id = int(data["resource_id"])
username = self.get_current_user()
if username:
response = self.instance.add_remove_fav(resource_id, username)
self.set_status(response["status_code"])
else:
response = {"message": "请先登录"}
self.set_status(HTTPStatus.UNAUTHORIZED)
return response["message"]
@gen.coroutine
@web.authenticated
def patch(self):
resp = yield self.add_remove_fav()
self.write(resp)
class UserEmailHandler(BaseHandler):
filename = filename
@run_on_executor()
def verify_email(self):
result = self.instance.verify_email(self.get_current_user(), self.json["code"])
self.set_status(result.get("status_code"))
return result
@gen.coroutine
@web.authenticated
def post(self):
resp = yield self.verify_email()
self.write(resp)

View File

@@ -19,49 +19,52 @@ from apscheduler.triggers.cron import CronTrigger
from tornado import httpserver, ioloop, options, web
from tornado.log import enable_pretty_logging
from Mongo import OtherMongoResource, ResourceLatestMongoResource, SearchEngine
from commands.douban_sync import sync_douban
from dump_db import entry_dump
from handler import (
AnnouncementHandler,
BlacklistHandler,
CaptchaHandler,
CategoryHandler,
from common.dump_db import entry_dump
from common.sync import YYSub, sync_douban
from common.utils import setup_logger
from databases.base import SearchEngine
from databases.other import Other
from handlers.base import IndexHandler, NotFoundHandler
from handlers.comment import (
CommentChildHandler,
CommentHandler,
CommentNewestHandler,
CommentReactionHandler,
DBDumpHandler,
DoubanHandler,
DoubanReportHandler,
FacebookAuth2LoginHandler,
GitHubOAuth2LoginHandler,
GoogleOAuth2LoginHandler,
NotificationHandler,
)
from handlers.douban import DoubanHandler, DoubanReportHandler
from handlers.grafana import (
GrafanaIndexHandler,
GrafanaQueryHandler,
GrafanaSearchHandler,
IndexHandler,
LikeHandler,
MetricsHandler,
)
from handlers.oauth import (
FacebookAuth2LoginHandler,
GitHubOAuth2LoginHandler,
GoogleOAuth2LoginHandler,
MSOAuth2LoginHandler,
TwitterOAuth2LoginHandler,
)
from handlers.other import (
AnnouncementHandler,
BlacklistHandler,
CaptchaHandler,
CategoryHandler,
DBDumpHandler,
SpamProcessHandler,
)
from handlers.resources import (
NameHandler,
NotFoundHandler,
NotificationHandler,
ResourceHandler,
ResourceLatestHandler,
SpamProcessHandler,
TopHandler,
TwitterOAuth2LoginHandler,
UserAvatarHandler,
UserEmailHandler,
UserHandler,
)
from sync import YYSub
from utils import Cloudflare, setup_logger
from handlers.user import LikeHandler, UserAvatarHandler, UserEmailHandler, UserHandler
enable_pretty_logging()
setup_logger()
cf = Cloudflare()
if os.getenv("debug"):
logging.getLogger().setLevel(logging.DEBUG)
@@ -111,10 +114,22 @@ class RunServer:
"cookie_secret": os.getenv("cookie_secret", "eo2kcgpKwXj8Q3PKYj6nIL1J4j3b58DX"),
"default_handler_class": NotFoundHandler,
"login_url": "/login",
"google_oauth": {"key": os.getenv("GOOGLE_CLIENT_ID"), "secret": os.getenv("GOOGLE_CLIENT_SECRET")},
"github_oauth": {"key": os.getenv("GITHUB_CLIENT_ID"), "secret": os.getenv("GITHUB_CLIENT_SECRET")},
"ms_oauth": {"key": os.getenv("MS_CLIENT_ID"), "secret": os.getenv("MS_CLIENT_SECRET")},
"fb_oauth": {"key": os.getenv("FB_CLIENT_ID"), "secret": os.getenv("FB_CLIENT_SECRET")},
"google_oauth": {
"key": os.getenv("GOOGLE_CLIENT_ID"),
"secret": os.getenv("GOOGLE_CLIENT_SECRET"),
},
"github_oauth": {
"key": os.getenv("GITHUB_CLIENT_ID"),
"secret": os.getenv("GITHUB_CLIENT_SECRET"),
},
"ms_oauth": {
"key": os.getenv("MS_CLIENT_ID"),
"secret": os.getenv("MS_CLIENT_SECRET"),
},
"fb_oauth": {
"key": os.getenv("FB_CLIENT_ID"),
"secret": os.getenv("FB_CLIENT_SECRET"),
},
"twitter_consumer_key": os.getenv("TWITTER_CONSUMER_KEY"),
"twitter_consumer_secret": os.getenv("TWITTER_CONSUMER_SECRET"),
}
@@ -142,12 +157,10 @@ if __name__ == "__main__":
timez = pytz.timezone("Asia/Shanghai")
engine = SearchEngine()
scheduler = BackgroundScheduler(timezone=timez)
scheduler.add_job(OtherMongoResource().reset_top, trigger=CronTrigger.from_crontab("0 0 1 * *"))
scheduler.add_job(Other().reset_top, trigger=CronTrigger.from_crontab("0 0 1 * *"))
scheduler.add_job(sync_douban, trigger=CronTrigger.from_crontab("1 1 1 * *"))
scheduler.add_job(entry_dump, trigger=CronTrigger.from_crontab("2 2 1 * *"))
scheduler.add_job(ResourceLatestMongoResource().refresh_latest_resource, "interval", hours=1)
scheduler.add_job(OtherMongoResource().import_ban_user, "interval", seconds=300)
scheduler.add_job(cf.clear_fw, trigger=CronTrigger.from_crontab("0 0 */5 * *"))
scheduler.add_job(Other().import_ban_user, "interval", seconds=300)
scheduler.add_job(YYSub().run, trigger=CronTrigger.from_crontab("0 1 * * *"))
scheduler.start()

View File

@@ -17,12 +17,11 @@ class YYeTsTest(AsyncHTTPTestCase):
class TestIndex(YYeTsTest):
def test_homepage(self):
response = self.fetch('/')
response = self.fetch("/")
self.assertEqual(response.code, 200)
self.assertTrue(b'<!doctype html>' in response.body)
self.assertTrue(b"<!doctype html>" in response.body)
if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()