Files
YYeTsBot/web/server.py

443 lines
13 KiB
Python
Raw Normal View History

2021-02-06 08:58:58 +08:00
#!/usr/local/bin/python3
# coding: utf-8
# YYeTsBot - server.py
# 2/5/21 21:02
#
__author__ = "Benny <benny.think@gmail.com>"
import os
import contextlib
2021-02-07 10:33:12 +08:00
import logging
import json
2021-03-14 14:07:37 +08:00
import time
2021-03-14 18:31:01 +08:00
from datetime import date, timedelta
2021-02-07 09:32:23 +08:00
2021-02-09 20:04:54 +08:00
import redis
2021-02-07 09:32:23 +08:00
import pymongo
2021-02-06 17:21:33 +08:00
from http import HTTPStatus
2021-02-06 08:58:58 +08:00
from concurrent.futures import ThreadPoolExecutor
from tornado import web, ioloop, httpserver, gen, options
from tornado.log import enable_pretty_logging
from tornado import escape
2021-02-06 08:58:58 +08:00
from tornado.concurrent import run_on_executor
2021-02-07 09:32:23 +08:00
from apscheduler.schedulers.background import BackgroundScheduler
2021-02-06 08:58:58 +08:00
2021-02-07 10:33:12 +08:00
from crypto import decrypt
2021-02-06 08:58:58 +08:00
# Turn on tornado's formatted log output for this process.
enable_pretty_logging()

# MongoDB host is taken from the "mongo" environment variable; an unset or
# empty value falls back to localhost.
mongo_host = os.environ.get("mongo") or "localhost"

# Any non-empty "debug" environment variable switches logging to DEBUG level.
if os.environ.get("debug"):
    logging.basicConfig(level=logging.DEBUG)
2021-02-06 19:58:40 +08:00
class Mongo:
    """Thin wrapper owning a pymongo client and a handle to the ``zimuzu`` database."""

    def __init__(self):
        # connect=False is handed to MongoClient unchanged; per the pymongo
        # documentation it defers connecting until the first operation.
        client = pymongo.MongoClient(host=mongo_host, connect=False)
        self.client = client
        self.db = client["zimuzu"]

    def __del__(self):
        # Close the client when the wrapper is garbage-collected.
        self.client.close()
2021-02-06 08:58:58 +08:00
2021-02-09 20:04:54 +08:00
class Redis:
    """Thin wrapper owning a redis client (host ``redis``, database 2)."""

    def __init__(self):
        # decode_responses=True so values come back as str rather than bytes.
        self.r = redis.StrictRedis(host="redis", db=2, decode_responses=True)

    def __del__(self):
        # Close the client when the wrapper is garbage-collected.
        self.r.close()
2021-02-06 08:58:58 +08:00
class BaseHandler(web.RequestHandler):
    """Common base class for all request handlers in this module."""

    # Created once when the class body executes and shared by every subclass.
    mongo = Mongo()

    def data_received(self, chunk):
        """Ignore streamed request body chunks; nothing is done with them."""
        return None
2021-02-09 20:04:54 +08:00
class AntiCrawler:
    """Screen a resource request: verify its headers and consult a per-IP counter kept in Redis."""

    def __init__(self, instance):
        """
        :param instance: the tornado request handler whose request is checked
        """
        self.tornado = instance
        self.redis = Redis()

    def execute(self) -> bool:
        """Return True when the request must be rejected, False otherwise."""
        # Both checks are always evaluated (no short-circuit), so the header
        # verification is logged even for an address that is already banned.
        header_result = self.header_check()
        ban_result = self.ban_check()
        return bool(header_result or ban_result)

    def header_check(self) -> bool:
        """Return True when the request headers fail verification.

        The request passes only if the Referer contains the ``id`` query
        argument and the ``ne1`` header decrypts (with ``id`` as passphrase)
        to the request URI.
        """
        cypher_text = self.tornado.request.headers.get("ne1", "")
        referer = self.tornado.request.headers.get("Referer")
        param = self.tornado.get_query_argument("id")
        uri = self.tornado.request.uri

        logging.info("Verifying: Referer:[%s] ct:[%s], uri:[%s], id:[%s]", referer, cypher_text, uri, param)

        if referer is None or param not in referer:
            return True

        try:
            result = decrypt(cypher_text, param).decode('u8')
        except Exception:
            # Any failure while decrypting untrusted input counts as a failed
            # verification rather than a server error.
            logging.error("Decrypt failed")
            result = ""

        return result != uri

    def ban_check(self) -> bool:
        """Return True when the caller's counter in Redis is above 10."""
        str_count = self.redis.r.get(self.get_real_ip())
        return bool(str_count) and int(str_count) > 10

    def imprisonment(self, ip):
        """Increase the counter stored for ``ip`` and, past 10, attach an expiry.

        :param ip: key under which the counter is stored
        """
        con = self.redis
        # INCR is not used because the expiry is written together with the value.
        # A single GET (instead of EXISTS followed by GET) avoids crashing on
        # int(None) if the key expires between the two calls.
        count_str = con.r.get(ip)
        count = 1 if count_str is None else int(count_str) + 1

        # ban rule: (count - 10) * 3600 seconds, i.e. one more hour per extra hit
        if count > 10:
            ex = (count - 10) * 3600
        else:
            # No expiry at or below the threshold: the counter persists.
            ex = None
        con.r.set(ip, count, ex=ex)

    def get_real_ip(self):
        """Return the ``X-Real-IP`` header if present, else the connection's remote IP."""
        x_real = self.tornado.request.headers.get("X-Real-IP")
        remote_ip = self.tornado.request.remote_ip
        logging.debug("X-Real-IP:%s, Remote-IP:%s", x_real, remote_ip)
        return x_real or remote_ip
2021-02-07 10:33:12 +08:00
2021-02-11 08:41:20 +08:00
class IndexHandler(BaseHandler):
    """Serve ``index.html`` for the site root."""

    executor = ThreadPoolExecutor(100)

    @run_on_executor()
    def send_index(self):
        """Read ``index.html`` (relative to the working directory) and return its text."""
        # Explicit encoding: without it the decode depends on the process locale.
        with open("index.html", encoding="utf-8") as f:
            return f.read()

    @gen.coroutine
    def get(self):
        """Respond with the contents of the index page."""
        resp = yield self.send_index()
        self.write(resp)
2021-02-06 08:58:58 +08:00
class ResourceHandler(BaseHandler):
    """``/api/resource``: fetch one resource by ``id`` or search resources by ``kw``."""

    executor = ThreadPoolExecutor(100)

    @run_on_executor()
    def get_resource_data(self):
        """Return the resource document for the ``id`` query argument.

        Responds 403 with ``{}`` when the anti-crawler check rejects the
        request, and 404 with ``{}`` when no document matches (which also
        counts a strike against the caller's IP).
        """
        banner = AntiCrawler(self)
        if banner.execute():
            logging.warning("%s@%s make you happy:-(", self.request.headers.get("user-agent"),
                            self.request.headers.get("X-Real-IP")
                            )
            self.set_status(HTTPStatus.FORBIDDEN)
            return {}

        param = self.get_query_argument("id")
        # The id is matched as an int when it parses as one, else as the raw string.
        with contextlib.suppress(ValueError):
            param = int(param)

        # Third positional argument is the projection; each hit bumps the view counter.
        data = self.mongo.db["yyets"].find_one_and_update(
            {"data.info.id": param},
            {'$inc': {'data.info.views': 1}},
            {'_id': False})
        if not data:
            # not found, dangerous
            banner.imprisonment(banner.get_real_ip())
            self.set_status(404)
            data = {}
        return data

    @run_on_executor()
    def search_resource(self):
        """Return ``{"data": [...]}`` with the ``data.info`` of every resource whose
        Chinese, English or alias name contains the ``kw`` query argument
        (case-insensitive substring match).
        """
        import re  # local import: this module does not import re at file level

        param = self.get_query_argument("kw").lower()
        # The keyword is untrusted input: escape it so it is matched literally
        # instead of being interpreted as a regular expression.
        pattern = re.escape(param)
        projection = {'_id': False,
                      'data.info': True,
                      }

        # "$regex" is an unanchored search, so no surrounding ".*" is needed.
        # "i" is the case-insensitive option ("-i" is not a valid option string).
        name_filter = {'$regex': pattern, "$options": "i"}
        data = self.mongo.db["yyets"].find({
            "$or": [
                {"data.info.cnname": name_filter},
                {"data.info.enname": name_filter},
                {"data.info.aliasname": name_filter},
            ]},
            projection
        )
        return dict(data=list(data))

    @gen.coroutine
    def get(self):
        """Dispatch on the query string: ``id`` wins over ``kw``; neither yields ``"error"``."""
        if self.get_query_argument("id", None):
            resp = yield self.get_resource_data()
        elif self.get_query_argument("kw", None):
            resp = yield self.search_resource()
        else:
            resp = "error"
        self.write(resp)
2021-02-06 11:35:28 +08:00
class TopHandler(BaseHandler):
    """``/api/top``: the 15 most-viewed resources overall and per area."""

    executor = ThreadPoolExecutor(100)

    @run_on_executor()
    def get_top_resource(self):
        """Return a dict mapping each area code to its top-15 list, plus a
        ``class`` entry mapping area codes to their display names.
        """
        projection = {'_id': False, 'data.info': True}
        # Area code -> value matched against data.info.area ("ALL" matches anything).
        area_dict = {
            "ALL": {"$regex": ".*"},
            "US": "美国",
            "JP": "日本",
            "KR": "韩国",
            "UK": "英国",
        }
        collection = self.mongo.db["yyets"]

        all_data = {}
        for abbr, area in area_dict.items():
            cursor = collection.find({"data.info.area": area}, projection)
            ranked = cursor.sort("data.info.views", pymongo.DESCENDING).limit(15)
            all_data[abbr] = list(ranked)

        # After querying, swap the regex for a display label and ship the mapping.
        area_dict["ALL"] = "全部"
        all_data["class"] = area_dict
        return all_data

    @gen.coroutine
    def get(self):
        """Respond with the top-resource listing."""
        resp = yield self.get_top_resource()
        self.write(resp)
2021-02-10 11:06:59 +08:00
class NameHandler(BaseHandler):
    """``/api/name``: list resource names, raw or as one human-readable string each."""

    executor = ThreadPoolExecutor(100)

    @staticmethod
    def json_encode(value):
        """Serialise ``value`` to JSON without escaping non-ASCII characters."""
        return json.dumps(value, ensure_ascii=False)

    @run_on_executor()
    def get_names(self):
        """Return ``{"data": [...]}`` holding the values of every returned document.

        With a truthy ``human`` query argument each document is a single
        concatenated name; otherwise it is the projected name fields.
        """
        # NOTE: this rebinds tornado.escape.json_encode for the whole process,
        # not only for this request.
        escape.json_encode = self.json_encode

        if self.get_query_argument("human", None):
            name_parts = [
                "$data.info.area",
                "$data.info.channel_cn",
                ": ",
                "$data.info.cnname",
                " ",
                "$data.info.enname",
                " ",
                "$data.info.aliasname"
            ]
            pipeline = [{"$project": {"name": {"$concat": name_parts}, "_id": False}}]
            query_cursor = self.mongo.db["yyets"].aggregate(pipeline)
        else:
            projection = {
                '_id': False,
                'data.info.cnname': True,
                'data.info.enname': True,
                'data.info.aliasname': True,
                'data.info.channel_cn': True,
            }
            query_cursor = self.mongo.db["yyets"].find({}, projection)

        # Flatten: collect the top-level values of every document into one list.
        data = [value for document in query_cursor for value in document.values()]
        return dict(data=data)

    @gen.coroutine
    def get(self):
        """Respond with the name listing."""
        resp = yield self.get_names()
        self.write(resp)
2021-02-06 17:21:33 +08:00
class MetricsHandler(BaseHandler):
    """``/api/metrics``: per-day counters stored in the ``metrics`` collection."""

    executor = ThreadPoolExecutor(100)

    @run_on_executor()
    def set_metrics(self):
        """Increment today's counter named by the ``type`` query argument; reply 201 with ``{}``."""
        # NOTE(review): the counter name comes straight from the query string
        # and is used as a document field name.
        metrics_type = self.get_query_argument("type")
        # With no time tuple given, strftime formats the current local time.
        today = time.strftime("%Y-%m-%d")

        # upsert=True creates the day's document on its first increment.
        self.mongo.db['metrics'].update_one(
            {'date': today},
            {'$inc': {metrics_type: 1}},
            upsert=True,
        )
        self.set_status(HTTPStatus.CREATED)
        return {}

    @run_on_executor()
    def get_metrics(self):
        """Return ``{"metrics": [...]}`` for the ``date`` query argument, or for all days if absent."""
        day = self.get_query_argument("date", None)
        condition = {"date": day} if day else {}
        documents = self.mongo.db['metrics'].find(condition, {'_id': False})
        return {"metrics": list(documents)}

    @gen.coroutine
    def get(self):
        """Respond with stored metrics."""
        resp = yield self.get_metrics()
        self.write(resp)

    @gen.coroutine
    def post(self):
        """Record one metric hit."""
        resp = yield self.set_metrics()
        self.write(resp)
2021-03-14 18:19:59 +08:00
class GrafanaIndexHandler(BaseHandler):
    """``/api/grafana/``: answer with an empty JSON object."""

    def get(self):
        """Write ``{}`` as the response body."""
        empty = {}
        self.write(empty)
class GrafanaSearchHandler(BaseHandler):
    """``/api/grafana/search``: list the metric names that can be queried."""

    def post(self):
        """Write the available metric names as a JSON array."""
        # Serialised by hand to a JSON string before being written.
        self.write(json.dumps(["access", "search", "resource"]))
class GrafanaQueryHandler(BaseHandler):
    """``/api/grafana/query``: return per-day metric datapoints for the requested targets."""

    @staticmethod
    def generate_date_series(start: str, end: str) -> list:
        """Return every day from ``start`` to ``end`` inclusive as ``YYYY-MM-DD`` strings.

        :param start: first day, formatted ``YYYY-MM-DD``
        :param end: last day, formatted ``YYYY-MM-DD``
        :return: list of date strings; empty when ``end`` is before ``start``
        """
        sdate = date(*(int(part) for part in start.split("-")))  # start date
        edate = date(*(int(part) for part in end.split("-")))  # end date
        span = (edate - sdate).days
        return [(sdate + timedelta(days=offset)).strftime("%Y-%m-%d") for offset in range(span + 1)]

    @staticmethod
    def time_str_int(text):
        """Convert a ``YYYY-MM-DD`` string to a local-time epoch timestamp in seconds (float)."""
        return time.mktime(time.strptime(text, "%Y-%m-%d"))

    def post(self):
        """Answer a query: one ``{"target", "datapoints"}`` entry per non-empty target.

        Each datapoint is ``[value, epoch_milliseconds]`` for one stored day in
        the requested range.
        """
        payload = json.loads(self.request.body)
        # Only the date part of the ISO timestamps is used.
        start = payload["range"]["from"].split("T")[0]
        end = payload["range"]["to"].split("T")[0]
        date_series = self.generate_date_series(start, end)
        targets = [i["target"] for i in payload["targets"] if i["target"]]

        # The query does not depend on the target, so run it once and reuse the
        # documents for every target instead of re-querying inside the loop.
        records = []
        if targets:
            condition = {"date": {"$in": date_series}}
            projection = {"_id": False}
            records = list(self.mongo.db["metrics"].find(condition, projection))

        grafana_data = []
        for target in targets:
            # A day's document only contains counters that were incremented that
            # day; a missing counter is reported as 0 rather than raising KeyError.
            data_points = [
                [record.get(target, 0), self.time_str_int(record["date"]) * 1000]
                for record in records
            ]
            grafana_data.append({
                "target": target,
                "datapoints": data_points
            })
        self.write(json.dumps(grafana_data))
2021-02-11 08:41:20 +08:00
class BlacklistHandler(BaseHandler):
    """``/api/blacklist``: report the Redis keys that currently carry an expiry."""

    executor = ThreadPoolExecutor(100)

    @run_on_executor()
    def get_black_list(self):
        """Return ``{key: {"count": ..., "ttl": ...}}`` for every key whose TTL is not -1."""
        r = Redis().r
        result = {}
        for key in r.keys("*"):
            count = r.get(key)
            ttl = r.ttl(key)
            # Redis reports a TTL of -1 for keys without an expiry; skip those.
            if ttl == -1:
                continue
            result[key] = {"count": count, "ttl": ttl}
        return result

    @gen.coroutine
    def get(self):
        """Respond with the blacklist."""
        resp = yield self.get_black_list()
        self.write(resp)
2021-03-22 00:06:37 +08:00
class NotFoundHandler(BaseHandler):
    """Default handler for unmatched routes: render ``404.html`` with a 404 status."""

    def prepare(self):  # for all methods
        """Finish the request from ``prepare`` so every HTTP method gets the same page."""
        # Without an explicit status the page would be served as 200 OK.
        self.set_status(HTTPStatus.NOT_FOUND)
        self.render("404.html")
2021-02-06 08:58:58 +08:00
class RunServer:
    """Route table, tornado application and server bootstrap."""

    # Static files are served from the directory containing this file.
    root_path = os.path.dirname(__file__)
    static_path = os.path.join(root_path, '')

    handlers = [
        (r'/api/resource', ResourceHandler),
        (r'/api/top', TopHandler),
        (r'/api/name', NameHandler),
        (r'/api/metrics', MetricsHandler),
        (r'/api/grafana/', GrafanaIndexHandler),
        (r'/api/grafana/search', GrafanaSearchHandler),
        (r'/api/grafana/query', GrafanaQueryHandler),
        (r'/api/blacklist', BlacklistHandler),
        (r'/', IndexHandler),
        # Static assets, matched by file extension.
        (
            r'/(.*\.html|.*\.js|.*\.css|.*\.png|.*\.jpg|.*\.ico|.*\.gif|.*\.woff2|.*\.gz|.*\.zip)',
            web.StaticFileHandler,
            {'path': static_path},
        ),
    ]

    # Anything not matched above is handled by NotFoundHandler.
    application = web.Application(handlers, xheaders=True, default_handler_class=NotFoundHandler)

    @staticmethod
    def run_server(port, host, **kwargs):
        """Bind to ``host:port`` and run the IO loop until interrupted.

        :param port: TCP port to listen on
        :param host: address to bind to
        :param kwargs: extra keyword arguments forwarded to ``HTTPServer``
        """
        tornado_server = httpserver.HTTPServer(RunServer.application, **kwargs)
        tornado_server.bind(port, host)
        # 0 asks tornado to fork one worker process per CPU core.
        tornado_server.start(0)

        loop = ioloop.IOLoop.current()
        try:
            print('Server is running on http://{}:{}'.format(host, port))
            loop.start()
        except KeyboardInterrupt:
            loop.stop()
            print('"Ctrl+C" received, exiting.\n')
def reset_top():
    """Set ``data.info.views`` back to 0 on every document of the ``yyets`` collection."""
    logging.info("resetting top...")
    # Keep the wrapper bound to a name for the duration of the call: its
    # __del__ closes the underlying client.
    mongo = Mongo()
    collection = mongo.db["yyets"]
    collection.update_many({}, {"$set": {"data.info.views": 0}})
2021-02-06 08:58:58 +08:00
if __name__ == "__main__":
    # Background job: reset the view counters at 00:00 on day 1 of each month.
    scheduler = BackgroundScheduler()
    scheduler.add_job(reset_top, 'cron', hour=0, minute=0, day=1)
    scheduler.start()

    # Command-line options: -p for the port, -h for the listen address.
    options.define("p", default=8888, help="running port", type=int)
    options.define("h", default='127.0.0.1', help="listen address", type=str)
    options.parse_command_line()

    p, h = options.options.p, options.options.h
    RunServer.run_server(port=p, host=h)