mirror of
https://github.com/tgbot-collection/YYeTsBot.git
synced 2025-11-25 19:37:34 +08:00
sync douban as a job
This commit is contained in:
@@ -1,40 +0,0 @@
|
||||
#!/usr/local/bin/python3
|
||||
# coding: utf-8
|
||||
|
||||
# YYeTsBot - douban.py
|
||||
# 7/11/21 10:17
|
||||
#
|
||||
|
||||
__author__ = "Benny <benny.think@gmail.com>"
|
||||
|
||||
import contextlib
|
||||
import random
|
||||
import sys
|
||||
import pathlib
|
||||
import time
|
||||
|
||||
import requests
|
||||
from tqdm import tqdm
|
||||
import logging
|
||||
|
||||
# One-off script: call ``find_douban`` for every resource id stored in the
# ``yyets`` collection, pausing a random 1-5 seconds after each lookup.
logging.basicConfig(level=logging.INFO)

# Put the "yyetsweb" directory (three levels above this file) on sys.path
# before importing Mongo -- presumably that is where Mongo.py lives.
lib_path = pathlib.Path(__file__).parent.parent.parent.joinpath("yyetsweb").resolve().as_posix()
sys.path.append(lib_path)
from Mongo import DoubanMongoResource  # noqa: E402 - needs the sys.path tweak above

douban = DoubanMongoResource()
# NOTE(review): this session is configured but never used below; it is kept
# only so the module-level name stays available.
session = requests.Session()
ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
session.headers.update({"User-Agent": ua})

yyets_data = douban.db["yyets"].find()

rids = [i["data"]["info"]["id"] for i in yyets_data]
logging.info("resource id complete")
for rid in tqdm(rids):
    try:
        d = douban.find_douban(rid)
        logging.info("Processed %s, length %d", rid, len(d))
    except Exception:
        # Best-effort run: one bad resource must not stop the rest, but the
        # failure is logged with its traceback instead of being discarded.
        logging.exception("Failed to process %s", rid)
    # Random pause between lookups (presumably to throttle upstream requests).
    time.sleep(random.randint(1, 5))

logging.info("ALL FINISH!")
|
||||
45
yyetsweb/migration/douban_sync.py
Normal file
45
yyetsweb/migration/douban_sync.py
Normal file
@@ -0,0 +1,45 @@
|
||||
#!/usr/local/bin/python3
|
||||
# coding: utf-8
|
||||
|
||||
# YYeTsBot - douban.py
|
||||
# 7/11/21 10:17
|
||||
#
|
||||
|
||||
__author__ = "Benny <benny.think@gmail.com>"
|
||||
|
||||
import contextlib
|
||||
import random
|
||||
import sys
|
||||
import pathlib
|
||||
import time
|
||||
import logging
|
||||
|
||||
import requests
|
||||
from tqdm import tqdm
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
lib_path = pathlib.Path(__file__).parent.parent.resolve().as_posix()
|
||||
sys.path.append(lib_path)
|
||||
from Mongo import DoubanMongoResource
|
||||
|
||||
|
||||
def sync_douban():
    """Run ``find_douban`` for every yyets resource id missing from ``douban``.

    Resource ids are collected from the ``yyets`` collection
    (``data.info.id``), ids already present in the ``douban`` collection
    (``resourceId``) are subtracted, and ``find_douban`` is called once for
    each remaining id. A random 1-5 second pause follows every call.

    A failure on a single resource is logged with its traceback and does not
    stop the run, so the job stays best-effort without hiding errors.
    """
    douban = DoubanMongoResource()

    yyets_ids = {doc["data"]["info"]["id"] for doc in douban.db["yyets"].find()}
    synced_ids = {doc["resourceId"] for doc in douban.db["douban"].find()}
    rids = list(yyets_ids - synced_ids)
    logging.info("resource id complete %d", len(rids))

    for rid in tqdm(rids):
        try:
            d = douban.find_douban(rid)
            logging.info("Processed %s, length %d", rid, len(d))
        except Exception:
            # Keep going on error, but leave a traceback in the log rather
            # than silently swallowing it.
            logging.exception("Failed to sync douban for %s", rid)
        # Random pause between lookups (presumably to throttle upstream requests).
        time.sleep(random.randint(1, 5))

    logging.info("ALL FINISH!")
|
||||
@@ -7,21 +7,23 @@
|
||||
|
||||
__author__ = "Benny <benny.think@gmail.com>"
|
||||
|
||||
import os
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
|
||||
import pytz
|
||||
|
||||
from apscheduler.schedulers.background import BackgroundScheduler
|
||||
from tornado import httpserver, ioloop, options, web
|
||||
from tornado.log import enable_pretty_logging
|
||||
from tornado import web, httpserver, ioloop, options
|
||||
|
||||
from handler import (AnnouncementHandler, BlacklistHandler, CaptchaHandler,
|
||||
CommentChildHandler, CommentHandler, CommentNewestHandler,
|
||||
DBDumpHandler, DoubanHandler, GrafanaIndexHandler,
|
||||
GrafanaQueryHandler, GrafanaSearchHandler, IndexHandler,
|
||||
MetricsHandler, NameHandler, NotFoundHandler,
|
||||
ResourceHandler, TopHandler, UserHandler, UserLikeHandler)
|
||||
from migration.douban_sync import sync_douban
|
||||
from Mongo import OtherMongoResource
|
||||
from handler import IndexHandler, UserHandler, ResourceHandler, TopHandler, UserLikeHandler, NameHandler, \
|
||||
CommentHandler, AnnouncementHandler, CaptchaHandler, MetricsHandler, GrafanaIndexHandler, GrafanaSearchHandler, \
|
||||
GrafanaQueryHandler, BlacklistHandler, NotFoundHandler, DBDumpHandler, CommentChildHandler, DoubanHandler, \
|
||||
CommentNewestHandler
|
||||
|
||||
enable_pretty_logging()
|
||||
|
||||
@@ -83,6 +85,7 @@ if __name__ == "__main__":
|
||||
timez = pytz.timezone('Asia/Shanghai')
|
||||
scheduler = BackgroundScheduler(timezone=timez)
|
||||
scheduler.add_job(OtherMongoResource().reset_top, 'cron', hour=0, minute=0, day=1)
|
||||
scheduler.add_job(sync_douban, 'cron', hour=0, minute=0, day=1)
|
||||
scheduler.start()
|
||||
options.define("p", default=8888, help="running port", type=int)
|
||||
options.define("h", default='127.0.0.1', help="listen address", type=str)
|
||||
|
||||
Reference in New Issue
Block a user