#!/usr/bin/env python3
# coding: utf-8
"""Sync resource metadata from zhuixinfan (fanxinzhui) and yysub into the
local yyets MongoDB collection, keeping a per-site progress bookmark in the
``sync`` collection."""

import logging
import os
import random
import re
import time
from copy import deepcopy

import requests
from bs4 import BeautifulSoup

from Mongo import Mongo
from utils import setup_logger

setup_logger()


class BaseSync:
    """Shared state for site syncers: Mongo handles, the yyets document
    template, and a browser-like HTTP session."""

    def __init__(self):
        self.mongo = Mongo()
        self.yyets = self.mongo.db["yyets"]
        self.sync = self.mongo.db["sync"]
        # Template mirroring the yyets API response shape.
        # Always deepcopy before mutating — it is shared by all syncers.
        self.structure = {
            "status": 1,
            "info": "OK",
            "data": {
                "info": {
                    "id": None,
                    "cnname": "",
                    "enname": " ",
                    "aliasname": "",
                    "channel": "",
                    "channel_cn": "",
                    "area": "日本",
                    "show_type": "",
                    "expire": "",
                    "views": 0,
                    "year": []
                },
                "list": [
                    {
                        "season_num": "101",
                        "season_cn": "单剧",
                        "items": {
                            "MP4": []
                        },
                        "formats": [
                            "MP4",
                        ]
                    }
                ]
            }
        }
        self.session = requests.Session()
        self.session.headers.update(
            {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
            })

    @staticmethod
    def sleep(times=1):
        """Sleep a random fraction of ``times`` seconds (polite crawl jitter)."""
        time.sleep(random.random() * times)


class Zhuixinfan(BaseSync):
    """Crawl fanxinzhui.com resource pages sequentially by numeric id."""

    def run(self):
        """Crawl from the saved bookmark (or env override) up to the
        configured end id, then persist the new bookmark.

        FIX: ``os.getenv`` returns ``str`` whenever the variable is set, which
        made ``range(start, end)`` raise ``TypeError``; both bounds are now
        coerced with ``int()``.
        """
        zhuixinfan = "http://www.fanxinzhui.com/rr/{}"
        start = int(
            (self.sync.find_one({"name": "zhuixinfan"}) or {})
            .get("resource_id", os.getenv("ZHUIXINFAN_START", 20))
        )
        end = int(os.getenv("ZHUIXINFAN_END", 2500))
        for i in range(start, end):
            url = zhuixinfan.format(i)
            html = self.session.get(zhuixinfan.format(i)).content.decode("u8")
            self.sleep()
            # NOTE(review): this compares the WHOLE page against the marker
            # string; if the site embeds it inside a full HTML page this never
            # matches — confirm against a live 404-style response.
            if html != "资源不存在":
                self.build_data(html, url)
        # FIX: upsert so the first run creates the bookmark document
        # (consistent with YYSub.run below).
        self.sync.update_one({"name": "zhuixinfan"},
                             {"$set": {"resource_id": end}},
                             upsert=True)
        logging.info("Zhuixinfan Finished")

    def build_data(self, html, link):
        """Parse one zhuixinfan resource page into the yyets document shape
        and hand it to :meth:`update_yyets`.

        :param html: decoded page HTML
        :param link: source URL, stored as ``data.info.source``
        """
        structure = deepcopy(self.structure)
        # Heuristic: episodic markers ("updated to…" / "all N episodes")
        # mean a TV drama; otherwise treat the resource as a film.
        if "更新至" in html or re.findall(r"全\d+回", html):
            channel, channel_cn = "tv", "日剧"
        else:
            channel, channel_cn = "movie", "日影"
        soup = BeautifulSoup(html, "html.parser")
        title = soup.find_all("div", class_="resource_title")[0].h2.text
        chn = soup.title.text.split("_")[0]
        eng = title.replace(chn, "").split("(")[0].strip()
        # Last 4-digit run in the title is taken as the release year.
        # NOTE(review): raises IndexError when the title has no year — confirm
        # every page carries one.
        year = int("".join(re.findall(r"\d{4}", title)[-1]).strip())
        structure["data"]["info"]["cnname"] = chn
        structure["data"]["info"]["enname"] = eng
        structure["data"]["info"]["year"] = [year]
        structure["data"]["info"]["source"] = link
        structure["data"]["info"]["channel"] = channel
        structure["data"]["info"]["channel_cn"] = channel_cn
        logging.info("Building data for %s - %s", chn, link)

        li = soup.find("ul", class_="item_list")
        if li:
            li = li.find_all("li")
        else:
            li = []
        for p in li:
            resource = {
                "itemid": "",
                "episode": p.span.text,
                "name": p.span.nextSibling.text,
                "size": "unknown",
                "yyets_trans": 0,
                "dateline": str(int(time.time())),
                "files": []
            }
            res = p.find("p", class_="way")
            # No download links for this episode — skip it.
            if res.span is None:
                continue
            links = res.find_all("a")
            for item in links:
                content = item["href"]
                if "ed2k" in content:
                    resource["files"].append(
                        {
                            "way": "1",
                            "way_cn": "电驴",
                            "address": content,
                            "passwd": ""
                        }
                    )
                elif "magnet" in content:
                    resource["files"].append(
                        {
                            "way": "2",
                            "way_cn": "磁力",
                            "address": content,
                            "passwd": ""
                        }
                    )
                elif "pan.baidu" in content:
                    # Password sits two siblings after the first link anchor.
                    # NOTE(review): depends on exact page markup — verify.
                    baidu_password = res.span.a.nextSibling.nextSibling.text
                    resource["files"].append(
                        {
                            "way": "13",
                            "way_cn": "百度网盘",
                            "address": content,
                            "passwd": baidu_password
                        }
                    )
                elif "weiyun" in content:
                    resource["files"].append(
                        {
                            "way": "14",
                            "way_cn": "微云",
                            "address": content,
                            "passwd": ""
                        }
                    )
                else:
                    logging.debug("Unknown link: %s", content)
            structure["data"]["list"][0]["items"]["MP4"].append(resource)
        self.update_yyets(structure)

    def update_yyets(self, data):
        """Merge a parsed document into the yyets collection.

        Precedence: (1) a record with the same Chinese name only gets its
        ``source`` backfilled; (2) a record with the same source gets its
        episode list refreshed; (3) otherwise insert with a fresh id
        allocated from the >=90000 range.
        """
        source = data["data"]["info"]["source"]
        exists = self.yyets.find_one({"data.info.source": source})
        already_cond = {"data.info.cnname": data["data"]["info"]["cnname"]}
        already_in = self.yyets.find_one(already_cond)
        if already_in:
            logging.info("Already in old yyets, updating data.info.source: %s", source)
            self.yyets.update_one(already_cond, {"$set": {"data.info.source": source}})
        elif exists:
            logging.info("Updating new data.info.id: %s", source)
            self.yyets.update_one({"data.info.source": source},
                                  {"$set": {"data.list": data["data"]["list"]}})
        else:
            # Ids >= 90000 are reserved for records this syncer creates.
            last_id = 90000
            last = self.yyets.find_one({"data.info.id": {"$gte": last_id}},
                                       sort=[("data.info.id", -1)])
            if last:
                last_id = last["data"]["info"]["id"] + 1
            logging.info("Inserting data.info.id: %s", last_id)
            data["data"]["info"]["id"] = last_id
            # copy() so pymongo's added "_id" does not leak into `data`.
            self.yyets.insert_one(data.copy())


class YYSub(BaseSync):
    """Sync resource metadata from the yysub.net JSON API."""

    def get_lastest_id(self):
        """Return the newest resource id from the public listing page.

        (Name kept as-is — "lastest" [sic] — to preserve the public API.)
        """
        url = "https://www.yysub.net/resourcelist?channel=&area=&category=&year=&tvstation=&sort=pubdate&page=1"
        html = self.session.get(url).content.decode("u8")
        soup = BeautifulSoup(html, "html.parser")
        x = soup.find("div", class_="resource-showlist has-point")
        return int(x.ul.li.div.a["href"].split("/")[-1])

    def run(self):
        """Walk the API id range from the saved bookmark to the newest id,
        upserting each valid resource, then persist the bookmark."""
        logging.info("Starting to sync YYSub...")
        end = self.get_lastest_id() + 1
        # end = 41566
        start = (self.sync.find_one({"name": "yysub"}) or {}).get("resource_id", 41557)
        api = "https://m.yysub.net/api/resource/{}"
        for i in range(start, end):
            resp = self.session.get(api.format(i))
            self.sleep()
            if resp.status_code != 200:
                continue
            data = resp.json()["data"]
            if data.get("cnname"):
                logging.info("Found valid resource: %s - %s", data["cnname"], i)
                # FIX: fresh deepcopy per resource so inserted documents do
                # not share mutable nested dicts with the class template.
                structure = deepcopy(self.structure)
                structure["data"]["info"]["id"] = i
                structure["data"]["info"]["cnname"] = data["cnname"]
                structure["data"]["info"]["enname"] = data["enname"]
                structure["data"]["info"]["aliasname"] = data["aliasname"]
                structure["data"]["info"]["channel"] = data["channel"]
                structure["data"]["info"]["channel_cn"] = data["channel_cn"]
                structure["data"]["info"]["area"] = data["area"]
                structure["data"]["list"] = []
                structure["data"]["info"]["source"] = f"https://www.yysub.net/resource/{i}"
                self.insert_data(structure)
        self.sync.update_one({"name": "yysub"},
                             {"$set": {"resource_id": end}},
                             upsert=True)
        logging.info("YYsub Finished")

    def insert_data(self, data):
        """Upsert one document keyed on ``data.info.id``."""
        rid = data["data"]["info"]["id"]
        self.yyets.update_one({"data.info.id": rid}, {"$set": data}, upsert=True)


if __name__ == '__main__':
    a = Zhuixinfan()
    # a.build_data(open("1.html").read(), "https://www.zhuixinfan.com/resource/1.html")
    a.run()
    # b = YYSub()
    # b.run()