Files
YYeTsBot/yyetsweb/sync.py
2023-03-06 18:52:59 +01:00

239 lines
8.7 KiB
Python

#!/usr/bin/env python3
# coding: utf-8
import logging
import os
import random
import re
import time
from copy import deepcopy
import requests
from bs4 import BeautifulSoup
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
from Mongo import Mongo
class BaseSync:
    """Common state shared by the sync jobs in this module.

    Each instance holds a Mongo connection with handles to the ``yyets`` and
    ``sync`` collections, a template resource document (``self.structure``)
    and a ``requests`` session that sends a desktop browser User-Agent.
    """

    def __init__(self):
        self.mongo = Mongo()
        self.yyets = self.mongo.db["yyets"]
        self.sync = self.mongo.db["sync"]

        # Template for the "data.info" part of a resource document.
        info_template = {
            "id": None,
            "cnname": "",
            "enname": " ",
            "aliasname": "",
            "channel": "",
            "channel_cn": "",
            "area": "日本",
            "show_type": "",
            "expire": "",
            "views": 0,
            "year": [],
        }
        # Template for the single season entry with an empty MP4 item list.
        single_season = {
            "season_num": "101",
            "season_cn": "单剧",
            "items": {"MP4": []},
            "formats": ["MP4"],
        }
        # Subclasses deep-copy this template before filling it in.
        self.structure = {
            "status": 1,
            "info": "OK",
            "data": {
                "info": info_template,
                "list": [single_season],
            },
        }

        user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
        )
        self.session = requests.Session()
        self.session.headers.update({"User-Agent": user_agent})

    @staticmethod
    def sleep(times=1):
        """Block for a random duration between 0 and ``times`` seconds."""
        delay = times * random.random()
        time.sleep(delay)
class Zhuixinfan(BaseSync):
    """Sync job that scrapes fanxinzhui.com resource pages into ``yyets``."""

    # (substring searched for in the href, "way" code, "way_cn" label).
    # Entries are checked in order and the first match wins.
    _LINK_KINDS = (
        ("ed2k", "1", "电驴"),
        ("magnet", "2", "磁力"),
        ("pan.baidu", "13", "百度网盘"),
        ("weiyun", "14", "微云"),
    )

    def run(self):
        """Crawl resource ids in ``[start, end)`` and store every page found.

        ``start`` is the ``resource_id`` saved in the ``sync`` collection under
        ``{"name": "zhuixinfan"}``; when no checkpoint exists it falls back to
        the ``ZHUIXINFAN_START`` environment variable (default 20).  ``end``
        comes from ``ZHUIXINFAN_END`` (default 2500).  After the loop the
        checkpoint is set to ``end``.
        """
        zhuixinfan = "http://www.fanxinzhui.com/rr/{}"
        checkpoint = self.sync.find_one({"name": "zhuixinfan"}) or {}
        # os.getenv returns a str when the variable is set, so coerce both
        # bounds to int before handing them to range().
        start = int(checkpoint.get("resource_id", os.getenv("ZHUIXINFAN_START", 20)))
        end = int(os.getenv("ZHUIXINFAN_END", 2500))

        for i in range(start, end):
            url = zhuixinfan.format(i)
            html = self.session.get(url).content.decode("u8")
            self.sleep()
            if html != "资源不存在":
                self.build_data(html, url)

        # upsert=True so the checkpoint is created on the very first run;
        # without it update_one matches nothing and progress is never saved.
        self.sync.update_one(
            {"name": "zhuixinfan"},
            {"$set": {"resource_id": end}},
            upsert=True,
        )
        logging.info("Zhuixinfan Finished")

    def build_data(self, html, link):
        """Parse one resource page and pass the resulting document to ``update_yyets``.

        :param html: decoded page body.
        :param link: URL the page was fetched from; stored as ``data.info.source``.

        Pages without a recognisable title block are skipped with a warning
        instead of aborting the whole crawl.
        """
        structure = deepcopy(self.structure)

        # Pages mentioning "更新至" or an "N回" episode marker are treated as
        # TV series; everything else is treated as a movie.
        if "更新至" in html or re.findall(r"\d+回", html):
            channel, channel_cn = "tv", "日剧"
        else:
            channel, channel_cn = "movie", "日影"

        soup = BeautifulSoup(html, "html.parser")
        title_block = soup.find("div", class_="resource_title")
        if title_block is None or title_block.h2 is None or soup.title is None:
            logging.warning("No resource title found, skipping %s", link)
            return

        title = title_block.h2.text
        chn = soup.title.text.split("_")[0]
        eng = title.replace(chn, "").split("(")[0].strip()
        # The last 4-digit run in the title is taken as the year; titles
        # without one keep the template's empty year list.
        years = re.findall(r"\d{4}", title)

        info = structure["data"]["info"]
        info["cnname"] = chn
        info["enname"] = eng
        info["year"] = [int(years[-1])] if years else []
        info["source"] = link
        info["channel"] = channel
        info["channel_cn"] = channel_cn
        logging.info("Building data for %s - %s", chn, link)

        item_list = soup.find("ul", class_="item_list")
        entries = item_list.find_all("li") if item_list is not None else []
        for p in entries:
            res = p.find("p", class_="way")
            # Entries without a download block (or without its <span>) carry
            # no links and are skipped.
            if res is None or res.span is None:
                continue

            resource = {
                "itemid": "",
                "episode": p.span.text,
                "name": p.span.nextSibling.text,
                "size": "unknown",
                "yyets_trans": 0,
                "dateline": str(int(time.time())),
                "files": [],
            }
            for anchor in res.find_all("a"):
                href = anchor.get("href", "")
                file_entry = self._file_entry(href, res)
                if file_entry is None:
                    logging.debug("Unknown link: %s", href)
                else:
                    resource["files"].append(file_entry)
            structure["data"]["list"][0]["items"]["MP4"].append(resource)

        self.update_yyets(structure)

    def _file_entry(self, href, way_block):
        """Return the ``files`` entry for ``href``, or ``None`` for unknown link types."""
        for marker, way, way_cn in self._LINK_KINDS:
            if marker not in href:
                continue
            passwd = ""
            if marker == "pan.baidu":
                # The extraction code is read from the second sibling after
                # the first anchor inside the block's <span>.
                passwd = way_block.span.a.nextSibling.nextSibling.text
            return {
                "way": way,
                "way_cn": way_cn,
                "address": href,
                "passwd": passwd,
            }
        return None

    def update_yyets(self, data):
        """Merge a freshly built document into the ``yyets`` collection.

        * A document with the same ``data.info.cnname`` already exists: only
          its ``data.info.source`` is updated.
        * Otherwise, a document with the same ``data.info.source`` exists: its
          ``data.list`` is replaced.
        * Otherwise the document is inserted with a new ``data.info.id``: one
          more than the highest existing id >= 90000, or 90000 if none exists.
        """
        source = data["data"]["info"]["source"]
        exists = self.yyets.find_one({"data.info.source": source})
        already_cond = {"data.info.cnname": data["data"]["info"]["cnname"]}
        already_in = self.yyets.find_one(already_cond)

        if already_in:
            logging.info("Already in old yyets, updating data.info.source: %s", source)
            self.yyets.update_one(already_cond, {"$set": {"data.info.source": source}})
        elif exists:
            logging.info("Updating new data.info.id: %s", source)
            self.yyets.update_one(
                {"data.info.source": source},
                {"$set": {"data.list": data["data"]["list"]}},
            )
        else:
            last_id = 90000
            last = self.yyets.find_one(
                {"data.info.id": {"$gte": last_id}},
                sort=[("data.info.id", -1)],
            )
            if last:
                last_id = last["data"]["info"]["id"] + 1
            logging.info("Inserting data.info.id: %s", last_id)
            data["data"]["info"]["id"] = last_id
            self.yyets.insert_one(data.copy())
class YYSub(BaseSync):
    """Sync job that copies resource metadata from the yysub.net API into ``yyets``."""

    def get_lastest_id(self):
        """Return the numeric id of the first entry on the pubdate-sorted resource list."""
        url = "https://www.yysub.net/resourcelist?channel=&area=&category=&year=&tvstation=&sort=pubdate&page=1"
        page = self.session.get(url).content.decode("u8")
        soup = BeautifulSoup(page, "html.parser")
        showlist = soup.find("div", class_="resource-showlist has-point")
        # The id is the last path segment of the first entry's link.
        href = showlist.ul.li.div.a["href"]
        return int(href.split("/")[-1])

    def run(self):
        """Fetch every resource id from the saved checkpoint up to the latest id.

        The checkpoint is the ``resource_id`` stored in the ``sync`` collection
        under ``{"name": "yysub"}`` (default 41557).  Responses with a non-200
        status or without a ``cnname`` are skipped.  When the loop finishes the
        checkpoint is upserted to one past the latest id.
        """
        logging.info("Starting to sync YYSub...")
        structure = deepcopy(self.structure)
        info = structure["data"]["info"]
        end = self.get_lastest_id() + 1
        checkpoint = self.sync.find_one({"name": "yysub"}) or {}
        start = checkpoint.get("resource_id", 41557)
        api = "https://m.yysub.net/api/resource/{}"
        copied_fields = ("cnname", "enname", "aliasname", "channel", "channel_cn", "area")

        for resource_id in range(start, end):
            resp = self.session.get(api.format(resource_id))
            self.sleep()
            if resp.status_code != 200:
                continue
            data = resp.json()["data"]
            if not data.get("cnname"):
                continue

            logging.info("Found valid resource: %s - %s", data["cnname"], resource_id)
            info["id"] = resource_id
            for field in copied_fields:
                info[field] = data[field]
            # The API result carries no download list here, so the template's
            # season entry is replaced by an empty list.
            structure["data"]["list"] = []
            info["source"] = f"https://www.yysub.net/resource/{resource_id}"
            self.insert_data(structure.copy())

        self.sync.update_one({"name": "yysub"}, {"$set": {"resource_id": end}}, upsert=True)
        logging.info("YYsub Finished")

    def insert_data(self, data):
        """Upsert ``data`` into ``yyets``, matching on its ``data.info.id``."""
        query = {"data.info.id": data["data"]["info"]["id"]}
        self.yyets.update_one(query, {"$set": data}, upsert=True)
if __name__ == "__main__":
    # Script entry point: run the Zhuixinfan sync job.
    job = Zhuixinfan()
    # For debugging a single saved page:
    # job.build_data(open("1.html").read(), "https://www.zhuixinfan.com/resource/1.html")
    job.run()
    # To sync YYSub instead:
    # YYSub().run()