# Mirror of https://github.com/tgbot-collection/YYeTsBot.git
# Synced 2025-11-25 03:15:05 +08:00
#!/usr/bin/env python3
|
|
# coding: utf-8
|
|
import contextlib
|
|
import logging
|
|
import os
|
|
import random
|
|
import re
|
|
import time
|
|
from copy import deepcopy
|
|
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
from tqdm import tqdm
|
|
|
|
from databases.base import Mongo
|
|
from databases.douban import Douban
|
|
|
|
|
|
class BaseSync:
    """Shared plumbing for the resource-sync jobs.

    Provides Mongo collection handles, a skeleton resource document that
    subclasses deep-copy and fill in, and a requests session carrying a
    desktop-browser User-Agent.
    """

    def __init__(self):
        self.mongo = Mongo()
        self.yyets = self.mongo.db["yyets"]
        self.sync = self.mongo.db["sync"]
        # Skeleton of a yyets resource document; jobs deepcopy it per resource.
        skeleton_info = {
            "id": None,
            "cnname": "",
            "enname": " ",
            "aliasname": "",
            "channel": "",
            "channel_cn": "",
            "area": "日本",
            "show_type": "",
            "expire": "",
            "views": 0,
            "year": [],
        }
        skeleton_list = [
            {
                "season_num": "101",
                "season_cn": "单剧",
                "items": {"MP4": []},
                "formats": [
                    "MP4",
                ],
            }
        ]
        self.structure = {
            "status": 1,
            "info": "OK",
            "data": {"info": skeleton_info, "list": skeleton_list},
        }
        user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
        )
        self.session = requests.Session()
        self.session.headers.update({"User-Agent": user_agent})

    @staticmethod
    def sleep(times=1):
        """Pause for a random duration in [0, times) seconds to pace requests."""
        time.sleep(random.random() * times)
|
|
|
|
|
|
class Zhuixinfan(BaseSync):
    """Sync resources from zhuixinfan (fanxinzhui.com) into the yyets collection."""

    def run(self):
        """Crawl resource pages sequentially and persist each valid one.

        The scan starts from the checkpoint stored in the ``sync`` collection
        (falling back to the ZHUIXINFAN_START env var, default 20) and stops
        before ZHUIXINFAN_END (default 2500). The checkpoint is saved at the end.
        """
        zhuixinfan = "http://www.fanxinzhui.com/rr/{}"
        # os.getenv returns a str when the variable is set, which used to make
        # range() raise TypeError; int() normalizes both sources.
        start = int(
            (self.sync.find_one({"name": "zhuixinfan"}) or {}).get(
                "resource_id", os.getenv("ZHUIXINFAN_START", 20)
            )
        )
        end = int(os.getenv("ZHUIXINFAN_END", 2500))
        for i in range(start, end):
            url = zhuixinfan.format(i)
            html = self.session.get(url).content.decode("u8")
            self.sleep()
            # The site returns the literal body "资源不存在" for missing ids.
            if html != "资源不存在":
                self.build_data(html, url)

        # upsert=True matches YYSub.run; without it the checkpoint was never
        # written on a fresh database.
        self.sync.update_one(
            {"name": "zhuixinfan"}, {"$set": {"resource_id": end}}, upsert=True
        )
        logging.info("Zhuixinfan Finished")

    def build_data(self, html, link):
        """Parse one resource page and upsert the result via update_yyets.

        :param html: decoded HTML body of the resource page
        :param link: page URL, stored as ``data.info.source``
        """
        structure = deepcopy(self.structure)
        # "更新至" (updated-to) or "全N回" (all-N-episodes) marks a TV series;
        # anything else is treated as a movie.
        if "更新至" in html or re.findall(r"全\d+回", html):
            channel, channel_cn = "tv", "日剧"
        else:
            channel, channel_cn = "movie", "日影"
        soup = BeautifulSoup(html, "html.parser")
        title = soup.find_all("div", class_="resource_title")[0].h2.text
        # Page <title> is "<chinese name>_...", the h2 holds both CN and EN.
        chn = soup.title.text.split("_")[0]
        eng = title.replace(chn, "").split("(")[0].strip()
        # Last 4-digit group in the title is taken as the release year.  A
        # title without one previously raised IndexError and aborted the whole
        # sync run, so fall back to an empty year list instead.
        years = re.findall(r"\d{4}", title)
        structure["data"]["info"]["cnname"] = chn
        structure["data"]["info"]["enname"] = eng
        structure["data"]["info"]["year"] = [int(years[-1])] if years else []
        structure["data"]["info"]["source"] = link
        structure["data"]["info"]["channel"] = channel
        structure["data"]["info"]["channel_cn"] = channel_cn

        logging.info("Building data for %s - %s", chn, link)

        li = soup.find("ul", class_="item_list")
        li = li.find_all("li") if li else []

        for p in li:
            resource = {
                "itemid": "",
                "episode": p.span.text,
                "name": p.span.nextSibling.text,
                "size": "unknown",
                "yyets_trans": 0,
                "dateline": str(int(time.time())),
                "files": [],
            }

            res = p.find("p", class_="way")
            if res.span is None:
                # Row carries no download links at all.
                continue
            links = res.find_all("a")
            for item in links:
                content = item["href"]
                if "ed2k" in content:
                    resource["files"].append(
                        {"way": "1", "way_cn": "电驴", "address": content, "passwd": ""}
                    )
                elif "magnet" in content:
                    resource["files"].append(
                        {"way": "2", "way_cn": "磁力", "address": content, "passwd": ""}
                    )
                elif "pan.baidu" in content:
                    # Extraction code sits two siblings after the first anchor —
                    # fragile against markup changes; verify if parsing breaks.
                    baidu_password = res.span.a.nextSibling.nextSibling.text
                    resource["files"].append(
                        {
                            "way": "13",
                            "way_cn": "百度网盘",
                            "address": content,
                            "passwd": baidu_password,
                        }
                    )
                elif "weiyun" in content:
                    resource["files"].append(
                        {"way": "14", "way_cn": "微云", "address": content, "passwd": ""}
                    )
                else:
                    logging.debug("Unknown link: %s", content)

            structure["data"]["list"][0]["items"]["MP4"].append(resource)

        self.update_yyets(structure)

    def update_yyets(self, data):
        """Insert or update the parsed resource in the yyets collection.

        Matching order: a document with the same Chinese name gets its source
        refreshed; a document with the same source gets its episode list
        replaced; otherwise a new document is inserted with the next free id
        at or above 90000.
        """
        source = data["data"]["info"]["source"]
        exists = self.yyets.find_one({"data.info.source": source})
        already_cond = {"data.info.cnname": data["data"]["info"]["cnname"]}
        already_in = self.yyets.find_one(already_cond)
        if already_in:
            logging.info("Already in old yyets, updating data.info.source: %s", source)
            self.yyets.update_one(already_cond, {"$set": {"data.info.source": source}})
        elif exists:
            logging.info("Updating new data.info.id: %s", source)
            self.yyets.update_one(
                {"data.info.source": source},
                {"$set": {"data.list": data["data"]["list"]}},
            )
        else:
            # Synthetic ids for scraped resources start at 90000; continue
            # after the highest one already present.
            last_id = 90000
            last = self.yyets.find_one(
                {"data.info.id": {"$gte": last_id}}, sort=[("data.info.id", -1)]
            )
            if last:
                last_id = last["data"]["info"]["id"] + 1
            logging.info("Inserting data.info.id: %s", last_id)
            data["data"]["info"]["id"] = last_id
            # Shallow copy so pymongo's added _id does not leak into the caller's dict.
            self.yyets.insert_one(data.copy())
|
|
|
|
|
|
class YYSub(BaseSync):
    """Sync resources from yysub.net via its mobile JSON API."""

    def get_lastest_id(self):
        """Return the newest resource id listed on page 1 of the resource list.

        (Method name keeps the original spelling for caller compatibility.)
        """
        url = (
            "https://www.yysub.net/resourcelist"
            "?channel=&area=&category=&year=&tvstation=&sort=pubdate&page=1"
        )
        html = self.session.get(url).content.decode("u8")
        soup = BeautifulSoup(html, "html.parser")
        showlist = soup.find("div", class_="resource-showlist has-point")
        # The first entry's href ends with /<id>.
        return int(showlist.ul.li.div.a["href"].split("/")[-1])

    def get_channel_cn(self, channel, area):
        """Best-effort Chinese channel label; returns "" when undetermined."""
        if channel == "movie":
            return "电影"
        if channel == "tv" and len(area) == 2:
            # Two-character areas like "日本"/"美国" abbreviate to "日剧"/"美剧".
            return f"{area[0]}剧"
        return ""

    def run(self):
        """Walk resource ids from the saved checkpoint up to the newest id,
        fetching each through the mobile API and upserting valid entries."""
        logging.info("Starting to sync YYSub...")
        structure = deepcopy(self.structure)
        end = self.get_lastest_id() + 1
        start = (self.sync.find_one({"name": "yysub"}) or {}).get("resource_id", 41557)
        api = "https://m.yysub.net/api/resource/{}"
        for rid in range(start, end):
            resp = self.session.get(api.format(rid))
            self.sleep()
            if resp.status_code != 200:
                continue
            data = resp.json()["data"]
            if not data.get("cnname"):
                continue
            logging.info("Found valid resource: %s - %s", data["cnname"], rid)
            info = structure["data"]["info"]
            info["id"] = rid
            info["cnname"] = data["cnname"]
            info["enname"] = data["enname"]
            info["aliasname"] = data["aliasname"]
            info["channel"] = data["channel"]
            info["channel_cn"] = data["channel_cn"] or self.get_channel_cn(
                data["channel"], data["area"]
            )
            info["area"] = data["area"]
            structure["data"]["list"] = []
            info["source"] = f"https://www.yysub.net/resource/{rid}"
            # Shallow copy is enough: pymongo serializes the document at call
            # time, before the next iteration mutates the skeleton again.
            self.insert_data(structure.copy())

        self.sync.update_one(
            {"name": "yysub"}, {"$set": {"resource_id": end}}, upsert=True
        )
        logging.info("YYsub Finished")

    def insert_data(self, data):
        """Upsert one resource document keyed by data.info.id."""
        rid = data["data"]["info"]["id"]
        self.yyets.update_one({"data.info.id": rid}, {"$set": data}, upsert=True)
|
|
|
|
|
|
def sync_douban():
    """Backfill douban metadata for yyets resources that lack it.

    Computes the set of resource ids present in ``yyets`` but absent from
    ``douban`` and fetches each one best-effort via ``Douban.find_douban``.
    """
    douban = Douban()

    yyets_data = douban.db["yyets"].aggregate(
        [
            {"$group": {"_id": None, "ids": {"$push": "$data.info.id"}}},
            {"$project": {"_id": 0, "ids": 1}},
        ]
    )
    douban_data = douban.db["douban"].aggregate(
        [
            {"$group": {"_id": None, "ids": {"$push": "$resourceId"}}},
            {"$project": {"_id": 0, "ids": 1}},
        ]
    )

    # Each pipeline yields a single document holding every id; an empty
    # collection yields nothing, so default to an empty id list instead of
    # letting next() raise StopIteration.
    id1 = next(yyets_data, {"ids": []})["ids"]
    id2 = next(douban_data, {"ids": []})["ids"]
    missing = set(id1).difference(id2)
    # Resource 233 is deliberately skipped; set.discard tolerates its absence
    # (list.remove used to raise ValueError when it was already synced).
    missing.discard(233)
    rids = list(missing)
    logging.info("resource id complete %d", len(rids))
    for rid in tqdm(rids):
        # Best-effort: one bad resource must not abort the whole backfill.
        with contextlib.suppress(Exception):
            d = douban.find_douban(rid)
            logging.info("Processed %s, length %d", rid, len(d))

    logging.info("ALL FINISH!")
|
|
|
|
|
|
if __name__ == "__main__":
    # Without basicConfig the root logger stays at WARNING and every
    # logging.info() emitted by the sync jobs is silently dropped.
    logging.basicConfig(
        level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s"
    )
    a = Zhuixinfan()
    a.run()
    # b = YYSub()
    # b.run()
|