mirror of
https://github.com/tgbot-collection/YYeTsBot.git
synced 2025-11-25 03:15:05 +08:00
move folder, bump
This commit is contained in:
67
yyetsweb/commands/add_year.py
Normal file
67
yyetsweb/commands/add_year.py
Normal file
@@ -0,0 +1,67 @@
|
||||
#!/usr/local/bin/python3
|
||||
# coding: utf-8
|
||||
|
||||
# YYeTsBot - add_year.py
|
||||
# 4/8/21 18:39
|
||||
#
|
||||
|
||||
__author__ = "Benny <benny.think@gmail.com>"
|
||||
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
from common import Mongo
|
||||
|
||||
# Configure the root logger so INFO-level (and more severe) records are emitted.
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
|
||||
def ts_year(ts: str) -> int:
    """Return the calendar year of the Unix timestamp *ts*.

    *ts* is passed through ``int()`` and interpreted in the machine's local
    time zone.
    """
    moment = time.localtime(int(ts))
    year_text = time.strftime("%Y", moment)
    return int(year_text)
|
||||
|
||||
|
||||
def extract_year(name: str) -> "int | None":
    """Return the first dot-delimited 19xx/20xx year found in *name*.

    The year must be surrounded by dots, e.g. ``"Show.2019.S01E01"`` yields
    ``2019``. Returns ``None`` when *name* contains no such year or is not a
    string at all.
    """
    if not isinstance(name, str):
        # Non-string names (e.g. a missing value) simply have no year.
        return None
    found = re.search(r"\.(19\d{2}|20\d{2})\.", name)
    return int(found.group(1)) if found else None
|
||||
|
||||
|
||||
col = Mongo().client["zimuzu"]["yyets"]

data = col.find()

# For every resource document, collect candidate years from its files and
# store the result under data.info.year.
for datum in tqdm(data):
    list_data = datum["data"]["list"]
    translate_year = set()  # years derived from each file's "dateline" timestamp
    filename_year = set()  # years parsed from file names; more accurate
    for single in list_data:
        for files in single["items"].values():
            for j in files:
                if d := ts_year(j["dateline"]):
                    translate_year.add(d)
                if d := extract_year(j["name"]):
                    filename_year.add(d)

    # Prefer years taken from file names; fall back to timestamp years.
    candidates = list(filename_year or translate_year)

    _id = datum["data"]["info"]["id"]
    name = datum["data"]["info"]["cnname"]

    # Build a new list instead of removing from the list being iterated,
    # which would skip the element following each removed one.
    final_year = []
    for y in candidates:
        if y <= 1900:
            logging.warning("%s is %s, popping %s", name, candidates, y)
        else:
            final_year.append(y)

    col.update_one({"data.info.id": _id}, {"$set": {"data.info.year": final_year}})
|
||||
BIN
yyetsweb/commands/aliyun.xlsx
Normal file
BIN
yyetsweb/commands/aliyun.xlsx
Normal file
Binary file not shown.
@@ -7,9 +7,10 @@
|
||||
|
||||
__author__ = "Benny <benny.think@gmail.com>"
|
||||
|
||||
from common import Mongo
|
||||
from tqdm import tqdm
|
||||
|
||||
from common import Mongo
|
||||
|
||||
client = Mongo()
|
||||
user_col = client.db["users"]
|
||||
|
||||
|
||||
39
yyetsweb/commands/change_datetime.py
Normal file
39
yyetsweb/commands/change_datetime.py
Normal file
@@ -0,0 +1,39 @@
|
||||
#!/usr/local/bin/python3
|
||||
# coding: utf-8
|
||||
|
||||
# YYeTsBot - change_datetime.py
|
||||
# 6/15/21 14:15
|
||||
#
|
||||
|
||||
__author__ = "Benny <benny.think@gmail.com>"
|
||||
|
||||
import time
|
||||
|
||||
from common import Mongo
|
||||
|
||||
client = Mongo().client

comment = client["zimuzu"]["comment"]  # date
users = client["zimuzu"]["users"]  # date

OLD_FORMAT = "%a %b %d %H:%M:%S %Y"  # e.g. "Tue Jun 15 14:15:00 2021"
NEW_FORMAT = "%Y-%m-%d %H:%M:%S"  # e.g. "2021-06-15 14:15:00"


def _convert_dates(collection):
    """Rewrite the ``date`` field of every document in *collection*.

    Values in OLD_FORMAT are converted to NEW_FORMAT. Values already in
    NEW_FORMAT are left untouched so the script can be re-run safely.

    Raises:
        ValueError: if a ``date`` value matches neither format.
    """
    # Materialise the cursor first so updates do not interfere with iteration.
    for item in list(collection.find()):
        raw = item["date"]
        try:
            parsed = time.strptime(raw, OLD_FORMAT)
        except ValueError:
            # Either already converted (skip) or genuinely malformed (raise).
            time.strptime(raw, NEW_FORMAT)
            continue

        condition = {"_id": item["_id"]}
        update = {"$set": {"date": time.strftime(NEW_FORMAT, parsed)}}
        collection.find_one_and_update(condition, update)


_convert_dates(comment)
# unique for username
_convert_dates(users)
|
||||
@@ -7,9 +7,10 @@
|
||||
|
||||
__author__ = "Benny <benny.think@gmail.com>"
|
||||
|
||||
import pymongo
|
||||
import os
|
||||
|
||||
import pymongo
|
||||
|
||||
|
||||
class Mongo:
|
||||
def __init__(self):
|
||||
|
||||
36
yyetsweb/commands/douban_data_clean.py
Normal file
36
yyetsweb/commands/douban_data_clean.py
Normal file
@@ -0,0 +1,36 @@
|
||||
#!/usr/local/bin/python3
|
||||
# coding: utf-8
|
||||
|
||||
# YYeTsBot - douban_data.py
|
||||
# 7/24/21 19:28
|
||||
#
|
||||
|
||||
__author__ = "Benny <benny.think@gmail.com>"
|
||||
|
||||
import logging
|
||||
import pathlib
|
||||
import sys
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
# Make the parent directory importable so the sibling Mongo module resolves.
lib_path = pathlib.Path(__file__).parent.parent.resolve().as_posix()
sys.path.append(lib_path)
from Mongo import DoubanMongoResource
from tqdm import tqdm

m = DoubanMongoResource()

# Drop the "raw" field from every douban document.
m.db["douban"].update_many({}, {"$unset": {"raw": ""}})
logging.info("raw data deleted.")
# only writers are wrong
# (other candidates were: "actors", "directors", "genre")
wrong_field = ["writers"]
# BSON type codes: 2 is string, 4 is array
for field in wrong_field:
    # $not also matches documents that lack the field entirely, so the value
    # has to be checked before it is split.
    incorrect_data = m.db["douban"].find({field: {"$not": {"$type": 4}}})
    for datum in tqdm(incorrect_data):
        value = datum.get(field)
        if not isinstance(value, str):
            logging.warning("skipping %s: %s is %r, not a string", datum["_id"], field, value)
            continue
        logging.info("fixing %s", datum)
        new_field = value.split()
        m.db["douban"].update_one({"_id": datum["_id"]}, {"$set": {field: new_field}})

logging.info("finish")
|
||||
40
yyetsweb/commands/douban_fix.py
Normal file
40
yyetsweb/commands/douban_fix.py
Normal file
@@ -0,0 +1,40 @@
|
||||
#!/usr/local/bin/python3
|
||||
# coding: utf-8
|
||||
|
||||
# YYeTsBot - douban_fix.py
|
||||
# 7/11/21 09:37
|
||||
#
|
||||
|
||||
__author__ = "Benny <benny.think@gmail.com>"
|
||||
|
||||
import argparse
|
||||
import pathlib
|
||||
import sys
|
||||
|
||||
import requests
|
||||
|
||||
# Make the parent directory importable so the sibling Mongo module resolves.
lib_path = pathlib.Path(__file__).parent.parent.resolve().as_posix()
sys.path.append(lib_path)

from Mongo import DoubanMongoResource

parser = argparse.ArgumentParser(description='豆瓣数据修复')
parser.add_argument('resource_id', metavar='r', type=int, help='resource id')
parser.add_argument('douban_id', metavar='d', type=int, help='douban id')
args = parser.parse_args()
resource_id = args.resource_id
douban_id = args.douban_id

douban = DoubanMongoResource()
session = requests.Session()
ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
session.headers.update({"User-Agent": ua})

yyets_data = douban.db["yyets"].find_one({"data.info.id": resource_id})
if yyets_data is None:
    # find_one returns None when nothing matches; fail with a clear message
    # instead of a TypeError on the subscript below.
    sys.exit(f"resource {resource_id} not found in yyets collection")

search_html = ""
cname = yyets_data["data"]["info"]["cnname"]

final_data = douban.get_craw_data(cname, douban_id, resource_id, search_html, session)
# find_one_and_replace returns the replaced document, or None if no document
# matched the filter (in which case nothing was written).
replaced = douban.db["douban"].find_one_and_replace({"resourceId": resource_id}, final_data)
if replaced is None:
    sys.exit(f"no douban record for resource {resource_id}; nothing replaced")

print("fix complete")
sys.exit(0)
|
||||
46
yyetsweb/commands/douban_sync.py
Normal file
46
yyetsweb/commands/douban_sync.py
Normal file
@@ -0,0 +1,46 @@
|
||||
#!/usr/local/bin/python3
|
||||
# coding: utf-8
|
||||
|
||||
# YYeTsBot - douban.py
|
||||
# 7/11/21 10:17
|
||||
#
|
||||
|
||||
__author__ = "Benny <benny.think@gmail.com>"
|
||||
|
||||
import contextlib
|
||||
import logging
|
||||
import pathlib
|
||||
import sys
|
||||
|
||||
import requests
|
||||
from tqdm import tqdm
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
lib_path = pathlib.Path(__file__).parent.parent.resolve().as_posix()
|
||||
sys.path.append(lib_path)
|
||||
from Mongo import DoubanMongoResource
|
||||
|
||||
|
||||
def sync_douban():
    """Call ``find_douban`` for every yyets resource id missing from douban.

    Collects the resource ids present in the ``yyets`` collection but absent
    from the ``douban`` collection and calls ``douban.find_douban`` on each.
    A failure on one id is logged with its traceback and the run continues
    with the next id.
    """
    douban = DoubanMongoResource()
    session = requests.Session()
    ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
    session.headers.update({"User-Agent": ua})

    # Only the id fields are needed, so project them rather than pulling
    # whole documents over the wire.
    yyets_data = douban.db["yyets"].find({}, {"data.info.id": 1})
    douban_data = douban.db["douban"].find({}, {"resourceId": 1})

    id1 = {i["data"]["info"]["id"] for i in yyets_data}
    id2 = {i["resourceId"] for i in douban_data}
    rids = list(id1 - id2)
    logging.info("resource id complete %d", len(rids))
    for rid in tqdm(rids):
        try:
            d = douban.find_douban(rid)
            logging.info("Processed %s, length %d", rid, len(d))
        except Exception:
            # Best effort: keep going, but leave a trace of what failed.
            logging.exception("Failed to process %s", rid)

    logging.info("ALL FINISH!")


if __name__ == '__main__':
    sync_douban()
|
||||
28
yyetsweb/commands/format_order.py
Normal file
28
yyetsweb/commands/format_order.py
Normal file
@@ -0,0 +1,28 @@
|
||||
#!/usr/local/bin/python3
|
||||
# coding: utf-8
|
||||
|
||||
# YYeTsBot - format_order.py
|
||||
# 2/9/21 16:24
|
||||
#
|
||||
|
||||
__author__ = "Benny <benny.think@gmail.com>"
|
||||
|
||||
import pymongo
|
||||
|
||||
client = pymongo.MongoClient(host="mongo")
db = client["zimuzu"]
col = db["yyets"]

try:
    all_data = col.find().sort("data.info.id")

    # For each season whose first format is "APP", move "APP" to the end.
    for resource in all_data:
        for index, season in enumerate(resource["data"]["list"]):
            formats = season["formats"]
            # Guard against an empty formats list before looking at [0].
            if formats and formats[0] == "APP":
                order = [*formats[1:], "APP"]
                rid = resource["data"]["info"]["id"]
                set_value = {"$set": {f"data.list.{index}.formats": order}}
                print(f"{rid}-{index}->{set_value}")
                col.find_one_and_update({"data.info.id": rid}, set_value)
finally:
    # Release the connection even if a document is malformed.
    client.close()
|
||||
41
yyetsweb/commands/grafana_test_data.py
Normal file
41
yyetsweb/commands/grafana_test_data.py
Normal file
@@ -0,0 +1,41 @@
|
||||
#!/usr/local/bin/python3
|
||||
# coding: utf-8
|
||||
|
||||
# YYeTsBot - grafana_test_data.py
|
||||
# 3/14/21 18:25
|
||||
#
|
||||
|
||||
__author__ = "Benny <benny.think@gmail.com>"
|
||||
|
||||
import random
|
||||
from datetime import date, timedelta
|
||||
|
||||
from common import Mongo
|
||||
|
||||
# Handle to the "metrics" collection of the "zimuzu" database.
col = Mongo().client["zimuzu"]["metrics"]
|
||||
|
||||
|
||||
def generate_date_series(start: str, end: str) -> list:
    """Return every day from *start* to *end*, inclusive, as ``YYYY-MM-DD``.

    Both arguments are dash-separated ``year-month-day`` strings. The result
    is empty when *end* is earlier than *start*.
    """
    first_day = date(*map(int, start.split("-")))
    last_day = date(*map(int, end.split("-")))
    span = (last_day - first_day).days
    return [
        (first_day + timedelta(days=offset)).strftime("%Y-%m-%d")
        for offset in range(span + 1)
    ]
|
||||
|
||||
|
||||
date_series = generate_date_series("2021-02-01", "2021-03-14")

# One synthetic metrics document per day, each counter drawn from 1..50.
# The loop variable is named `day` so it does not shadow the imported
# datetime.date class that generate_date_series relies on.
inserted = [
    {
        "date": day,
        "access": random.randint(1, 50),
        "search": random.randint(1, 50),
        "resource": random.randint(1, 50),
    }
    for day in date_series
]

col.insert_many(inserted)
|
||||
68
yyetsweb/commands/share_excel.py
Normal file
68
yyetsweb/commands/share_excel.py
Normal file
@@ -0,0 +1,68 @@
|
||||
#!/usr/local/bin/python3
|
||||
# coding: utf-8
|
||||
|
||||
# YYeTsBot - share_excel.py
|
||||
# 12/18/21 19:21
|
||||
#
|
||||
|
||||
__author__ = "Benny <benny.think@gmail.com>"
|
||||
|
||||
import pathlib
|
||||
import sys
|
||||
|
||||
import openpyxl
|
||||
|
||||
# Make the parent directory importable so the sibling Mongo module resolves.
web_path = pathlib.Path(__file__).parent.parent.resolve().as_posix()
sys.path.append(web_path)
from Mongo import Mongo
from tqdm import tqdm
from yyetsweb.utils import ts_date

# Resource id under which the shared links are published.
SHARE_RESOURCE_ID = 234

wb = openpyxl.open("aliyun.xlsx")

# Map each name (column 1) to its link (column 2) across all worksheets.
data = {}

for ws in wb.worksheets:
    for line in range(1, ws.max_row + 1):
        name = ws.cell(line, 1).value
        link = ws.cell(line, 2).value
        if name is None or link is None:
            # Blank or partial rows would otherwise become "None" comments.
            continue
        data[name] = link

# Fields shared by every inserted comment; content and date are set per row.
template = {
    "username": "Benny",
    "ip": "127.0.0.1",
    "date": "",
    "browser": "cli",
    "content": "",
    "resource_id": SHARE_RESOURCE_ID,
    "type": "parent"
}

# Reuse a single Mongo instance for both collections.
mongo = Mongo()
col = mongo.db["comment"]
share_doc = {
    "status": 1.0,
    "info": "OK",
    "data": {
        "info": {
            "id": SHARE_RESOURCE_ID,
            "cnname": "网友分享",
            "enname": "",
            "aliasname": "",
            "channel": "share",
            "channel_cn": "",
            "area": "",
            "show_type": "",
            "expire": "1610401225",
            "views": 0
        },
        "list": []
    }
}

# Create or refresh the placeholder resource that the comments attach to.
mongo.db["yyets"].update_one({"data.info.id": SHARE_RESOURCE_ID}, {"$set": share_doc}, upsert=True)

for name, link in tqdm(data.items()):
    # Build a fresh document per row so inserts never share one dict.
    col.insert_one({**template, "content": f"{name}\n{link}", "date": ts_date()})
|
||||
Reference in New Issue
Block a user