move folder, bump

This commit is contained in:
Benny
2023-02-13 18:25:45 +01:00
parent 4a75ebdcce
commit c7711aea08
16 changed files with 19 additions and 150 deletions

View File

@@ -0,0 +1,67 @@
#!/usr/local/bin/python3
# coding: utf-8
# YYeTsBot - add_year.py
# 4/8/21 18:39
#
__author__ = "Benny <benny.think@gmail.com>"
import logging
import re
import time
from tqdm import tqdm
from common import Mongo
logging.basicConfig(level=logging.INFO)
def ts_year(ts: str) -> int:
    """Return the calendar year (local time) of a unix-timestamp string."""
    return time.localtime(int(ts)).tm_year
def extract_year(name: str) -> "int | None":
    """Extract a 19xx/20xx year from a dotted release name.

    Returns the first year found between dots (e.g. "Show.2019.1080p"
    -> 2019), or None when the name carries no such year.
    """
    match = re.search(r"\.(19\d{2}|20\d{2})\.", name)
    # A missing year is the only expected failure; the original bare
    # `except:` would also have hidden real bugs (e.g. a non-str arg).
    return int(match.group(1)) if match else None
# Backfill a "year" list onto every yyets resource: prefer years parsed
# from file names, fall back to years derived from upload timestamps.
col = Mongo().client["zimuzu"]["yyets"]
data = col.find()
for datum in tqdm(data):
    list_data = datum["data"]["list"]
    translate_year = []
    filename_year = []
    for single in list_data:
        for items in single["items"].values():
            for item in items:
                if year := ts_year(item["dateline"]):
                    translate_year.append(year)
                if year := extract_year(item["name"]):
                    filename_year.append(year)
    translate_year = list(set(translate_year))
    filename_year = list(set(filename_year))  # more accurate
    final_year = []
    if filename_year:
        final_year = filename_year.copy()
    elif translate_year:
        final_year = translate_year
    _id = datum["data"]["info"]["id"]
    name = datum["data"]["info"]["cnname"]
    # Filter instead of list.remove() inside the iteration: the original
    # mutated final_year while iterating it, which skips elements, so
    # consecutive bogus years could survive the cleanup.
    for y in final_year:
        if y <= 1900:
            logging.warning("%s is %s, popping %s", name, final_year, y)
    final_year = [y for y in final_year if y > 1900]
    col.update_one({"data.info.id": _id}, {"$set": {"data.info.year": final_year}})

Binary file not shown.

View File

@@ -7,9 +7,10 @@
__author__ = "Benny <benny.think@gmail.com>"
from common import Mongo
from tqdm import tqdm
from common import Mongo
client = Mongo()  # shared Mongo wrapper from common.py
user_col = client.db["users"]  # collection of registered users

View File

@@ -0,0 +1,39 @@
#!/usr/local/bin/python3
# coding: utf-8
# YYeTsBot - change_datetime.py
# 6/15/21 14:15
#
__author__ = "Benny <benny.think@gmail.com>"
import time
from common import Mongo
client = Mongo().client
comment = client["zimuzu"]["comment"]  # date
users = client["zimuzu"]["users"]  # date


def _normalize_dates(collection):
    """Rewrite every document's "date" field in *collection* from
    ctime-style ("Mon Feb 01 12:00:00 2021") to "%Y-%m-%d %H:%M:%S".

    The original script duplicated this loop verbatim for both
    collections; the logic is identical, so it lives here once.
    """
    # Materialize first (as the original did) so the cursor is not
    # invalidated by the updates issued while iterating.
    for item in list(collection.find()):
        parsed = time.strptime(item["date"], "%a %b %d %H:%M:%S %Y")
        new_date = time.strftime("%Y-%m-%d %H:%M:%S", parsed)
        collection.find_one_and_update({"_id": item["_id"]}, {"$set": {"date": new_date}})


_normalize_dates(comment)
# NOTE: "unique for username" in the original — usernames are assumed
# unique in this collection; verify against the index definition.
_normalize_dates(users)

View File

@@ -7,9 +7,10 @@
__author__ = "Benny <benny.think@gmail.com>"
import pymongo
import os
import pymongo
class Mongo:
def __init__(self):

View File

@@ -0,0 +1,36 @@
#!/usr/local/bin/python3
# coding: utf-8
# YYeTsBot - douban_data.py
# 7/24/21 19:28
#
__author__ = "Benny <benny.think@gmail.com>"
import logging
import pathlib
import sys
logging.basicConfig(level=logging.INFO)
lib_path = pathlib.Path(__file__).parent.parent.resolve().as_posix()
sys.path.append(lib_path)
from Mongo import DoubanMongoResource
from tqdm import tqdm
resource = DoubanMongoResource()
douban_col = resource.db["douban"]

# Drop the bulky crawled "raw" payload from every douban document.
douban_col.update_many({}, {"$unset": {"raw": ""}})
logging.info("raw data deleted.")

# Only the "writers" field was stored incorrectly; the other candidate
# fields ("actors", "directors", "genre") were already arrays.
# BSON $type 4 is "array" — anything else holds a plain string that
# must be split on whitespace into a list of names.
for field_name in ["writers"]:
    broken_docs = douban_col.find({field_name: {"$not": {"$type": 4}}})
    for doc in tqdm(broken_docs):
        logging.info("fixing %s", doc)
        fixed = doc[field_name].split()
        douban_col.update_one({"_id": doc["_id"]}, {"$set": {field_name: fixed}})
logging.info("finish")

View File

@@ -0,0 +1,40 @@
#!/usr/local/bin/python3
# coding: utf-8
# YYeTsBot - douban_fix.py
# 7/11/21 09:37
#
__author__ = "Benny <benny.think@gmail.com>"
import argparse
import pathlib
import sys
import requests
lib_path = pathlib.Path(__file__).parent.parent.resolve().as_posix()
sys.path.append(lib_path)
from Mongo import DoubanMongoResource
# Repair a single resource's douban record: re-crawl it by douban id
# and replace the stored document in place.
arg_parser = argparse.ArgumentParser(description='豆瓣数据修复')
arg_parser.add_argument('resource_id', metavar='r', type=int, help='resource id')
arg_parser.add_argument('douban_id', metavar='d', type=int, help='douban id')
cli = arg_parser.parse_args()

mongo = DoubanMongoResource()
http = requests.Session()
http.headers.update({
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
})

resource = mongo.db["yyets"].find_one({"data.info.id": cli.resource_id})
cname = resource["data"]["info"]["cnname"]
# Empty search_html: get_craw_data fetches what it needs itself.
fresh = mongo.get_craw_data(cname, cli.douban_id, cli.resource_id, "", http)
mongo.db["douban"].find_one_and_replace({"resourceId": cli.resource_id}, fresh)
print("fix complete")
sys.exit(0)

View File

@@ -0,0 +1,46 @@
#!/usr/local/bin/python3
# coding: utf-8
# YYeTsBot - douban.py
# 7/11/21 10:17
#
__author__ = "Benny <benny.think@gmail.com>"
import contextlib
import logging
import pathlib
import sys
import requests
from tqdm import tqdm
logging.basicConfig(level=logging.INFO)
lib_path = pathlib.Path(__file__).parent.parent.resolve().as_posix()
sys.path.append(lib_path)
from Mongo import DoubanMongoResource
def sync_douban():
    """Backfill douban metadata for yyets resources that have none.

    Computes the resource ids present in "yyets" but missing from
    "douban" and crawls each one.  Individual failures are suppressed so
    a single bad resource cannot abort the whole sync.
    """
    douban = DoubanMongoResource()
    # NOTE: the original also built a requests.Session with a Chrome
    # User-Agent here, but never used it — find_douban is called without
    # it — so that dead code is removed.
    yyets_ids = [i["data"]["info"]["id"] for i in douban.db["yyets"].find()]
    douban_ids = [i["resourceId"] for i in douban.db["douban"].find()]
    rids = list(set(yyets_ids).difference(douban_ids))
    logging.info("resource id complete %d", len(rids))
    for rid in tqdm(rids):
        # Best effort: skip any resource whose crawl raises.
        with contextlib.suppress(Exception):
            d = douban.find_douban(rid)
            logging.info("Processed %s, length %d", rid, len(d))
    logging.info("ALL FINISH!")


if __name__ == '__main__':
    sync_douban()

View File

@@ -0,0 +1,28 @@
#!/usr/local/bin/python3
# coding: utf-8
# YYeTsBot - format_order.py
# 2/9/21 16:24
#
__author__ = "Benny <benny.think@gmail.com>"
import pymongo
# Move the "APP" format from the front to the back of each season's
# format list so it no longer sorts first.
client = pymongo.MongoClient(host="mongo")
db = client["zimuzu"]
col = db["yyets"]
all_data = col.find().sort("data.info.id")
for resource in all_data:
    rid = resource["data"]["info"]["id"]
    # enumerate() instead of range(len(...)): same indices, idiomatic.
    for index, season in enumerate(resource["data"]["list"]):
        formats = season["formats"]
        # Guard against an empty formats list (the original indexed
        # formats[0] unconditionally and would crash on []).
        if formats and formats[0] == "APP":
            order = formats[1:] + ["APP"]
            set_value = {"$set": {f"data.list.{index}.formats": order}}
            print(f"{rid}-{index}->{set_value}")
            col.find_one_and_update({"data.info.id": rid}, set_value)
client.close()

View File

@@ -0,0 +1,41 @@
#!/usr/local/bin/python3
# coding: utf-8
# YYeTsBot - grafana_test_data.py
# 3/14/21 18:25
#
__author__ = "Benny <benny.think@gmail.com>"
import random
from datetime import date, timedelta
from common import Mongo
col = Mongo().client["zimuzu"]["metrics"]  # target for the fake metrics rows below
def generate_date_series(start: str, end: str) -> list:
    """Return every day from *start* through *end*, inclusive.

    Both arguments and every returned element are "YYYY-MM-DD" strings.
    """
    first = date(*map(int, start.split("-")))
    last = date(*map(int, end.split("-")))
    span = (last - first).days
    return [
        (first + timedelta(days=offset)).strftime("%Y-%m-%d")
        for offset in range(span + 1)
    ]
date_series = generate_date_series("2021-02-01", "2021-03-14")
# One random metrics document per day.  Loop variable renamed: the
# original `for date in date_series` shadowed the datetime.date class
# imported at the top of this script.
inserted = [
    {
        "date": day,
        "access": random.randint(1, 50),
        "search": random.randint(1, 50),
        "resource": random.randint(1, 50),
    }
    for day in date_series
]
col.insert_many(inserted)

View File

@@ -0,0 +1,68 @@
#!/usr/local/bin/python3
# coding: utf-8
# YYeTsBot - share_excel.py
# 12/18/21 19:21
#
__author__ = "Benny <benny.think@gmail.com>"
import pathlib
import sys
import openpyxl
web_path = pathlib.Path(__file__).parent.parent.resolve().as_posix()
sys.path.append(web_path)
from Mongo import Mongo
from tqdm import tqdm
from yyetsweb.utils import ts_date
wb = openpyxl.open("aliyun.xlsx")
# name -> share link, collected across every worksheet.
data = {}
for ws in wb.worksheets:
    # The original also kept a manual counter (`line = 0` before the
    # loop, `line += 1` inside it); the for-loop variable is rebound
    # each iteration anyway, so that was dead code and is removed.
    for row in range(1, ws.max_row + 1):
        name = ws.cell(row, 1).value
        link = ws.cell(row, 2).value
        data[name] = link
# Comment skeleton reused for every shared link; "content" and "date"
# are filled in per item before insertion (see the loop further down).
template = {
    "username": "Benny",
    "ip": "127.0.0.1",
    "date": "",
    "browser": "cli",
    "content": "",
    "resource_id": 234,
    "type": "parent"
}
col = Mongo().db["comment"]
# Synthetic resource (id 234, "网友分享") that all shared links are
# attached to as comments.
share_doc = {
    "status": 1.0,
    "info": "OK",
    "data": {
        "info": {
            "id": 234,
            "cnname": "网友分享",
            "enname": "",
            "aliasname": "",
            "channel": "share",
            "channel_cn": "",
            "area": "",
            "show_type": "",
            "expire": "1610401225",
            "views": 0
        },
        "list": []
    }
}
# Upsert the placeholder resource, then insert one comment per link.
Mongo().db["yyets"].update_one({"data.info.id": 234}, {"$set": share_doc}, upsert=True)
for name, link in tqdm(data.items()):
    template["content"] = f"{name}\n{link}"
    template["date"] = ts_date()
    col.insert_one(template.copy())  # copy: insert a fresh dict per row