Skip to content

Commit e502c44

Browse files
committed
add delete function
1 parent ed03c53 commit e502c44

File tree

6 files changed

+234
-14
lines changed

6 files changed

+234
-14
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -65,5 +65,6 @@ target/
6565
*.aof
6666

6767
#Custom
68+
*.txt
6869

6970

api.py

+27-12
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
import sys, getopt
1010

1111
apiroot = "https://commons.moegirl.org/api.php"
12-
cookie = None
1312
def allimages():
1413
aiend = model.getStarttime() #get the latest timestamp from model
1514
cont = model.getContinue()
@@ -26,7 +25,7 @@ def allimages():
2625
params = {"action": "query", "aiprop": "canonicaltitle", "format": "json", "list": "allimages",
2726
"aisort": "timestamp", "aistart": aistart, "aiend": aiend.decode(), "ailimit": 500,"continue": cont.decode(), "aicontinue": aicontinue.decode()}
2827
try:
29-
req = requests.get(apiroot, params=params, cookies = cookie)
28+
req = requests.get(apiroot, params=params, cookies = config.cookie)
3029
print(req.url)
3130
jsondata = req.json()
3231
except ValueError:
@@ -114,49 +113,64 @@ def botLogin():
114113
req = requests.post(apiroot, data=token_params)
115114
token = req.json()['query']['tokens']['logintoken']
116115
login_params = {'action': 'login', 'lgname': config.botUsername, 'lgtoken': token, 'lgpassword': config.botPassword, 'format': 'json'}
117-
cookie = requests.post(apiroot, data=login_params, cookies = req.cookies).cookies
116+
config.cookie = requests.post(apiroot, data=login_params, cookies = req.cookies).cookies
118117

119118
def removeFile(filename):
    """Post an ``action=delete`` request for the page titled ``filename``.

    A CSRF token is requested first with the session cookies held in
    ``config.cookie``; the delete request is then posted with that token,
    the ``Bot`` tag and a fixed deletion reason.

    Returns:
        ``False`` when a request fails or the token response cannot be
        decoded. Otherwise the function falls through and returns ``None``;
        the body of the delete response is not inspected.
    """
    try:
        print(filename)
        csrf_params = {"action": "query", "format": "json", "meta": "tokens"}
        token_response = requests.post(apiroot, data=csrf_params, cookies=config.cookie)
        csrf = token_response.json()['query']['tokens']['csrftoken']
        del_params = {"action": "delete", "title": filename, "format": "json",
                      "tags": "Bot", "reason": "autoremove unused file", "token": csrf}
        requests.post(apiroot, data=del_params, cookies=config.cookie)
    except ValueError:
        print("jsondata parse error")
        return False
    except requests.exceptions.SSLError:
        print("connection lost")
        return False
    except Exception as e:
        # Exceptions have no ``value`` attribute; ``print(e.value)`` raised
        # AttributeError from inside this handler instead of reporting the
        # failure and returning False.
        print(e)
        return False
134134

135135
def main():
136136
searchonly = False
137137
export = None
138138
exportonlyflag = False
139-
opts, args = getopt.getopt(sys.argv[1:], "sne:", ["search", "exportonly", "export="])
139+
deleteflag = None
140+
opts, args = getopt.getopt(sys.argv[1:], "snde:", ["search", "exportonly", "delete", "export="])
140141
for op, value in opts:
141-
if op in ("-h, --search"):
142+
if op in ("-s, --search"):
142143
searchonly = True
143144
if op in ("-n", "--exportonly"):
144145
exportonlyflag = True
146+
if op in ("-d", "delete"):
147+
deleteflag = True
145148
if op in ("-e", "--export"):
146149
export = value
147150
if model.getStartflag() is None:
148151
model.setStartflag(1)
149152
if model.getCounter() is None:
150153
model.initCounter()
151154
while True:
152-
if cookie is None:
155+
if config.cookie is None:
153156
botLogin()
157+
if deleteflag is True:
158+
if searchonly is True or export is not None or exportonlyflag is True:
159+
raise AttributeError("You should not use -d flag with other options")
160+
else:
161+
removableList = model.getRemovableImages()
162+
with futures.ThreadPoolExecutor(config.workers) as executor:
163+
executor.map(removeFile, removableList)
164+
print("complete")
165+
break
154166
if model.getContinue() is None or model.getContinue().decode() == "None":
155167
model.swapLists()
156168
model.setNonetime()
157169
model.setStartflag(1)
158170
if exportonlyflag is True:
159171
model.setStartflag(0)
172+
elif deleteflag is True:
173+
model.setStartflag(0)
160174
else:
161175
model.setStartflag(1)
162176
startflag = bool(int(model.getStartflag().decode()))
@@ -192,7 +206,8 @@ def main():
192206
elif exportonlyflag is True and export is None:
193207
raise AttributeError("You must specify a export destiniation")
194208

209+
210+
195211
if __name__ == '__main__':
196212
main()
197-
198213

category.py

+198
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
# -*- coding:utf-8 -*-
2+
from concurrent import futures
3+
import requests
4+
import config
5+
import model
6+
import json
7+
import time
8+
import datetime
9+
import sys, getopt
10+
11+
apiroot = "https://commons.moegirl.org/api.php"
12+
cookie = None
13+
def allimages():
    """Fetch one batch of image titles from the wiki's ``list=allimages`` API.

    The query window runs from ``config.aistart`` to the start time stored
    in ``model`` (initialised through ``model.setUTCStarttime()`` when it is
    unset). When ``model`` holds a continuation title, the stored
    ``continue``/``aicontinue`` values are sent so the listing resumes.

    Returns:
        A list of ``canonicaltitle`` strings when the response carries a
        ``continue`` section. ``False`` when the request or JSON decoding
        fails, the HTTP status is not 200, or the response has no
        ``continue`` section (in that case the stored continuation values
        are reset and the start flag is set to 0).
    """
    aiend = model.getStarttime()  # get the latest timestamp from model
    cont = model.getContinue()
    aicontinue = model.getaiContinue()  # get the continuation title from model
    aistart = config.aistart
    if aiend is None or aiend.decode() == "None":
        model.setUTCStarttime()
        aiend = model.getStarttime()

    # Build the common query once; continuation keys are added only on resume.
    params = {"action": "query", "aiprop": "canonicaltitle", "format": "json", "list": "allimages",
              "aisort": "timestamp", "aistart": aistart, "aiend": aiend.decode(), "ailimit": 500}
    if not (aicontinue is None or aicontinue.decode() == "None"):
        params["continue"] = cont.decode()
        params["aicontinue"] = aicontinue.decode()

    try:
        req = requests.get(apiroot, params=params, cookies=cookie)
        print(req.url)
        jsondata = req.json()
    except ValueError:
        print("jsondata purge error")
        print(req.text)
        return False
    except requests.exceptions.SSLError:
        print("connection lost")
        return False
    except Exception as exc:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit and made the polling loop impossible to interrupt.
        print(exc)
        return False

    if req.status_code != 200:
        return False

    if "continue" not in jsondata:
        # NOTE(review): the titles on this last page are never read because
        # False is returned first - confirm that dropping them is intended.
        model.setContinue(None)
        model.setaiContinue(None)
        model.setStartflag(0)
        return False

    model.setContinue(jsondata['continue']['continue'])
    model.setaiContinue(jsondata['continue']['aicontinue'])
    return [image['canonicaltitle'] for image in jsondata['query']['allimages']]
57+
58+
def isNotReffed(imagename):
    """Return ``imagename`` when the API reports an empty global-usage list.

    Queries ``prop=globalusage`` for the title and looks at the first page
    entry of the response.

    Returns:
        ``imagename`` if the ``globalusage`` list is empty; ``False`` if it
        is non-empty, if the request or JSON decoding fails, or if the
        response does not have the expected ``query``/``pages`` structure.
    """
    params = {"action": "query", "format": "json", "prop": "globalusage", "titles": imagename}
    try:
        req = requests.post(apiroot, data=params)
        jsondata = req.json()
    except ValueError:
        print(req.text)
        print("jsondata purge error")
        return False
    except requests.exceptions.SSLError:
        print("coonection lost")
        return False
    except Exception as exc:
        # Was a bare ``except:``; KeyboardInterrupt/SystemExit now propagate.
        print(exc)
        return False

    try:
        first_page = next(iter(jsondata['query']['pages'].values()))
        globalref = first_page['globalusage']
    except (KeyError, StopIteration, TypeError, AttributeError):
        # An unexpected payload used to raise out of the worker thread.
        # Answering False keeps the file out of the removable set.
        print("unexpected api response")
        return False

    if globalref == []:
        return imagename
    return False
79+
80+
def isNotCategorized(imagename):
    """Return ``imagename`` when the API lists no categories for it.

    Queries ``prop=categories`` for the title and checks whether the first
    page entry of the response has a ``categories`` key.

    Returns:
        ``imagename`` if the page entry has no ``categories`` key; ``False``
        if it has one, if the request or JSON decoding fails, or if the
        response does not have the expected ``query``/``pages`` structure.
    """
    params = {"action": "query", "format": "json", "prop": "categories", "titles": imagename}
    try:
        req = requests.post(apiroot, data=params)
        jsondata = req.json()
    except ValueError:
        print(req.text)
        print("jsondata purge error")
        return False
    except requests.exceptions.SSLError:
        print("coonection lost")
        return False
    except Exception as exc:
        # Was a bare ``except:``; KeyboardInterrupt/SystemExit now propagate.
        print(exc)
        return False

    try:
        first_page = next(iter(jsondata['query']['pages'].values()))
    except (KeyError, StopIteration, TypeError, AttributeError):
        # An unexpected payload used to raise out of the worker thread.
        # Answering False keeps the file out of the removable set.
        print("unexpected api response")
        return False

    if 'categories' in first_page:
        return False
    return imagename
100+
101+
def getNoRefList():
    """Push the titles that are both unreferenced and uncategorized to model.

    Takes one batch from ``allimages()``; when that call returns ``False``
    nothing is done. Otherwise every title is run through ``isNotReffed``
    and ``isNotCategorized`` on a thread pool of ``config.workers`` threads,
    and the titles returned by both checks are passed to
    ``model.pushNorefList``.
    """
    titles = allimages()
    if titles is False:
        return
    with futures.ThreadPoolExecutor(config.workers) as pool:
        # Each check yields the title itself on success and False otherwise.
        unreferenced = {hit for hit in pool.map(isNotReffed, titles) if hit is not False}
        uncategorized = {hit for hit in pool.map(isNotCategorized, titles) if hit is not False}
        model.pushNorefList(unreferenced & uncategorized)
111+
112+
def botLogin():
    """Log the bot in and store the session cookies in the module-level ``cookie``.

    Requests a login token, then posts ``action=login`` with
    ``config.botUsername`` / ``config.botPassword`` and the cookies from the
    token request. The login response body is not inspected.
    """
    # Without this declaration the assignment below created a local name, so
    # the module-level ``cookie`` stayed None after every login.
    global cookie
    token_params = {"action": "query", "meta": "tokens", "type": "login", "format": "json"}
    req = requests.post(apiroot, data=token_params)
    token = req.json()['query']['tokens']['logintoken']
    login_params = {'action': 'login', 'lgname': config.botUsername, 'lgtoken': token,
                    'lgpassword': config.botPassword, 'format': 'json'}
    cookie = requests.post(apiroot, data=login_params, cookies=req.cookies).cookies
118+
119+
def removeFile(filename):
    """Post an ``action=delete`` request for the page titled ``filename``.

    A CSRF token is requested first with the module-level ``cookie``; the
    delete request is then posted with that token, the ``Bot`` tag and a
    fixed deletion reason.

    Returns:
        ``False`` when a request fails or the token response cannot be
        decoded. Otherwise the function falls through and returns ``None``;
        the body of the delete response is not inspected.
    """
    try:
        print("removing")
        csrf_params = {"action": "query", "format": "json", "meta": "tokens"}
        token_response = requests.post(apiroot, data=csrf_params, cookies=cookie)
        csrf = token_response.json()['query']['tokens']['csrftoken']
        # The title used to come from an undefined name ``title``; the
        # resulting NameError was swallowed by the bare ``except:`` below, so
        # the delete request was never sent.
        del_params = {"action": "delete", "title": filename, "format": "json",
                      "tags": "Bot", "reason": "autoremove unused file", "token": csrf}
        requests.post(apiroot, data=del_params, cookies=cookie)
    except ValueError:
        print("jsondata parse error")
        return False
    except requests.exceptions.SSLError:
        print("connection lost")
        return False
    except Exception as exc:
        # Was a bare ``except:``; KeyboardInterrupt/SystemExit now propagate.
        print(exc)
        return False
134+
135+
def main():
# Command-line entry point: parses -s/--search, -n/--exportonly and
# -e/--export <path> with getopt, then loops until a branch below breaks or raises.
# NOTE(review): this text has lost its indentation and carries diff
# line-number markers ("136+", ...), so block nesting cannot be read from here.
136+
searchonly = False
137+
export = None
138+
exportonlyflag = False
139+
opts, args = getopt.getopt(sys.argv[1:], "sne:", ["search", "exportonly", "export="])
140+
for op, value in opts:
141+
# NOTE(review): ("-h, --search") is one parenthesised string, not a tuple, so
# this is a substring test; "-s" matches only because it occurs inside
# "--search". ("-s", "--search") was presumably intended - confirm.
if op in ("-h, --search"):
142+
searchonly = True
143+
if op in ("-n", "--exportonly"):
144+
exportonlyflag = True
145+
if op in ("-e", "--export"):
146+
export = value
147+
# Initialise the start flag and the counter in model when they are unset.
if model.getStartflag() is None:
148+
model.setStartflag(1)
149+
if model.getCounter() is None:
150+
model.initCounter()
151+
while True:
152+
if cookie is None:
153+
botLogin()
154+
# No stored continuation: swap the lists in model and reset the time and start flag.
if model.getContinue() is None or model.getContinue().decode() == "None":
155+
model.swapLists()
156+
model.setNonetime()
157+
model.setStartflag(1)
158+
if exportonlyflag is True:
159+
model.setStartflag(0)
160+
else:
161+
model.setStartflag(1)
162+
startflag = bool(int(model.getStartflag().decode()))
163+
# Keep calling getNoRefList() while the start flag stored in model reads as 1.
while startflag is True:
164+
getNoRefList()
165+
startflag = bool(int(model.getStartflag().decode()))
166+
else:
167+
if exportonlyflag is False:
168+
removableList = model.markRemovableImages()
169+
counter = model.getCounter()
170+
# Counter >= 2 and not search-only: run removeFile over the removable list on a
# thread pool, optionally write the list to the export path, clean up and stop.
if int(counter) >= 2 and searchonly is False:
171+
with futures.ThreadPoolExecutor(config.workers) as executor:
172+
executor.map(removeFile, removableList)
173+
if export is not None:
174+
with open(export, "w", encoding="utf-8") as f:
175+
f.writelines([line.decode()+'\n' for line in removableList])
176+
model.cleanup()
177+
print("complete")
178+
break
179+
# Counter >= 2 and search-only: no removal; optionally export, clean up and stop.
elif int(counter) >= 2 and searchonly is True:
180+
if export is not None:
181+
with open(export, "w", encoding="utf-8") as f:
182+
f.writelines([line.decode()+'\n' for line in removableList])
183+
model.cleanup()
184+
print("complete")
185+
break
186+
# Export-only with a destination: write model.getRemovableImages() to it and stop.
elif exportonlyflag is True and export is not None:
187+
removableList = model.getRemovableImages()
188+
with open(export, "w", encoding="utf-8") as f:
189+
f.writelines([line.decode()+"\n" for line in removableList])
190+
print("complete")
191+
break
192+
# Export-only without -e/--export is rejected.
elif exportonlyflag is True and export is None:
193+
raise AttributeError("You must specify a export destiniation")
194+
195+
if __name__ == '__main__':
196+
main()
197+
198+

config.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
import datetime
33
import redis
44

5+
# Session cookies shared across modules; api.py stores the login response
# cookies here and passes them on later requests. None until a login is made.
cookie = None
# Redis client shared with other modules (migrate.py imports ``r``).
r = redis.Redis(host="localhost", port=6379, db=0)
botUsername = "CommonsFileDeletionBot"
# The bot password was committed here in plain text. It is now read from the
# COMMONS_BOT_PASSWORD environment variable and defaults to an empty string,
# as in the previous revision. The leaked password should be rotated, since
# it remains in the repository history.
import os
botPassword = os.environ.get("COMMONS_BOT_PASSWORD", "")
# Size of the thread pools used for the per-image API calls.
workers = 20
# Value sent as the ``aistart`` parameter of the allimages query.
aistart = datetime.datetime(2018,5,20,0,0,0).strftime("%Y-%m-%dT%H:%M:%SZ")

default.profraw

1.97 MB
Binary file not shown.

migrate.py

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# -*- coding:utf-8 -*-
2+
from config import r
3+
# Load every non-empty line of ./noreflist.txt into the Redis set 'resultset'.
# The file is decoded as UTF-8 explicitly instead of with the platform default;
# the bot writes its export files as UTF-8.
# NOTE(review): presumably noreflist.txt is such an export - confirm.
with open('./noreflist.txt', 'r', encoding='utf-8') as f:
    for line in f:
        title = line.strip()
        # A blank line would otherwise add the empty string as a set member.
        if title:
            r.sadd('resultset', title)

0 commit comments

Comments
 (0)