1
+ # -*- coding:utf-8 -*-
2
+ from concurrent import futures
3
+ import requests
4
+ import config
5
+ import model
6
+ import json
7
+ import time
8
+ import datetime
9
+ import sys , getopt
10
+
11
# URL of the MediaWiki action API that every request in this module is sent to.
apiroot = "https://commons.moegirl.org/api.php"
# Passed as ``cookies=`` to the authenticated requests; starts out as None,
# and main() calls botLogin() for as long as it is still None.
cookie = None
13
def allimages():
    """Fetch the next batch of image titles from the ``allimages`` API list.

    The upper time bound and the continuation tokens are read from ``model``;
    the lower bound comes from ``config.aistart``.  After a successful request
    the continuation tokens from the response are written back to ``model``.
    When the response carries no ``continue`` section (the listing is
    exhausted) the stored tokens are cleared and the start flag is set to 0.

    Returns:
        A list of canonical titles (at most 500 per request), or ``False`` when
        the request failed, the body was not JSON, the HTTP status was not
        200, or the last page contained no titles.
    """
    aiend = model.getStarttime()
    cont = model.getContinue()
    aicontinue = model.getaiContinue()

    # Values coming back from model are decoded before use; an unset value
    # shows up either as None or as the text "None".
    if aiend is None or aiend.decode() == "None":
        model.setUTCStarttime()
        aiend = model.getStarttime()

    params = {
        "action": "query",
        "aiprop": "canonicaltitle",
        "format": "json",
        "list": "allimages",
        "aisort": "timestamp",
        "aistart": config.aistart,
        "aiend": aiend.decode(),
        "ailimit": 500,
    }
    if not (aicontinue is None or aicontinue.decode() == "None"):
        # Resume a listing that an earlier call left unfinished.
        params["continue"] = cont.decode()
        params["aicontinue"] = aicontinue.decode()

    try:
        req = requests.get(apiroot, params=params, cookies=cookie)
    except requests.exceptions.SSLError:
        print("connection lost")
        return False
    except requests.exceptions.RequestException:
        # Any other transport failure; KeyboardInterrupt is deliberately not
        # caught so the program can still be stopped.
        return False

    print(req.url)
    try:
        jsondata = req.json()
    except ValueError:
        print("jsondata purge error")
        print(req.text)
        return False

    if req.status_code != 200:
        return False

    last_page = "continue" not in jsondata
    if last_page:
        model.setContinue(None)
        model.setaiContinue(None)
        model.setStartflag(0)
    else:
        model.setContinue(jsondata['continue']['continue'])
        model.setaiContinue(jsondata['continue']['aicontinue'])

    imagenames = [
        image['canonicaltitle']
        for image in jsondata.get('query', {}).get('allimages', [])
    ]
    # The last page still carries results; hand them back instead of dropping
    # them.  With nothing to process, report False as before.
    if last_page and not imagenames:
        return False
    return imagenames
57
+
58
def isNotReffed(imagename):
    """Check through the ``globalusage`` API property whether a file is unused.

    Args:
        imagename: Title of the file page to look up.

    Returns:
        ``imagename`` when the reported ``globalusage`` list is empty,
        otherwise ``False``.  ``False`` is also returned when the request
        fails or the response cannot be interpreted, so a file is never
        reported as unused on the basis of a broken reply.
    """
    params = {"action": "query", "format": "json", "prop": "globalusage", "titles": imagename}
    try:
        req = requests.post(apiroot, data=params)
    except requests.exceptions.SSLError:
        print("coonection lost")
        return False
    except requests.exceptions.RequestException:
        return False

    try:
        jsondata = req.json()
    except ValueError:
        print(req.text)
        print("jsondata purge error")
        return False

    try:
        # Only one title was requested, so only the first page entry is read.
        page = list(jsondata['query']['pages'].values())[0]
        globalref = page['globalusage']
    except (AttributeError, IndexError, KeyError, TypeError):
        # Unexpected response shape (e.g. an API error object).
        return False

    return imagename if globalref == [] else False
79
+
80
def isNotCategorized(imagename):
    """Check through the ``categories`` API property whether a file has none.

    Args:
        imagename: Title of the file page to look up.

    Returns:
        ``imagename`` when the page entry in the response has no
        ``categories`` key, otherwise ``False``.  ``False`` is also returned
        when the request fails or the response cannot be interpreted.
    """
    params = {"action": "query", "format": "json", "prop": "categories", "titles": imagename}
    try:
        req = requests.post(apiroot, data=params)
    except requests.exceptions.SSLError:
        print("coonection lost")
        return False
    except requests.exceptions.RequestException:
        return False

    try:
        jsondata = req.json()
    except ValueError:
        print(req.text)
        print("jsondata purge error")
        return False

    try:
        # Only one title was requested, so only the first page entry is read.
        page = list(jsondata['query']['pages'].values())[0]
        has_categories = 'categories' in page
    except (AttributeError, IndexError, KeyError, TypeError):
        # Unexpected response shape (e.g. an API error object).
        return False

    return False if has_categories else imagename
100
+
101
def getNoRefList():
    """Process one batch of titles and record the unused, uncategorized ones.

    Takes the batch returned by ``allimages()``, runs ``isNotReffed`` and
    ``isNotCategorized`` over it on a thread pool of ``config.workers``
    threads, and passes the titles that passed both checks to
    ``model.pushNorefList``.  Does nothing when ``allimages()`` returned
    ``False``.
    """
    titles = allimages()
    if titles is False:
        return

    with futures.ThreadPoolExecutor(config.workers) as pool:
        # Each checker yields the title itself on success and False otherwise.
        unreferenced = {hit for hit in pool.map(isNotReffed, titles) if hit is not False}
        uncategorized = {hit for hit in pool.map(isNotCategorized, titles) if hit is not False}

    model.pushNorefList(unreferenced & uncategorized)
111
+
112
def botLogin():
    """Log in with the configured bot account and keep the resulting cookies.

    Requests a login token, posts ``config.botUsername`` /
    ``config.botPassword`` together with that token, and stores the cookies
    of the login response in the module-level ``cookie`` so that later
    requests can send them.
    """
    # Without this declaration the assignment below would only create a local
    # variable and the module-level ``cookie`` would stay None.
    global cookie

    token_params = {"action": "query", "meta": "tokens", "type": "login", "format": "json"}
    req = requests.post(apiroot, data=token_params)
    token = req.json()['query']['tokens']['logintoken']

    login_params = {
        'action': 'login',
        'lgname': config.botUsername,
        'lgtoken': token,
        'lgpassword': config.botPassword,
        'format': 'json',
    }
    # The cookies from the token request are sent along with the login post.
    cookie = requests.post(apiroot, data=login_params, cookies=req.cookies).cookies
118
+
119
def removeFile(filename):
    """Ask the wiki to delete one file page.

    Fetches a CSRF token with the stored ``cookie`` and posts a ``delete``
    action for ``filename``.

    Args:
        filename: Title of the page to delete.

    Returns:
        ``False`` when the token response is not JSON, lacks the token, or a
        request fails; ``None`` once the delete request has been sent.
    """
    print("removing")
    csrf_params = {"action": "query", "format": "json", "meta": "tokens"}
    try:
        token_response = requests.post(apiroot, data=csrf_params, cookies=cookie)
        csrf = token_response.json()['query']['tokens']['csrftoken']
        del_params = {
            "action": "delete",
            # The page to delete is the function argument.
            "title": filename,
            "format": "json",
            "tags": "Bot",
            "reason": "autoremove unused file",
            "token": csrf,
        }
        requests.post(apiroot, data=del_params, cookies=cookie)
    except ValueError:
        print("jsondata parse error")
        return False
    except requests.exceptions.SSLError:
        print("connection lost")
        return False
    except (requests.exceptions.RequestException, KeyError, TypeError):
        # Transport failure or a token response without the expected keys.
        return False
134
+
135
def _parse_options(argv):
    """Return ``(searchonly, exportonlyflag, export)`` parsed from ``argv``."""
    searchonly = False
    exportonlyflag = False
    export = None
    opts, _args = getopt.getopt(argv, "sne:", ["search", "exportonly", "export="])
    for op, value in opts:
        if op in ("-s", "--search"):
            searchonly = True
        elif op in ("-n", "--exportonly"):
            exportonlyflag = True
        elif op in ("-e", "--export"):
            export = value
    return searchonly, exportonlyflag, export


def _export_titles(path, titles):
    """Write each title (decoded before writing) to ``path``, one per line."""
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(title.decode() + "\n" for title in titles)


def main():
    """Command-line entry point.

    Options:
        -s / --search       list removable files but do not delete them
        -n / --exportonly   skip scanning; only export the stored list
        -e / --export PATH  write the removable titles to PATH

    Repeats full scans of the image list until ``model.getCounter()`` reaches
    2, then deletes (unless ``--search``) and/or exports the removable files.

    Raises:
        AttributeError: ``--exportonly`` was given without ``--export``.
        getopt.GetoptError: an unknown option was passed.
    """
    # NOTE(review): the block nesting below was reconstructed from a listing
    # that had lost its indentation -- confirm against the original file.
    searchonly, exportonlyflag, export = _parse_options(sys.argv[1:])

    if model.getStartflag() is None:
        model.setStartflag(1)
    if model.getCounter() is None:
        model.initCounter()

    while True:
        if cookie is None:
            botLogin()

        stored_continue = model.getContinue()
        if stored_continue is None or stored_continue.decode() == "None":
            # No scan in progress: prepare for a fresh pass.
            model.swapLists()
            model.setNonetime()
            model.setStartflag(1)

        # Export-only runs must not scan, so the scan loop is disabled.
        model.setStartflag(0 if exportonlyflag else 1)

        # The start flag is written by allimages(), which sets it to 0 when
        # the listing is exhausted.
        while bool(int(model.getStartflag().decode())):
            getNoRefList()

        if exportonlyflag:
            if export is None:
                raise AttributeError("You must specify a export destiniation")
            _export_titles(export, model.getRemovableImages())
            print("complete")
            break

        removableList = model.markRemovableImages()
        counter = model.getCounter()
        if int(counter) < 2:
            # Not enough passes yet; scan again.
            continue

        if not searchonly:
            with futures.ThreadPoolExecutor(config.workers) as executor:
                executor.map(removeFile, removableList)
        if export is not None:
            _export_titles(export, removableList)
        model.cleanup()
        print("complete")
        break


if __name__ == '__main__':
    main()
197
+
198
+
0 commit comments