forked from dytttf/antispider
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_dsign.py
111 lines (104 loc) · 5.15 KB
/
get_dsign.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#coding:utf8
import re
# Verbatim source of the page's ``getName`` helper; it is deleted from the
# script before any folding so that only its call sites remain.
_GET_NAME_SOURCE = r"""function getName(){var caller=getName.caller;if(caller.name){return caller.name} var str=caller.toString().replace(/[\s]*/g,"");var name=str.match(/^function([^\(]+?)\(/);if(name && name[1]){return name[1];} else {return '';}}"""

# Ordered rewrite rules for "constant functions": JavaScript wrappers that
# always evaluate to a fixed string (or simply return their argument).
# Each entry is (compiled pattern, %-template); the template is filled with
# the pattern's capture groups.  The order is significant: later rules run
# on the text produced by earlier ones.
_CONSTANT_FUNCTION_RULES = (
    # (function(){'return x';return 'v'})()  ->  'v'
    (re.compile(r"\(function\(\)\{'return [^']+';return '([^']+)'\}\)\(\)"),
     "'%s'"),
    # function f(){'x';function g(){return 'v'}; return g();}  ->  f='v';
    (re.compile(r"function (\w+)\(\)\{'[^']+';function [^']+\(\)"
                r"\{return '([^']+)'\}; return \w+\(\);\}"),
     "%s='%s';"),
    # f=function(){'return x';return 'v';};  ->  f='v';
    (re.compile(r"(\w+)=function\(\)\{'return \w+';return '([^']+)';\};"),
     "%s='%s';"),
    # f=function(){'x';var g=function(){return 'v'}; return g();};  ->  f='v';
    (re.compile(r"(\w+)=function\(\)\{'\w+';var \w+=function\(\)"
                r"\{return '([^']+)'\}; return \w+\(\);\};"),
     "%s='%s';"),
    # function f(a){...function g(){return getName();}...}  ->  f='g';
    # (getName() yields the name of its caller, i.e. the inner function.)
    (re.compile(r"function (\w+)\(\w+\)\{(?:function \w+\(\w+\)\{)*"
                r"function (\w+)\(\)\{return getName\(\);\}"
                r".*?return.*?return.*?\}"),
     "%s='%s';"),
    # (function(a){'return x';return ...})('v')  ->  'v'
    (re.compile(r"\(function\([^\)]+\)\{'return [^']+';return [^\}]+\}\)"
                r"\('([^']+)'\)"),
     "'%s'"),
    # (function(a){return (function(b){return b;})(a);})('v')  ->  'v'
    (re.compile(r"\(function\(\w+\)\{return \(function\(\w+\)\{return \w+;\}\)"
                r"\(\w+\);\}\)\('([^']+)'\)"),
     "'%s'"),
    # (function(){'return x';return (function(){return 'v';})();})()  ->  'v'
    (re.compile(r"\(function\(\)\{'return [^']+';return \(function\(\)"
                r"\{return '([^']+)';\}\)\(\);\}\)\(\)"),
     "'%s'"),
    # f=function(a){var g=function(b){'return b';return b;}; return g(a);};
    #   ->  f='*';   ('*' marks an identity function, see _inline_variables)
    (re.compile(r"(\w+)=function\(\w+\)\{var \w+=function\(\w+\)"
                r"\{'return \w+';return \w+;\}; return \w+\(\w+\);\};"),
     "%s='*';"),
    # f=function(a){'return a';return a;};  ->  f='*';
    (re.compile(r"(\w+)=function\(\w+\)\{'return \w+';return \w+;\};"),
     "%s='*';"),
    # function f(){'return x';return 'v'}  ->  f='v';
    (re.compile(r"function (\w+)\(\)\{'return \w+';return '([^']+)'\}"),
     "%s='%s';"),
)


def _fold_constant_functions(js):
    """Rewrite every recognised constant-function wrapper to its value."""
    for pattern, template in _CONSTANT_FUNCTION_RULES:
        # finditer() scans the text as it was when this rule started; each
        # matched snippet is then replaced everywhere in the evolving text.
        for match in pattern.finditer(js):
            js = js.replace(match.group(0), template % match.groups())
    return js


def _inline_variables(js):
    """Substitute each ``name='value'`` assignment into the uses of name."""
    assignments = []
    for assignment in re.findall(r"(\w+='[^']+')", js):
        name, _, value = assignment.partition('=')
        assignments.append((name, value))
    # Longest names first, so a short name never clobbers part of a longer
    # one.  sort() + reverse() is kept on purpose: it fixes the order in
    # which equally long names are processed.
    assignments.sort(key=lambda pair: len(pair[0]))
    assignments.reverse()

    # Names come from ``\w+`` and are therefore safe to embed in a pattern.
    # Replacements are callables so the JavaScript text is inserted
    # literally and never parsed as a regex replacement template.
    for name, value in assignments:
        if value == "'*'":
            # Identity function: name('x') -> 'x'.  A bare ``name`` has no
            # argument group and is replaced by the empty string.
            js = re.sub(r"%s\(('[^']+')\)?" % name,
                        lambda m: m.group(1) or '', js)
        else:
            def literal(_match, value=value):
                return value
            js = re.sub(r"%s\('[^']+'\)" % name, literal, js)
            js = re.sub(r"%s\(\)" % name, literal, js)
            js = re.sub(name, literal, js)
    return js


def get_dsign(js):
    """Extract the value that follows ``ign=`` from an obfuscated script.

    The script text is simplified in stages: markup tags and the
    ``getName`` helper are removed, constant-function wrappers are folded
    to their string values, string variables are inlined, and adjacent
    string literals joined with ``+`` are concatenated.  The result is
    then searched for ``ign=<lowercase letters/digits>``.

    :param js: page or script source as a string.
    :return: the last ``ign=`` value found, or ``''`` when there is none.
    """
    # Strip markup tags.
    js = re.sub(r'<[^>]+>', '', js)
    # Drop the getName helper definition.
    js = js.replace(_GET_NAME_SOURCE, "")
    # Fold constant functions into plain assignments / literals.
    js = _fold_constant_functions(js)
    # Normalise whitespace around '=' so assignments have one shape.
    js = re.sub(r'\s*=\s*', '=', js)
    # Inline variables.
    js = _inline_variables(js)
    # Keep only the statements that concatenate strings ...
    js = ''.join(stmt for stmt in js.split(';') if '+' in stmt)
    # ... and merge 'a'+'b' into 'ab' by deleting the quote-plus-quote joints.
    js = re.sub(r"'\+[^']*'", "", js)
    candidates = re.findall(r'ign=([a-z\d]+)', js)
    return candidates[-1] if candidates else ''
# Command-line entry point: read the sample script from the current
# directory and print the extracted value.  Guarded so that importing this
# module for get_dsign() neither requires the file nor prints anything.
if __name__ == "__main__":
    with open('_dsign.js') as f:
        js = f.read()
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(get_dsign(js))