-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path考研帮.py
66 lines (47 loc) · 1.62 KB
/
考研帮.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import requests
import re
import time
import pymysql
def kaoyan51(page):
    """Crawl one listing page and store the links that are not yet in the database.

    Downloads the listing page numbered ``page``, extracts every
    ``<a class="urlfont" ...>`` link with a regular expression, and for each
    (href, title) pair asks ``isexisturl`` whether it is already stored.
    New pairs are written with ``sava_to_mysql``, followed by a one-second
    pause.

    Args:
        page: Page number appended to the ``pageNum`` query parameter of the
            listing URL.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    # The original value began with a space; a header value must not start
    # with whitespace, so it is trimmed here before actually being sent.
    Header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36"
    }
    url = "https://www.51kywang.com/51kaoyanwang/wap_doc/14654849_0_0_0.html?&style=1&pageNum=" + str(page)
    # The header dict used to be built but never passed to the request.
    # The timeout keeps a stalled server from blocking the crawler forever.
    response = requests.get(url, headers=Header, timeout=30)
    html = response.text
    pattern = re.compile(r'<a class="urlfont" href="(.*?)" >(.*?)</a>')
    for item in pattern.findall(html):
        print(item)
        href, title = item
        # isexisturl expects the site-relative href and adds the host itself.
        if isexisturl(href):
            print('信息已存在')
            continue
        print("准备入库")
        sava_to_mysql("https://www.51kywang.com" + href, title)
        time.sleep(1)
def sava_to_mysql(url, title):
    """Insert one crawled link into the ``kaoyan51`` table and commit it.

    Works on the module-level ``cursor`` and ``db`` objects. The row's
    ``time`` column is set to the current local time formatted as
    ``YYYY-MM-DD HH:MM:SS``.

    Args:
        url: URL to store in the ``url`` column.
        title: Link text to store in the ``title`` column.
    """
    inserttime = time.strftime("%Y-%m-%d %H:%M:%S")
    # Pass the values as query parameters so the driver escapes them.
    # The title is scraped text; interpolating it into the SQL string broke
    # on embedded double quotes and allowed SQL injection.
    sql = 'insert into kaoyan51(url,title,time) values (%s,%s,%s)'
    cursor.execute(sql, (url, title, inserttime))
    db.commit()
def isexisturl(url):
    """Report whether a link is already stored in the ``kaoyan51`` table.

    Works on the module-level ``cursor`` and ``db`` objects.

    Args:
        url: Site-relative href; ``https://www.51kywang.com`` is prepended
            before the lookup.

    Returns:
        True if at least one row has the resulting URL, otherwise False.
    """
    full_url = "https://www.51kywang.com" + url
    # Parameterized so the scraped href is escaped by the driver instead of
    # being spliced into the SQL text. Only existence matters, so one
    # constant column and a single row are enough.
    sql = 'select 1 from kaoyan51 where url = %s limit 1'
    cursor.execute(sql, (full_url,))
    # Kept from the original: ends the transaction opened by the SELECT.
    db.commit()
    return cursor.rowcount != 0
# Poll forever: open a fresh connection, crawl listing pages 1-19, release the
# connection, then wait before the next round. ``db`` and ``cursor`` are
# module-level names because sava_to_mysql and isexisturl read them as globals.
while True:
    # NOTE(review): the connection settings were blank in the original, and the
    # empty ``port=`` was a syntax error. 3306 is MySQL's default port and is
    # only a placeholder -- fill in the real host, port, user, password and
    # database.
    db = pymysql.connect(host="", port=3306, user='', password='', database='',
                         charset='utf8')
    cursor = db.cursor()
    try:
        print('正在爬取考研屋....')
        for x in range(1, 20):
            kaoyan51(x)
    finally:
        # Release the cursor and connection even when a crawl raises.
        cursor.close()
        db.close()
    # The original message announced a one-hour delay while sleeping 600 s;
    # the message now matches the actual ten-minute wait.
    print('延迟十分钟....')
    time.sleep(600)