-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathpa07智联招聘.py
49 lines (42 loc) · 1.63 KB
/
pa07智联招聘.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#用来创建excel文档并写入数据
###
#源码地址:http://blog.csdn.net/u010187278/article/details/77847737?locationNum=10&fps=1
#
#
#阿
###
# -*- coding:utf-8 -*-
import urllib.request
import re
import xlwt#用来创建excel文档并写入数据
mes = input("请输入需要爬取的岗位关键词:")
#获取原码
def get_content(page):
url ='http://search.51job.com/list/000000,000000,0000,00,9,99,'+mes+',2,'+ str(page)+'.html'
print(url)
a = urllib.request.urlopen(url)#打开网址
html = a.read().decode('gbk')#读取源代码并转为unicode
return html
def get(html):
reg = re.compile(r'class="t1 ">.*? <a target="_blank" title="(.*?)".*? <span class="t2"><a target="_blank" title="(.*?)".*?<span class="t3">(.*?)</span>.*?<span class="t4">(.*?)</span>.*? <span class="t5">(.*?)</span>',re.S)
#匹配换行符
items = re.findall(reg,html)
return items
def excel_write(items,index):
#爬取到的内容写入excel表格
for item in items:#职位信息
for i in range(0,5):
#print item[i]
ws.write(index,i,item[i])#行,列,数据
print(index)
index+=1
newTable="D:\python\myspoils\zhilian\智联招聘岗位爬虫结果.xls"#表格名称
wb = xlwt.Workbook(encoding='utf-8')#创建excel文件,声明编码
ws = wb.add_sheet('sheet1')#创建表格
headData = ['招聘职位','公司','地址','薪资','日期']#表头部信息
for colnum in range(0, 5):
ws.write(0, colnum, headData[colnum], xlwt.easyxf('font: bold on')) # 行,列
for each in range(1,10):
index=(each-1)*50+1
excel_write(get(get_content(each)),index)
wb.save(newTable)