-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAppraisalTool.py
66 lines (53 loc) · 2.03 KB
/
AppraisalTool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from bs4 import BeautifulSoup
from collections import namedtuple
import xlwt
import glob
# Soup pairs a label (`name`) with a parsed document (`soup`).
Soup = namedtuple('Soup', ['name', 'soup'])
# Table holds one table's header cells (`header`) and its body rows (`rows`).
Table = namedtuple('Table', ['header', 'rows'])
def _is_header(class_):
    """Return True when *class_* is a CSS class starting with 'datasheetTopLabel'."""
    # The class may be None (no class attribute) or empty, so guard first.
    return bool(class_) and class_.startswith('datasheetTopLabel')


def _is_row(class_):
    """Return True when *class_* is a CSS class starting with 'rowEven' or 'rowOdd'."""
    return bool(class_) and class_.startswith(('rowEven', 'rowOdd'))


def _leaf_tables(soup):
    """Return a Table for every <table> in *soup* that has no nested <table>."""
    leaf_tables = []
    for table in soup.find_all('table'):
        if table.find_all('table'):
            # Skip layout/wrapper tables; only innermost tables carry data.
            continue
        header_cells = table.find_all('td', {'class': _is_header})
        row_cells = [tr.find_all('td')
                     for tr in table.find_all('tr', {'class': _is_row})]
        leaf_tables.append(Table(header_cells, row_cells))
    return leaf_tables


def _non_empty_text(cells):
    """Return the stripped text of each cell, dropping cells with no text."""
    texts = (cell.get_text(strip=True) for cell in cells)
    return [text for text in texts if text]


def idea(filenames, output_path='example.xls'):
    """Extract data tables from HTML files and write them to one .xls sheet.

    Each file is parsed with BeautifulSoup (no parser is named, so bs4 picks
    its default). For every innermost table, cells whose class starts with
    'datasheetTopLabel' form the header, and <tr> elements whose class starts
    with 'rowEven' or 'rowOdd' form the data rows. Tables are written one
    after another to a single worksheet, with empty cells dropped.

    Args:
        filenames: Iterable of paths to the HTML files to read.
        output_path: Where to save the workbook. Defaults to 'example.xls'.
    """
    soups = []
    for filename in filenames:
        # BeautifulSoup reads the whole handle in its constructor, so the
        # file can be closed as soon as parsing returns.
        with open(filename) as handle:
            soups.append(Soup(filename, BeautifulSoup(handle)))

    soup_tables = {}
    for parsed in soups:
        soup_tables[parsed.name] = _leaf_tables(parsed.soup)

    wb = xlwt.Workbook()
    ws = wb.add_sheet('This is awesome')
    next_row = 0
    for tables in soup_tables.values():
        for table in tables:
            # NOTE(review): a table with no header cells is skipped entirely,
            # data rows included - confirm this is the intended nesting.
            if not table.header:
                continue
            # Write stripped text for headers, the same way as data cells,
            # so a header with nested markup never yields None.
            for col, text in enumerate(_non_empty_text(table.header)):
                ws.write(next_row, col, text)
            next_row += 1
            for cells in table.rows:
                for col, text in enumerate(_non_empty_text(cells)):
                    ws.write(next_row, col, text)
                next_row += 1
    wb.save(output_path)
if __name__ == '__main__':
    # Script entry point: process every *.html file in the html/ directory
    # (relative to the current working directory) in a single run.
    idea(glob.glob('html/*.html'))