-
-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathxlsx_reader.py
56 lines (43 loc) · 1.67 KB
/
xlsx_reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
'''Module that reads an xlsx spreadsheet and can produce json data from it'''
import pylightxl as xl
def get_column_names(sheet):
'''Takes a single worksheet, returns the strings in the top row of each column'''
column_lists = sheet.cols
column_names = []
for column_list in column_lists:
column_names.append(column_list[00])
return column_names
def get_row_data(row, column_names):
'''takes a single row of a worksheet and an array of rows,
returns an object with column_name:rowvalue
'''
row_data = {}
counter = 0
for cell in row:
column_name = column_names[counter]
#TODO: this doesn't format any for a cell. Consider formatting date/numbers
row_data[column_name] = cell
counter = counter + 1
return row_data
def get_sheet_data(sheet, column_names):
'''Takes a single worksheet, returns an object with row data'''
max_rows = sheet.size[0]
sheet_data = []
for idx in range(2, max_rows):
row = sheet.row(idx)
row_data = get_row_data(row, column_names)
sheet_data.append(row_data)
return sheet_data
def get_workbook_data(workbook):
'''Takes a workbook and returns all worksheet data'''
workbook_sheet_names = workbook.ws_names
workbook_data = {}
for sheet_name in workbook_sheet_names:
worksheet = workbook.ws(ws=sheet_name)
column_names = get_column_names(worksheet)
sheet_data = get_sheet_data(worksheet, column_names)
workbook_data[sheet_name.lower().replace(' ', '_')] = sheet_data
return workbook_data
def get_workbook(filename):
'''opens a workbook for reading'''
return xl.readxl(filename)