-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathsummary.py
82 lines (63 loc) · 2.36 KB
/
summary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
"""
Print summary tables from results.
"""
import sys
import numpy as np
def get_fhandle(filepath_or_buffer):
    """Return a readable handle for a buffer, a path, or a path-like object.

    Args:
        filepath_or_buffer: One of
            * an object with a `read` attribute, which is returned unchanged;
            * something the built-in `open` accepts (e.g. a path string);
            * an object the built-in `open` rejects with `TypeError` but
              which provides its own `open()` method.

    Returns:
        The buffer itself, or the handle produced by opening the argument.
        When a new handle is opened here, closing it is the caller's job.

    Raises:
        IOError/OSError: if the built-in `open` fails on a usable path (for
            example, the file does not exist).
        AttributeError: if the built-in `open` rejects the argument's type
            and the argument has no `open()` method either.
    """
    # Anything exposing `read` is treated as an already-open buffer.
    if hasattr(filepath_or_buffer, 'read'):
        return filepath_or_buffer

    try:
        return open(filepath_or_buffer)
    except TypeError:
        # Only a rejected argument *type* triggers the fallback to the
        # object's own `open()`. Genuine I/O failures (missing file,
        # permissions) propagate instead of being masked by an unrelated
        # AttributeError from the fallback.
        return filepath_or_buffer.open()
def _split_rows(table):
    """Split table text into rows of whitespace-separated cells, skipping empty lines."""
    return [line.split() for line in table.split('\n') if line]


def _render_table(rows, margin=4):
    """Lay out rows as aligned text with a dashed underline beneath the header row."""
    ncols = len(rows[0])
    if any(len(row) != ncols for row in rows):
        raise ValueError("table rows have differing numbers of columns")

    # Each column is as wide as its longest cell; `margin` is extra padding.
    widths = [max(len(cell) for cell in column) for column in zip(*rows)]
    colwidths = [width + margin for width in widths]
    underlines = ['-' * width for width in widths]
    table = [rows[0], underlines] + rows[1:]

    lines = []
    for row in table:
        # The first two columns are left-justified, the rest right-justified.
        cells = [cell.ljust(width) if i < 2 else cell.rjust(width)
                 for i, (cell, width) in enumerate(zip(row, colwidths))]
        lines.append(''.join(cells))
    return '\n'.join(lines)


def summary(fpath_or_buff, concise=False):
    """Build a single summary table from a two-table results file.

    The file being read has a table of the per-term error, and a second table
    with the running mean of error, separated by a blank line. The result is
    the per-term table with its second column (the `rmse` column, which just
    specifies the difference between the tables) replaced by the last column
    of the running-mean table, i.e. the cumulative error.

    Args:
        fpath_or_buff: Anything `get_fhandle` accepts. A handle that had to
            be opened here is closed again after reading; a buffer passed in
            by the caller is left open.
        concise: EXPERIMENTAL. When true, 'term' is shortened to 't' in the
            header from the third column on, and every cell after the first
            in the rows below the counts row is cut to 5 characters.

    Returns:
        The formatted table as one string, lines joined by newlines.

    Raises:
        ValueError: if the content does not consist of exactly two tables
            separated by a blank line, if the rows do not all have the same
            number of columns, or if a count cell is not an integer.
        IndexError: if a table has fewer rows or columns than expected.
    """
    handle = get_fhandle(fpath_or_buff)
    try:
        content = handle.read()
    finally:
        # Close only what was opened on the caller's behalf.
        if handle is not fpath_or_buff:
            handle.close()

    # Leading/trailing newlines would otherwise yield spurious empty chunks.
    tables = content.strip('\n').split('\n\n')
    if len(tables) != 2:
        raise ValueError(
            "expected two tables separated by a blank line, found %d"
            % len(tables))
    per_term, running_mean = tables

    # Extract the mean RMSE for each method: the last running-mean column.
    totals = [row[-1] for row in _split_rows(running_mean)]
    totals[0] = "all"  # header
    totals[2] = ""  # remove count from last term

    # Replace the second column of the per-term table with the total RMSEs.
    rows = _split_rows(per_term)
    for i, row in enumerate(rows):
        row[1] = totals[i]

    # Fix the "# test records" label. Whitespace splitting broke it into
    # three cells, so after relabelling the first and overwriting the second
    # above, the third ("records") is dropped to realign the counts.
    counts = rows[2]
    counts[0] = "# predictions"
    counts.pop(2)
    # Total number of grades predicted goes in the "all" column.
    counts[1] = str(sum(int(count) for count in counts[2:]))

    # Remove the old underlining; the renderer adds a freshly sized one.
    rows.pop(1)

    # EXPERIMENTAL
    if concise:
        header = rows[0]
        header[2:] = [name.replace('term', 't') for name in header[2:]]
        for row in rows[2:]:
            row[1:] = [cell[:5] for cell in row[1:]]

    return _render_table(rows, margin=4)
if __name__ == "__main__":
    # The first command-line argument is the path of the results file.
    if len(sys.argv) < 2:
        sys.exit("usage: %s RESULTS_FILE" % sys.argv[0])
    fname = sys.argv[1]
    with open(fname) as f:
        # Parenthesized so this line is valid on both Python 2 and Python 3;
        # with a single argument the printed output is the same on either.
        print(summary(f))