-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathautoconf.py
306 lines (262 loc) · 12.1 KB
/
autoconf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
import re, argparse, glob
from enum import Enum
from pathlib import Path
import pyexcel as pe
import logging
# Root logging configuration: records are printed as "LEVEL:message" and the
# root logger's own threshold is ERROR.
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)
# Wrap the WARNING and ERROR level names in ANSI colour escapes
# (33 = yellow, 31 = red, 0 = reset) so they stand out on a terminal.
logging.addLevelName(logging.WARNING, "\033[33m%s\033[0m" % logging.getLevelName(logging.WARNING))
logging.addLevelName(logging.ERROR, "\033[31m%s\033[0m" % logging.getLevelName(logging.ERROR))
# Module-level logger; its level is set to DEBUG independently of the root threshold above.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
from lib.spreadsheetReader import getRows
from lib.mung import checkAndClean
from lib.constants import ASSIGNMENTS_KEY, ALL_DEFAULT_FILTERS
from lib.config import loadConfig, saveConfig
class FileType(Enum):
    """Kinds of source sheet distinguished by inferTypeFromFields."""
    ROSTER = 1
    GRADESCOPE = 2
    SCORED_GOOGLE_FORM = 3
    UNSCORED_GOOGLE_FORM = 4
    CLICKERS = 5
    OTHER = 6
# Student attributes installed by main() when the loaded config has no
# "studentAttributes" entry. guessAttrConfig only maps columns onto attributes
# flagged "identifiesStudent"; the remaining flags and the filter names are
# consumed outside this file (presumably by the report generator - confirm).
DEFAULT_ATTR_DICT = {
    "Roster Name": {
        "onePerStudent": True,
        "onlyPrintIfPresent": True,
    },
    "Section": {
        "onePerStudent": True,
    },
    "Email": {},
    "Student ID": {
        "identifiesStudent": True,
        "onePerStudent": True,
        "filters": ["strip", "toUpper", "ucsdIDCheck"],
    },
    "Clicker ID": {
        "identifiesStudent": True,
        "filters": ["strip", "remove#", "8char", "toUpper"],
    },
}
# Per-attribute regular expressions. guessAttrConfig runs them with re.search
# against the lower-cased column header to decide which student attribute a
# column holds.
keywords = {
    "Roster Name": [
        r'name\b',
    ],
    "Section": [
        r'\bsection\b',
        r'\bsect\b',
        r'\bsec\b',
    ],
    "Email": [
        r'\bemail\b',
    ],
    "Student ID": [
        r'\bpid\b',
        r'\bsid\b',
    ],
    "Clicker ID": [
        r'\bclicker\b',
        r'\biclicker\b',
        r'\bremote\b',
    ],
    # Disabled item-type keywords, kept for reference:
    # "homework": [r'\bhw\b', r'\bhomework\b', r'\bassignment\b'],
    # "quiz": [r'\bquiz\b'],
    # "exam": [r'\bexam\b'],
    # "test": [r'\btest\b'],
}
def inferTypeFromFields(fields):
    """Guess what kind of export a sheet is from its ordered column headers.

    The checks run in this order and the first match wins:

    1. ROSTER: the first five headers are exactly
       'Sect ID', 'Course', 'Title', 'SecCode', 'Instructor'.
    2. CLICKERS: the first four headers are exactly
       'Last Name', 'First Name', 'Student ID', 'Remote ID' and at least one
       header starts with 'Session ' followed by a digit.
    3. GRADESCOPE: some header equals the header immediately before it plus
       ' - Max Points'.
    4. SCORED_GOOGLE_FORM / UNSCORED_GOOGLE_FORM: the first header is
       'Timestamp'; scored when a 'Score' header is present.
    5. OTHER: none of the above.

    Args:
        fields: list of header strings. It is sliced and compared against
            list literals, so it must be a real list.

    Returns:
        The matching FileType member. An empty header list yields
        FileType.OTHER.
    """
    if not fields:
        # Nothing to inspect; without this guard fields[0] below raises IndexError.
        return FileType.OTHER

    if fields[:5] == ['Sect ID', 'Course', 'Title', 'SecCode', 'Instructor']:
        return FileType.ROSTER

    startsLikeClickers = fields[:4] == ['Last Name', 'First Name', 'Student ID', 'Remote ID']
    if startsLikeClickers and any(re.match(r'^Session \d+', field) for field in fields):
        return FileType.CLICKERS

    # An assignment column is directly followed by its "<name> - Max Points" column.
    lastCol = ""
    for col in fields:
        if col == lastCol + " - Max Points":
            return FileType.GRADESCOPE
        lastCol = col

    if fields[0] == 'Timestamp':
        if 'Score' in fields:
            return FileType.SCORED_GOOGLE_FORM
        return FileType.UNSCORED_GOOGLE_FORM

    return FileType.OTHER
def updateOtherConfig(allAttrs, sourceConf, rows, fileType, allNames):
    '''Fill in sourceConf for a clicker, Google Form or unrecognised sheet.

    sourceConf and allNames are modified in place. Returns True when the
    sheet is usable and False when no student-identifying column was found,
    in which case sourceConf is left untouched.
    '''
    header = rows[0].keys()
    # NOTE(review): if rows[0] is a plain dict its keys are always unique, so
    # this warning can only fire for some other mapping type - confirm.
    if len(header) != len(set(header)):
        logger.warning("duplicate column!")

    attrConfig, ignoredAttrCols = guessAttrConfig(header, allAttrs)
    if not attrConfig:
        logger.debug(" No student attributes detected; ignoring")
        return False

    def formItemName():
        # A form contributes a single item named after its sheet, or after
        # the file's stem when there is no sheet name.
        base = sourceConf.get("sheetName") or Path(sourceConf['file']).stem
        return forceUniqueName(base, allNames)

    itemConfig = []
    if fileType in (FileType.OTHER, FileType.CLICKERS):
        # Every column that was not recognised as a student attribute becomes
        # a one-point item; values are not checked for being numeric here.
        for column in header:
            if column in ignoredAttrCols or column in attrConfig:
                continue
            if fileType == FileType.CLICKERS:
                kind = "clickers"
            else:
                kind = guessItemType(column)
            uniqueName = forceUniqueName(column, allNames)
            itemConfig.append({
                "name": uniqueName,
                "scoreCol": column,
                "max_points": 1,
                "type": kind,
                "filters": ALL_DEFAULT_FILTERS,
            })
    elif fileType == FileType.SCORED_GOOGLE_FORM:
        firstScore = rows[0]['Score']
        if '/' in firstScore:
            # "<earned> / <max>": the denominator is the maximum.
            maxPoints = int(firstScore.split('/')[1].strip())
        else:
            # Bare scores: use the best observed score as the maximum.
            maxPoints = max(int(entry['Score']) for entry in rows)
        itemConfig.append({
            "name": formItemName(),
            "scoreCol": "Score",
            "max_points": maxPoints,
            "type": fileType.name,
            "filters": ALL_DEFAULT_FILTERS,
            "due_date": "12/31/9999 23:59:59",
            "timestampCol": "Timestamp",
        })
    elif fileType == FileType.UNSCORED_GOOGLE_FORM:
        itemConfig.append({
            "name": formItemName(),
            "max_points": 1,
            "type": fileType.name,
            "filters": ALL_DEFAULT_FILTERS,
            "due_date": "12/31/9999 23:59:59",
            "timestampCol": "Timestamp",
        })

    sourceConf["attributes"] = attrConfig
    sourceConf[ASSIGNMENTS_KEY] = itemConfig
    return True
def forceUniqueName(name, allNames):
    """Pick a name not yet in allNames, record it there, and return it.

    The candidate comes from getUniqueName; allNames is modified in place.
    """
    reserved = getUniqueName(name, allNames)
    allNames.add(reserved)
    return reserved
def getUniqueName(name, allNames):
    """Return name, or the first of name#2, name#3, ... that is not in allNames.

    allNames is only read, never modified.
    """
    candidate = name
    suffix = 1
    while candidate in allNames:
        suffix += 1
        candidate = name + "#" + str(suffix)
    return candidate
def guessItemType(item):
    """Classify a column header as "homework" or "unknown".

    The header is lower-cased and counts as homework when it contains any of
    the substrings "hw", "assignment" or "homework".
    """
    lowered = item.lower()
    for marker in ("hw", "assignment", "homework"):
        if marker in lowered:
            return "homework"
    return "unknown"
def guessAttrConfig(potentialAttrFields, allAttrs):
    """Work out which columns hold student attributes.

    A column is matched to an attribute either because its header equals an
    attribute name (ignoring case) or because one of that attribute's regexes
    in the module-level `keywords` table is found in the lower-cased header.

    Args:
        potentialAttrFields: iterable of column header strings.
        allAttrs: dict mapping attribute name to its settings dict; only the
            "identifiesStudent" setting is read here.

    Returns:
        (attrConfig, ignoredAttrCols) where attrConfig maps column header to
        attribute name for attributes that identify a student (at most one
        column per attribute), and ignoredAttrCols lists columns that matched
        an attribute but were not used: either the attribute does not
        identify a student or it already had a column.
    """
    # regex -> attribute, limited to the attributes that are configured.
    keywordLookup = {
        pattern: attr
        for attr in allAttrs
        if attr in keywords
        for pattern in keywords[attr]
    }
    # Lower-cased attribute name -> canonical name. A header that differs from
    # the attribute only in case must resolve to the canonical key, otherwise
    # the allAttrs lookup below raises KeyError.
    canonicalByLower = {attr.lower(): attr for attr in allAttrs}

    attrConfig = {}
    ignoredAttrCols = []
    for item in potentialAttrFields:
        itemLowered = item.lower()
        identifiedAttr = canonicalByLower.get(itemLowered)
        if identifiedAttr is None:
            # No exact name match: fall back to the first keyword regex that hits.
            for (keyword, attr) in keywordLookup.items():
                if re.search(keyword, itemLowered):
                    identifiedAttr = attr
                    break
        if identifiedAttr is None:
            continue

        if identifiedAttr in attrConfig.values():
            # Keep the first column found for an attribute; report the clash.
            old = next(key for key, value in attrConfig.items() if value == identifiedAttr)
            logger.warning(f"found two columns for attribute {identifiedAttr}: `{old}` and `{item}`")
            ignoredAttrCols.append(item)
        elif allAttrs[identifiedAttr].get("identifiesStudent", False):
            attrConfig[item] = identifiedAttr
        else:
            ignoredAttrCols.append(item)
    return (attrConfig, ignoredAttrCols)
def updateGradescopeConfig(allAttrs, sourceConf, rows, allNames):
    """Fill in sourceConf for a Gradescope-style sheet.

    A column is an assignment when a sibling column named
    "<column> - Max Points" exists. Columns whose header contains
    " - Max Points" or " - Lateness" are skipped, and the rest are offered to
    guessAttrConfig as possible student attributes. sourceConf and allNames
    are modified in place; nothing is returned.
    """
    firstRow = rows[0]
    header = firstRow.keys()

    def isBookkeeping(column):
        return " - Max Points" in column or " - Lateness" in column

    assignments = [col for col in header if col + " - Max Points" in header]
    attrs = [
        col for col in header
        if col + " - Max Points" not in header and not isBookkeeping(col)
    ]

    attrConfig, _ = guessAttrConfig(attrs, allAttrs)

    itemConfig = []
    for column in assignments:
        kind = guessItemType(column)
        # The maximum is read from the first data row only.
        maxPoints = float(firstRow[column + " - Max Points"])
        uniqueName = forceUniqueName(column, allNames)
        itemConfig.append({
            "name": uniqueName,
            "scoreCol": column,
            "max_points": maxPoints,
            "type": kind,
            "filters": ALL_DEFAULT_FILTERS,
        })

    sourceConf["attributes"] = attrConfig
    sourceConf[ASSIGNMENTS_KEY] = itemConfig
def updateConfig(globalConfigObj, sourceConf, rows, allNames):
    """Configure one source and append it to globalConfigObj["sources"].

    The sheet type is inferred from the first row's keys and the matching
    helper fills in sourceConf. A source that updateOtherConfig reports as
    unusable is dropped without being appended.

    Raises:
        Exception: if the inferred type is not one of the handled FileType members.
    """
    fileType = inferTypeFromFields(list(rows[0].keys()))
    logger.debug(f"\tInferred type: {fileType.name}")

    genericTypes = (
        FileType.SCORED_GOOGLE_FORM,
        FileType.UNSCORED_GOOGLE_FORM,
        FileType.CLICKERS,
        FileType.OTHER,
    )

    if fileType == FileType.ROSTER:
        # It's not a proper csv; more like 2 on top of each other.
        # It gets a special flag in config, and its columns are fixed.
        sourceConf["isRoster"] = True
        sourceConf["attributes"] = {
            "Email": "Email",
            "PID": "Student ID",
            "Student": "Roster Name",
        }
        sourceConf[ASSIGNMENTS_KEY] = []
    elif fileType == FileType.GRADESCOPE:
        updateGradescopeConfig(globalConfigObj['studentAttributes'], sourceConf, rows, allNames)
    elif fileType in genericTypes:
        if not updateOtherConfig(globalConfigObj['studentAttributes'], sourceConf, rows, fileType, allNames):
            return
    else:
        raise Exception(f"unknown filetype {fileType.name}")

    globalConfigObj["sources"].append(sourceConf)
def main(sources, configInFilename, configOutFilename):
    """Build or extend a report config from a set of spreadsheets.

    Args:
        sources: iterable of paths. Each is either a single file or a
            directory that is searched recursively. Only .csv and .xlsx files
            are used; every sheet of an .xlsx workbook is a separate source.
        configInFilename: existing config to extend, or a falsy value to
            start from an empty one.
        configOutFilename: path handed to saveConfig for the result.

    Sources already listed in the input config (same file and sheet name) are
    skipped. Item names already present in the input config are reserved up
    front, so newly generated names cannot duplicate them. After all sources
    are processed, one placeholder output section is appended for every item
    type that does not have one yet, in sorted order.
    """
    globalConfigObj = loadConfig(configInFilename) if configInFilename else {}
    globalConfigObj.setdefault("studentAttributes", DEFAULT_ATTR_DICT)
    globalConfigObj.setdefault("sources", [])
    globalConfigObj.setdefault("outputs", {
        "report-name": "CSEnn Grade Report",
        "disclaimer-text": "These are all the scores recorded for you in this course. If there are any discrepancies between the scores you see here and your own records, email...",
        "content": []
    })

    preconfiguredFiles = set(map(getSource, globalConfigObj["sources"]))
    # Reserve the names used by already-configured sources; starting from an
    # empty set would let a new column reuse a name from the input config.
    allNames = {
        item["name"]
        for sourceData in globalConfigObj["sources"]
        for item in sourceData.get(ASSIGNMENTS_KEY, [])
        if "name" in item
    }

    for inFile in sources:
        inFilePath = Path(inFile)
        if inFilePath.is_dir():
            logger.debug(f"Recursively searching `{inFilePath}` for csv and xlsx files...")
            fileIter = glob.iglob(str(inFilePath/'**'), recursive=True)
        else:
            fileIter = [inFile]
        fileIter = [Path(filename) for filename in fileIter]

        for filePath in fileIter:
            if filePath.name[:2] == "~$":
                logger.debug(f"Ignoring xlsx temporary file: `{filePath}`.")
                continue
            if filePath.is_dir():
                continue

            ext = filePath.suffix
            if ext == ".csv":
                sourceIter = [(filePath, None)]
            elif ext == ".xlsx":
                book = pe.get_book(file_name=str(filePath))
                sourceIter = [(filePath, name) for name in book.sheet_names()]
            else:
                logger.debug(f"Ignoring non-csv/xlsx file: `{filePath}`.")
                continue

            for (sourcePath, sheetName) in sourceIter:
                logger.debug(f"Handling source `{sourcePath}`{' (sheet '+sheetName+')' if sheetName else ''}")
                if (str(sourcePath), sheetName) in preconfiguredFiles:
                    logger.debug("Skipping because file is already configured")
                    continue
                rows = getRows(sourcePath, sheetName=sheetName)
                sourceConf = {"file": str(sourcePath), "sheetName": sheetName}
                updateConfig(globalConfigObj, sourceConf, rows, allNames)

    globalConfigObj["sources"].sort(key=mySort)

    categories = {
        item['type']
        for sourceData in globalConfigObj['sources']
        for item in sourceData[ASSIGNMENTS_KEY]
    }
    oldCategories = {section['from'] for section in globalConfigObj['outputs']['content']}
    # Sorted so the written config does not depend on set iteration order.
    globalConfigObj['outputs']['content'] += [
        {"title": f"[Rename me - display name of {c}]", "from": c}
        for c in sorted(categories - oldCategories, key=str)
    ]

    saveConfig(configOutFilename, globalConfigObj)
    logger.info(f"Wrote config file to `{configOutFilename}`")
def getSource(sourceObj):
    """Return the (file, sheetName) pair that identifies a source entry.

    sheetName is None when the entry has no "sheetName" key.
    """
    fileName = sourceObj["file"]
    sheet = sourceObj.get("sheetName")
    return (fileName, sheet)
def mySort(obj):
    """Sort key for a source entry: (lower-cased file path, sheet name).

    A missing or None sheet name is treated as "", matching getSource, which
    also treats "sheetName" as optional. This keeps the key comparable:
    entries without the key no longer raise KeyError, and a None sheet name
    never has to be compared against a string.
    """
    return (obj["file"].lower(), obj.get("sheetName") or "")
if __name__ == "__main__":
    # Command-line entry point: zero or more sources, an optional starting
    # config (-i) and the output path (-o).
    cli = argparse.ArgumentParser()
    cli.add_argument(
        'sourcesDir',
        metavar='SOURCE',
        type=str,
        nargs='*',
        help='A csv or xlsx source (roster, gradesheet, etc) or a directory containing such sources',
    )
    cli.add_argument(
        '-i',
        metavar='CONFIG_FILE',
        type=str,
        help='An initial config file to add to',
    )
    cli.add_argument(
        '-o',
        metavar='OUTPUT_FILE',
        type=str,
        help='Output file (default: tempConfig.json)',
        default='tempConfig.json',
    )
    parsed = cli.parse_args()
    main(parsed.sourcesDir, parsed.i, parsed.o)