Skip to content

Commit dd4e4bc

Browse files
committed
Fix rare edge case bug
There was a rare edge case that an email address found in the enrollment data, that didn't match RPI's 5+1 user ID pattern, could potentially cause the wrong entry(ies) to be filtered out when filtering duplicate student enrollments. Filtering code has been improved and the bug fixed.
1 parent 2fda991 commit dd4e4bc

File tree

2 files changed

+22
-64
lines changed

2 files changed

+22
-64
lines changed

student_auto_feed/ssaf_validate.php

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -74,48 +74,6 @@ public static function validate_row($row, $row_num) : bool {
7474
return true;
7575
}
7676

77-
/**
78-
* Check $rows for duplicate user IDs.
79-
*
80-
* Submitty's master DB does not permit students to register more than once
81-
* for any course. It would trigger a key violation exception. This
82-
* function checks for data anomalies where a student shows up in a course
83-
* more than once as that is indicative of an issue with CSV file data.
84-
* Returns TRUE, as in no error, when $rows has all unique user IDs.
85-
* False, as in error found, otherwise. $user_ids is filled when return
86-
* is FALSE.
87-
*
88-
* @param array $rows Data rows to check (presumably an entire couse).
89-
* @param string[] &$user_id Duplicated user ID, when found.
90-
* @param string[] &$d_rows Rows containing duplicate user IDs, indexed by user ID.
91-
* @return bool TRUE when all user IDs are unique, FALSE otherwise.
92-
*/
93-
public static function check_for_duplicate_user_ids(array $rows, &$user_ids, &$d_rows) : bool {
94-
usort($rows, function($a, $b) { return $a[COLUMN_USER_ID] <=> $b[COLUMN_USER_ID]; });
95-
96-
$user_ids = [];
97-
$d_rows = [];
98-
$are_all_unique = true; // Unless proven FALSE
99-
$length = count($rows);
100-
for ($i = 1; $i < $length; $i++) {
101-
$j = $i - 1;
102-
if ($rows[$i][COLUMN_USER_ID] === $rows[$j][COLUMN_USER_ID]) {
103-
$are_all_unique = false;
104-
$user_id = $rows[$i][COLUMN_USER_ID];
105-
$user_ids[] = $user_id;
106-
$d_rows[$user_id][] = $j;
107-
$d_rows[$user_id][] = $i;
108-
}
109-
}
110-
111-
foreach($d_rows as &$d_row) {
112-
array_unique($d_row, SORT_REGULAR);
113-
}
114-
unset($d_row);
115-
116-
return $are_all_unique;
117-
}
118-
11977
/**
12078
* Validate that there isn't an excessive drop ratio in course enrollments.
12179
*

student_auto_feed/submitty_student_auto_feed.php

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,8 @@ public function go() {
141141
case $this->check_for_excessive_dropped_users():
142142
// This check will block all upserts when an error is detected.
143143
exit(1);
144-
case $this->check_for_duplicate_user_ids():
145-
$this->log_it("Duplicate user IDs detected in CSV file.");
144+
case $this->filter_duplicate_registrations():
145+
// Never returns false. Error messages are already in log queue.
146146
break;
147147
case $this->invalidate_courses():
148148
// Should do nothing when $this->invalid_courses is empty
@@ -299,31 +299,31 @@ private function get_csv_data() {
299299
}
300300

301301
/**
302-
* Users cannot be registered to the same course multiple times.
302+
* Students cannot be registered to the same course multiple times.
303303
*
304-
* Any course with a user registered more than once is flagged invalid as
305-
* it is indicative of data errors from the CSV file.
306-
*
307-
* @return bool always TRUE
304+
* If multiple registrations for the same student and course are found, the first instance is allowed to be
305+
* upserted to the database. All other instances are removed from the data set and therefore not upserted.
308306
*/
309-
private function check_for_duplicate_user_ids() {
310-
foreach($this->data as $course => $rows) {
311-
$user_ids = null;
312-
$d_rows = null;
313-
// Returns FALSE (as in there is an error) when duplicate IDs are found.
314-
// However, a duplicate ID does not invalidate a course. Instead, the
315-
// first enrollment is accepted, the other enrollments are discarded,
316-
// and the event is logged.
317-
if (validate::check_for_duplicate_user_ids($rows, $user_ids, $d_rows) === false) {
318-
foreach($d_rows as $user_id => $userid_rows) {
319-
$length = count($userid_rows);
320-
for ($i = 1; $i < $length; $i++) {
321-
unset($this->data[$course][$userid_rows[$i]]);
322-
}
307+
private function filter_duplicate_registrations(): true {
308+
foreach($this->data as $course => &$rows) {
309+
usort($rows, function($a, $b) { return $a[COLUMN_USER_ID] <=> $b[COLUMN_USER_ID]; });
310+
$duplicated_ids = [];
311+
$num_rows = count($rows);
312+
313+
// We are iterating from bottom to top through a course's data set. Should we find a duplicate registration
314+
// and unset it from the array, (1) we are unsetting duplicates starting from the bottom, (2) which preserves
315+
// the first entry among duplicate entries, and (3) we do not make a comparison with a null key.
316+
for ($j = $num_rows - 1, $i = $j - 1; $i >= 0; $i--, $j--) {
317+
if ($rows[$i][COLUMN_USER_ID] === $rows[$j][COLUMN_USER_ID]) {
318+
$duplicated_ids[] = $rows[$j][COLUMN_USER_ID];
319+
unset($rows[$j]);
323320
}
321+
}
324322

323+
if (count($duplicated_ids) > 0) {
324+
array_unique($duplicated_ids, SORT_STRING);
325325
$msg = "Duplicate user IDs detected in {$course} data: ";
326-
$msg .= implode(", ", $user_ids);
326+
$msg .= implode(", ", $duplicated_ids);
327327
$this->log_it($msg);
328328
}
329329
}

0 commit comments

Comments
 (0)