@@ -141,8 +141,8 @@ public function go() {
141
141
case $ this ->check_for_excessive_dropped_users ():
142
142
// This check will block all upserts when an error is detected.
143
143
exit (1 );
144
- case $ this ->check_for_duplicate_user_ids ():
145
- $ this -> log_it ( " Duplicate user IDs detected in CSV file. " );
144
+ case $ this ->filter_duplicate_registrations ():
145
+ // Never returns false. Error messages are already in log queue.
146
146
break ;
147
147
case $ this ->invalidate_courses ():
148
148
// Should do nothing when $this->invalid_courses is empty
@@ -299,31 +299,31 @@ private function get_csv_data() {
299
299
}
300
300
301
301
/**
302
- * Users cannot be registered to the same course multiple times.
302
+ * Students cannot be registered to the same course multiple times.
303
303
*
304
- * Any course with a user registered more than once is flagged invalid as
305
- * it is indicative of data errors from the CSV file.
306
- *
307
- * @return bool always TRUE
304
+ * If multiple registrations for the same student and course are found, the first instance is allowed to be
305
+ * upserted to the database. All other instances are removed from the data set and therefore not upserted.
308
306
*/
309
- private function check_for_duplicate_user_ids () {
310
- foreach ($ this ->data as $ course => $ rows ) {
311
- $ user_ids = null ;
312
- $ d_rows = null ;
313
- // Returns FALSE (as in there is an error) when duplicate IDs are found.
314
- // However, a duplicate ID does not invalidate a course. Instead, the
315
- // first enrollment is accepted, the other enrollments are discarded,
316
- // and the event is logged.
317
- if (validate::check_for_duplicate_user_ids ($ rows , $ user_ids , $ d_rows ) === false ) {
318
- foreach ($ d_rows as $ user_id => $ userid_rows ) {
319
- $ length = count ($ userid_rows );
320
- for ($ i = 1 ; $ i < $ length ; $ i ++) {
321
- unset($ this ->data [$ course ][$ userid_rows [$ i ]]);
322
- }
307
+ private function filter_duplicate_registrations (): true {
308
+ foreach ($ this ->data as $ course => &$ rows ) {
309
+ usort ($ rows , function ($ a , $ b ) { return $ a [COLUMN_USER_ID ] <=> $ b [COLUMN_USER_ID ]; });
310
+ $ duplicated_ids = [];
311
+ $ num_rows = count ($ rows );
312
+
313
+ // We are iterating from bottom to top through a course's data set. Should we find a duplicate registration
314
+ // and unset it from the array, (1) we are unsetting duplicates starting from the bottom, (2) which preserves
315
+ // the first entry among duplicate entries, and (3) we do not make a comparison with a null key.
316
+ for ($ j = $ num_rows - 1 , $ i = $ j - 1 ; $ i >= 0 ; $ i --, $ j --) {
317
+ if ($ rows [$ i ][COLUMN_USER_ID ] === $ rows [$ j ][COLUMN_USER_ID ]) {
318
+ $ duplicated_ids [] = $ rows [$ j ][COLUMN_USER_ID ];
319
+ unset($ rows [$ j ]);
323
320
}
321
+ }
324
322
323
+ if (count ($ duplicated_ids ) > 0 ) {
324
+ array_unique ($ duplicated_ids , SORT_STRING );
325
325
$ msg = "Duplicate user IDs detected in {$ course } data: " ;
326
- $ msg .= implode (", " , $ user_ids );
326
+ $ msg .= implode (", " , $ duplicated_ids );
327
327
$ this ->log_it ($ msg );
328
328
}
329
329
}
0 commit comments