Skip to content

Commit 1fe7878

Browse files
[Bugfix:Plagiarism] Fix all versions bug (#72)
* Fix multiple versions bug * Add config.json * Fix course * Add placeholder file * Fix tests(?)
1 parent a45457e commit 1fe7878

File tree

31 files changed

+1705
-40
lines changed

31 files changed

+1705
-40
lines changed

bin/process_all.sh

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,11 @@ mkdir -p "${BASEPATH}/users"
8585

8686
############################################################################
8787
# Run Lichen
88-
./tokenize_all.py "$tmp_location" || { rm -rf "$tmp_location"; exit 1; }
89-
./hash_all.py "$tmp_location" || { rm -rf "$tmp_location"; exit 1; }
90-
./compare_hashes.out "$tmp_location" || { rm -rf "$tmp_location"; echo "${KILL_ERROR_MESSAGE}"; exit 1; }
88+
{ # We still want to unzip files if an error occurs when running Lichen here
89+
./tokenize_all.py "$tmp_location" &&
90+
./hash_all.py "$tmp_location" &&
91+
./compare_hashes.out "$tmp_location" || echo "${KILL_ERROR_MESSAGE}";
92+
}
9193

9294
############################################################################
9395
# Zip the results back up and send them back to the course's lichen directory

compare_hashes/compare_hashes.cpp

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -157,8 +157,8 @@ int main(int argc, char* argv[]) {
157157
std::unordered_set<hash> provided_code;
158158
// stores all hashes from other gradeables
159159
std::unordered_map<hash, std::unordered_map<user_id, std::vector<HashLocation>>> other_gradeables;
160-
// stores the highest match for every student, used later for generating overall_rankings.txt
161-
std::unordered_map<user_id, std::pair<int, Score>> highest_matches;
160+
// stores the matches for every student, used later for generating overall_rankings.txt
161+
std::unordered_map<user_id, std::vector<std::pair<version_number, Score>>> highest_matches;
162162
// keeps track of max matching hashes across all submissions, used for calculation of ranking score
163163
unsigned int max_hashes_matched = 0;
164164

@@ -283,7 +283,7 @@ int main(int argc, char* argv[]) {
283283
}
284284
}
285285

286-
// if the hash doesn't match any of the provided code's hashes, try to find matched between other students
286+
// if the hash doesn't match any of the provided code's hashes, try to find matches between other students
287287
if (!provided_match_found) {
288288
// look up that hash in the all_hashes table, loop over all other students that have the same hash
289289
std::unordered_map<std::string, std::vector<HashLocation>> occurences = all_hashes[hash_itr->first];
@@ -333,23 +333,6 @@ int main(int argc, char* argv[]) {
333333
continue;
334334
}
335335

336-
// Save this submissions highest percent match for later when we generate overall_rankings.txt
337-
float percentMatch = (*submission_itr)->getPercentage();
338-
unsigned int totalMatchingHashes = (*submission_itr)->getMatchCount();
339-
Score submission_score(totalMatchingHashes, percentMatch);
340-
if (max_hashes_matched < totalMatchingHashes) {
341-
max_hashes_matched = totalMatchingHashes;
342-
}
343-
344-
std::unordered_map<user_id, std::pair<int, Score> >::iterator highest_matches_itr = highest_matches.find((*submission_itr)->student());
345-
std::pair<int, Score> new_pair = {(*submission_itr)->version(), submission_score};
346-
if (highest_matches_itr == highest_matches.end()) {
347-
highest_matches.insert({(*submission_itr)->student(), new_pair});
348-
}
349-
else if (submission_score > highest_matches_itr->second.second) {
350-
highest_matches_itr->second = new_pair;
351-
}
352-
353336
// =========================================================================
354337
// Write matches.json file
355338

@@ -563,6 +546,19 @@ int main(int argc, char* argv[]) {
563546
}
564547
}
565548

549+
// =========================================================================
550+
// Save this submission's highest percent match for later when we generate overall_rankings.txt
551+
float percentMatch = (*submission_itr)->getPercentage();
552+
unsigned int totalMatchingHashes = (*submission_itr)->getMatchCount();
553+
Score submission_score(totalMatchingHashes, percentMatch);
554+
if (max_hashes_matched < totalMatchingHashes) {
555+
max_hashes_matched = totalMatchingHashes;
556+
}
557+
558+
std::pair<version_number, Score> new_pair = {(*submission_itr)->version(), submission_score};
559+
highest_matches[(*submission_itr)->student()].push_back(new_pair);
560+
// =========================================================================
561+
566562
std::sort(student_ranking.begin(), student_ranking.end(), ranking_sorter);
567563

568564
// create the directory and a file to write into
@@ -609,10 +605,18 @@ int main(int argc, char* argv[]) {
609605
// take the map of highest matches and convert it to a vector so we can sort it
610606
// by percent match and then save it to a file
611607
std::vector<StudentRanking> ranking;
612-
for (std::unordered_map<user_id, std::pair<int, Score> >::iterator itr
608+
for (std::unordered_map<user_id, std::vector<std::pair<version_number, Score>>>::iterator itr
613609
= highest_matches.begin(); itr != highest_matches.end(); ++itr) {
614-
ranking.push_back(StudentRanking(itr->first, itr->second.first, "", itr->second.second));
615-
ranking[ranking.size()-1].score.calculateScore(max_hashes_matched);
610+
611+
std::pair<version_number, Score> best_score = itr->second.front();
612+
best_score.second.calculateScore(max_hashes_matched);
613+
for (unsigned int i=0; i < itr->second.size(); i++) {
614+
itr->second[i].second.calculateScore(max_hashes_matched);
615+
if (itr->second[i].second > best_score.second) {
616+
best_score = itr->second[i];
617+
}
618+
}
619+
ranking.push_back(StudentRanking(itr->first, best_score.first, "", best_score.second));
616620
}
617621

618622
std::sort(ranking.begin(), ranking.end(), ranking_sorter);

compare_hashes/score.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define SCORE_H
33

44
#include <cassert>
5+
#include <string>
56

67
typedef int location_in_submission;
78
typedef unsigned int hash;
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"semester": "f21",
3+
"course": "plagiarism",
4+
"gradeable": "multiple_versions",
5+
"config_id": 1,
6+
"version": "all_versions",
7+
"regex": [
8+
""
9+
],
10+
"regex_dirs": [
11+
"submissions"
12+
],
13+
"language": "plaintext",
14+
"threshold": 10,
15+
"hash_size": 4,
16+
"other_gradeables": [],
17+
"ignore_submissions": []
18+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
Beginning Lichen run: 2021-12-21 17:20:31
2+
CONCATENATE ALL...done in 0 seconds, 949 Bytes concatenated
3+
TOKENIZE ALL...done in 0 seconds
4+
HASH ALL...done in 0 seconds
5+
COMPARE HASHES...finished loading in 0 seconds
6+
hash walk: 33% complete
7+
hash walk: 66% complete
8+
hash walk: 100% complete
9+
finished walking in 0 seconds
10+
COMPARE HASHES done in 0 seconds

tests/data/test_lichen/multiple_versions/expected_output/other_gradeables/git_placeholder.txt

Whitespace-only changes.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
aphacker 2 81.4% 35
2+
bitdiddle 1 81.4% 35

tests/data/test_lichen/multiple_versions/expected_output/provided_code/files/git_placeholder.txt

Whitespace-only changes.

tests/data/test_lichen/multiple_versions/expected_output/provided_code/hashes.txt

Whitespace-only changes.

tests/data/test_lichen/multiple_versions/expected_output/provided_code/submission.concatenated

Whitespace-only changes.

0 commit comments

Comments
 (0)