Skip to content

Commit 1fe7878

Browse files
[Bugfix:Plagiarism] Fix all versions bug (#72)
* Fix multiple versions bug * Add config.json * Fix course * Add placeholder file * Fix tests(?)
1 parent a45457e commit 1fe7878

31 files changed

+1705
-40
lines changed

bin/process_all.sh

+5-3
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,11 @@ mkdir -p "${BASEPATH}/users"
8585

8686
############################################################################
8787
# Run Lichen
88-
./tokenize_all.py "$tmp_location" || { rm -rf "$tmp_location"; exit 1; }
89-
./hash_all.py "$tmp_location" || { rm -rf "$tmp_location"; exit 1; }
90-
./compare_hashes.out "$tmp_location" || { rm -rf "$tmp_location"; echo "${KILL_ERROR_MESSAGE}"; exit 1; }
88+
{ # We still want to unzip files if an error occurs when running Lichen here
89+
./tokenize_all.py "$tmp_location" &&
90+
./hash_all.py "$tmp_location" &&
91+
./compare_hashes.out "$tmp_location" || echo "${KILL_ERROR_MESSAGE}";
92+
}
9193

9294
############################################################################
9395
# Zip the results back up and send them back to the course's lichen directory

compare_hashes/compare_hashes.cpp

+27-23
Original file line numberDiff line numberDiff line change
@@ -157,8 +157,8 @@ int main(int argc, char* argv[]) {
157157
std::unordered_set<hash> provided_code;
158158
// stores all hashes from other gradeables
159159
std::unordered_map<hash, std::unordered_map<user_id, std::vector<HashLocation>>> other_gradeables;
160-
// stores the highest match for every student, used later for generating overall_rankings.txt
161-
std::unordered_map<user_id, std::pair<int, Score>> highest_matches;
160+
// stores the matches for every student, used later for generating overall_rankings.txt
161+
std::unordered_map<user_id, std::vector<std::pair<version_number, Score>>> highest_matches;
162162
// keeps track of max matching hashes across all submissions, used for calculation of ranking score
163163
unsigned int max_hashes_matched = 0;
164164

@@ -283,7 +283,7 @@ int main(int argc, char* argv[]) {
283283
}
284284
}
285285

286-
// if the hash doesn't match any of the provided code's hashes, try to find matched between other students
286+
// if the hash doesn't match any of the provided code's hashes, try to find matches between other students
287287
if (!provided_match_found) {
288288
// look up that hash in the all_hashes table, loop over all other students that have the same hash
289289
std::unordered_map<std::string, std::vector<HashLocation>> occurences = all_hashes[hash_itr->first];
@@ -333,23 +333,6 @@ int main(int argc, char* argv[]) {
333333
continue;
334334
}
335335

336-
// Save this submissions highest percent match for later when we generate overall_rankings.txt
337-
float percentMatch = (*submission_itr)->getPercentage();
338-
unsigned int totalMatchingHashes = (*submission_itr)->getMatchCount();
339-
Score submission_score(totalMatchingHashes, percentMatch);
340-
if (max_hashes_matched < totalMatchingHashes) {
341-
max_hashes_matched = totalMatchingHashes;
342-
}
343-
344-
std::unordered_map<user_id, std::pair<int, Score> >::iterator highest_matches_itr = highest_matches.find((*submission_itr)->student());
345-
std::pair<int, Score> new_pair = {(*submission_itr)->version(), submission_score};
346-
if (highest_matches_itr == highest_matches.end()) {
347-
highest_matches.insert({(*submission_itr)->student(), new_pair});
348-
}
349-
else if (submission_score > highest_matches_itr->second.second) {
350-
highest_matches_itr->second = new_pair;
351-
}
352-
353336
// =========================================================================
354337
// Write matches.json file
355338

@@ -563,6 +546,19 @@ int main(int argc, char* argv[]) {
563546
}
564547
}
565548

549+
// =========================================================================
550+
// Save this submission's highest percent match for later when we generate overall_rankings.txt
551+
float percentMatch = (*submission_itr)->getPercentage();
552+
unsigned int totalMatchingHashes = (*submission_itr)->getMatchCount();
553+
Score submission_score(totalMatchingHashes, percentMatch);
554+
if (max_hashes_matched < totalMatchingHashes) {
555+
max_hashes_matched = totalMatchingHashes;
556+
}
557+
558+
std::pair<version_number, Score> new_pair = {(*submission_itr)->version(), submission_score};
559+
highest_matches[(*submission_itr)->student()].push_back(new_pair);
560+
// =========================================================================
561+
566562
std::sort(student_ranking.begin(), student_ranking.end(), ranking_sorter);
567563

568564
// create the directory and a file to write into
@@ -609,10 +605,18 @@ int main(int argc, char* argv[]) {
609605
// take the map of highest matches and convert it to a vector so we can sort it
610606
// by percent match and then save it to a file
611607
std::vector<StudentRanking> ranking;
612-
for (std::unordered_map<user_id, std::pair<int, Score> >::iterator itr
608+
for (std::unordered_map<user_id, std::vector<std::pair<version_number, Score>>>::iterator itr
613609
= highest_matches.begin(); itr != highest_matches.end(); ++itr) {
614-
ranking.push_back(StudentRanking(itr->first, itr->second.first, "", itr->second.second));
615-
ranking[ranking.size()-1].score.calculateScore(max_hashes_matched);
610+
611+
std::pair<version_number, Score> best_score = itr->second.front();
612+
best_score.second.calculateScore(max_hashes_matched);
613+
for (unsigned int i=0; i < itr->second.size(); i++) {
614+
itr->second[i].second.calculateScore(max_hashes_matched);
615+
if (itr->second[i].second > best_score.second) {
616+
best_score = itr->second[i];
617+
}
618+
}
619+
ranking.push_back(StudentRanking(itr->first, best_score.first, "", best_score.second));
616620
}
617621

618622
std::sort(ranking.begin(), ranking.end(), ranking_sorter);

compare_hashes/score.h

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define SCORE_H
33

44
#include <cassert>
5+
#include <string>
56

67
typedef int location_in_submission;
78
typedef unsigned int hash;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"semester": "f21",
3+
"course": "plagiarism",
4+
"gradeable": "multiple_versions",
5+
"config_id": 1,
6+
"version": "all_versions",
7+
"regex": [
8+
""
9+
],
10+
"regex_dirs": [
11+
"submissions"
12+
],
13+
"language": "plaintext",
14+
"threshold": 10,
15+
"hash_size": 4,
16+
"other_gradeables": [],
17+
"ignore_submissions": []
18+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
Beginning Lichen run: 2021-12-21 17:20:31
2+
CONCATENATE ALL...done in 0 seconds, 949 Bytes concatenated
3+
TOKENIZE ALL...done in 0 seconds
4+
HASH ALL...done in 0 seconds
5+
COMPARE HASHES...finished loading in 0 seconds
6+
hash walk: 33% complete
7+
hash walk: 66% complete
8+
hash walk: 100% complete
9+
finished walking in 0 seconds
10+
COMPARE HASHES done in 0 seconds

tests/data/test_lichen/multiple_versions/expected_output/other_gradeables/git_placeholder.txt

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
aphacker 2 81.4% 35
2+
bitdiddle 1 81.4% 35

tests/data/test_lichen/multiple_versions/expected_output/provided_code/files/git_placeholder.txt

Whitespace-only changes.

tests/data/test_lichen/multiple_versions/expected_output/provided_code/hashes.txt

Whitespace-only changes.

tests/data/test_lichen/multiple_versions/expected_output/provided_code/submission.concatenated

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
ccbfc51b
2+
46d2e902
3+
0a1bc040
4+
6d14f9b3
5+
a5d513dd
6+
5e030a24
7+
c715d526
8+
fd3fa0fe
9+
b1917b6c
10+
ccbfc51b
11+
fcf8964c
12+
6afa4117
13+
25a42a47
14+
2ac066f5
15+
c6097572
16+
6011cbf5
17+
adefe73d
18+
36182b9f
19+
36d719a0
20+
fe129c06
21+
e44ef48d
22+
6bb90c04
23+
083a9efd
24+
93d49734
25+
0f905a05
26+
8bfb058d
27+
06410254
28+
61b171ee
29+
6c920afa
30+
05660ab4
31+
30a548ac
32+
b38f50f3
33+
2997d7c5
34+
297c601f
35+
e8ccd482
36+
ae6d442f
37+
4de258e3
38+
fae8aa98
39+
24ac3d5d
40+
fbdad65f
41+
fc98ba6b
42+
44bbaa49
43+
83df01b7
44+
964fade5
45+
2ea0ba40
46+
5494f32a
47+
e248b1d9
48+
528feb65
49+
27d1db1f
50+
c552988d
51+
cf65191e
52+
eff2064e
53+
0847585b
54+
c64da9e5
55+
7b3dc1c1
56+
045fe7d1
57+
50ac87da
58+
f5f088e7
59+
ecb2eef0
60+
7d75f52c
61+
8576ec09
62+
497a431b
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
[
2+
{
3+
"end": 4,
4+
"others": [
5+
{
6+
"matchingpositions": [
7+
{
8+
"end": 4,
9+
"start": 1
10+
},
11+
{
12+
"end": 13,
13+
"start": 10
14+
}
15+
],
16+
"source_gradeable": "f21__plagiarism__multiple_versions",
17+
"username": "bitdiddle",
18+
"version": 1
19+
}
20+
],
21+
"start": 1,
22+
"type": "match"
23+
},
24+
{
25+
"end": 6,
26+
"others": [
27+
{
28+
"matchingpositions": [
29+
{
30+
"end": 6,
31+
"start": 2
32+
}
33+
],
34+
"source_gradeable": "f21__plagiarism__multiple_versions",
35+
"username": "bitdiddle",
36+
"version": 1
37+
}
38+
],
39+
"start": 2,
40+
"type": "match"
41+
},
42+
{
43+
"end": 12,
44+
"others": [
45+
{
46+
"matchingpositions": [
47+
{
48+
"end": 12,
49+
"start": 8
50+
}
51+
],
52+
"source_gradeable": "f21__plagiarism__multiple_versions",
53+
"username": "bitdiddle",
54+
"version": 1
55+
}
56+
],
57+
"start": 8,
58+
"type": "match"
59+
},
60+
{
61+
"end": 13,
62+
"others": [
63+
{
64+
"matchingpositions": [
65+
{
66+
"end": 4,
67+
"start": 1
68+
},
69+
{
70+
"end": 13,
71+
"start": 10
72+
}
73+
],
74+
"source_gradeable": "f21__plagiarism__multiple_versions",
75+
"username": "bitdiddle",
76+
"version": 1
77+
}
78+
],
79+
"start": 10,
80+
"type": "match"
81+
},
82+
{
83+
"end": 14,
84+
"others": [
85+
{
86+
"matchingpositions": [
87+
{
88+
"end": 14,
89+
"start": 11
90+
}
91+
],
92+
"source_gradeable": "f21__plagiarism__multiple_versions",
93+
"username": "bitdiddle",
94+
"version": 1
95+
}
96+
],
97+
"start": 11,
98+
"type": "match"
99+
},
100+
{
101+
"end": 20,
102+
"others": [
103+
{
104+
"matchingpositions": [
105+
{
106+
"end": 32,
107+
"start": 26
108+
}
109+
],
110+
"source_gradeable": "f21__plagiarism__multiple_versions",
111+
"username": "bitdiddle",
112+
"version": 1
113+
}
114+
],
115+
"start": 14,
116+
"type": "match"
117+
},
118+
{
119+
"end": 34,
120+
"others": [
121+
{
122+
"matchingpositions": [
123+
{
124+
"end": 46,
125+
"start": 34
126+
}
127+
],
128+
"source_gradeable": "f21__plagiarism__multiple_versions",
129+
"username": "bitdiddle",
130+
"version": 1
131+
}
132+
],
133+
"start": 22,
134+
"type": "match"
135+
}
136+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
bitdiddle 1 f21__plagiarism__multiple_versions 32.79%
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
==== submission_1.txt ====
2+
This file is meant to represent the first submission of three in a small test involving users with multiple submissions. This submission is a little longer than the other two submissions because we want to test that the system still works when the highest matching version is the second version.

0 commit comments

Comments
 (0)