17
17
#include " lichen_config.h"
18
18
#include " submission.h"
19
19
#include " hash_location.h"
20
- #include " score.h"
21
20
22
21
23
22
// =============================================================================
@@ -29,20 +28,6 @@ typedef std::string user_id;
29
28
typedef unsigned int version_number;
30
29
31
30
32
- // =============================================================================
33
- // helper classes
34
-
35
-
36
- // represents an element in a ranking of students by percent match
37
- struct StudentRanking {
38
- StudentRanking (const user_id &id, int v, const std::string &sg, const Score &s) : student(id), version(v), source_gradeable(sg), score(s) {}
39
- user_id student;
40
- version_number version;
41
- std::string source_gradeable;
42
- Score score;
43
- };
44
-
45
-
46
31
// =============================================================================
47
32
// helper functions
48
33
@@ -89,12 +74,6 @@ void incrementEndPositionsForMatches(nlohmann::json &others) {
89
74
}
90
75
91
76
92
- bool ranking_sorter (const StudentRanking &a, const StudentRanking &b) {
93
- return a.score > b.score ||
94
- (a.score == b.score && a.student < b.student );
95
- }
96
-
97
-
98
77
// =============================================================================
99
78
// MAIN
100
79
@@ -157,10 +136,6 @@ int main(int argc, char* argv[]) {
157
136
std::unordered_set<hash> provided_code;
158
137
// stores all hashes from other gradeables
159
138
std::unordered_map<hash, std::unordered_map<user_id, std::vector<HashLocation>>> other_gradeables;
160
- // stores the matches for every student, used later for generating overall_rankings.txt
161
- std::unordered_map<user_id, std::vector<std::pair<version_number, Score>>> highest_matches;
162
- // keeps track of max matching hashes across all submissions, used for calculation of ranking score
163
- unsigned int max_hashes_matched = 0 ;
164
139
// a map of "user_id:version" strings to the non-zero number of times their matching positions array was truncated
165
140
std::unordered_map<std::string, int > matching_positions_truncations;
166
141
@@ -323,7 +298,7 @@ int main(int argc, char* argv[]) {
323
298
324
299
// Note: we DO look for matches across submissions of the same student for self-plagiarism
325
300
326
- // save the locations of all other occurences from proir term submissions
301
+ // save the locations of all other occurrences from prior term submissions
327
302
std::vector<HashLocation>::iterator itr = other_occurences_itr->second .begin ();
328
303
for (; itr != other_occurences_itr->second .end (); ++itr) {
329
304
(*submission_itr)->addSuspiciousMatch (hash_itr->second , *itr, hash_itr->first );
@@ -515,80 +490,14 @@ int main(int argc, char* argv[]) {
515
490
assert (ostr.good ());
516
491
ostr << match_data.dump (4 ) << std::endl;
517
492
518
- // =========================================================================
519
- // create individual ranking file
520
- // the file contains all the other students share matches, sorted by decreasing order of the percent match
521
-
522
- // find and sort the other submissions it matches with
523
- std::vector<StudentRanking> student_ranking;
524
- std::unordered_map<std::string, std::unordered_map<user_id, std::unordered_map<version_number, std::unordered_set<hash>>>> matches = (*submission_itr)->getStudentsMatched ();
525
-
526
- std::unordered_map<std::string, std::unordered_map<user_id, std::unordered_map<version_number, std::unordered_set<hash>>>>::const_iterator gradeables_itr = matches.begin ();
527
- for (; gradeables_itr != matches.end (); ++gradeables_itr) {
528
- for (std::unordered_map<user_id, std::unordered_map<version_number, std::unordered_set<hash>>>::const_iterator matches_itr = gradeables_itr->second .begin ();
529
- matches_itr != gradeables_itr->second .end (); ++matches_itr) {
530
-
531
- for (std::unordered_map<version_number, std::unordered_set<hash>>::const_iterator version_itr = matches_itr->second .begin ();
532
- version_itr != matches_itr->second .end (); ++version_itr) {
533
-
534
- // Calculate the Percent Match:
535
- // count the number of unique hashes for the percent match calculation
536
- std::vector<std::pair<hash, location_in_submission>> submission_hashes = (*submission_itr)->getHashes ();
537
- std::unordered_set<hash> unique_hashes;
538
- for (std::vector<std::pair<hash, location_in_submission>>::const_iterator itr = submission_hashes.begin ();
539
- itr != submission_hashes.end (); ++itr) {
540
- unique_hashes.insert (itr->first );
541
- }
542
-
543
- // the percent match is currently calculated using the number of hashes that match between this
544
- // submission and the other submission, over the total number of hashes this submission has.
545
- // In other words, the percentage is how much of this submission's code was plgairised from the other.
546
- unsigned int num_hashes_matched = version_itr->second .size ();
547
- float percent = (100.0 * num_hashes_matched) / unique_hashes.size ();
548
- student_ranking.push_back (StudentRanking (matches_itr->first , version_itr->first , gradeables_itr->first , Score (num_hashes_matched, percent)));
549
- student_ranking.back ().score .calculateScore (num_hashes_matched);
550
- }
551
- }
552
- }
553
-
554
- // =========================================================================
555
- // Save this submission's highest percent match for later when we generate overall_rankings.txt
556
- float percentMatch = (*submission_itr)->getPercentage ();
557
- unsigned int totalMatchingHashes = (*submission_itr)->getMatchCount ();
558
- Score submission_score (totalMatchingHashes, percentMatch);
559
- if (max_hashes_matched < totalMatchingHashes) {
560
- max_hashes_matched = totalMatchingHashes;
561
- }
562
-
563
- std::pair<version_number, Score> new_pair = {(*submission_itr)->version (), submission_score};
564
- highest_matches[(*submission_itr)->student ()].push_back (new_pair);
565
- // =========================================================================
566
-
567
- std::sort (student_ranking.begin (), student_ranking.end (), ranking_sorter);
568
-
569
- // create the directory and a file to write into
570
- boost::filesystem::path ranking_student_dir = users_root_directory / (*submission_itr)->student () / std::to_string ((*submission_itr)->version ());
571
- boost::filesystem::path ranking_student_file = ranking_student_dir / " ranking.txt" ;
572
- boost::filesystem::create_directories (ranking_student_dir);
573
- std::ofstream ranking_student_ostr (ranking_student_file.string ());
574
-
575
- // finally, write the file of ranking for this submission
576
- for (unsigned int i = 0 ; i < student_ranking.size (); i++) {
577
- ranking_student_ostr
578
- << std::setw (15 ) << std::left << student_ranking[i].student << " "
579
- << std::setw (3 ) << std::left << student_ranking[i].version << " "
580
- << std::setw (1 ) << std::right << student_ranking[i].source_gradeable << " "
581
- << std::setw (6 ) << std::setprecision (2 ) << std::fixed << student_ranking[i].score .getPercent () << " %" << std::endl;
582
- }
583
-
584
493
// =========================================================================
585
494
// Cleanup
586
495
587
- // Done with this submissions . discard the data and clear the memory
496
+ // Done with this submission. Discard the data and clear the memory
588
497
delete (*submission_itr);
589
498
(*submission_itr) = nullptr ;
590
499
591
- // print current progress
500
+ // Print current progress
592
501
my_counter++;
593
502
if (int ((my_counter / float (all_submissions.size ())) * 100 ) > my_percent) {
594
503
int new_my_percent = int ((my_counter / float (all_submissions.size ())) * 100 );
@@ -607,7 +516,7 @@ int main(int argc, char* argv[]) {
607
516
608
517
time (&end);
609
518
diff = difftime (end, start);
610
- std::cout << " ]" << std::endl << " Finished processing submissions in " << diff << " seconds " << std::endl ;
519
+ std::cout << " ]" << std::endl;
611
520
612
521
// Print out the list of users who had their matching positions array truncated
613
522
if (matching_positions_truncations.size () > 0 ) {
@@ -618,40 +527,6 @@ int main(int argc, char* argv[]) {
618
527
}
619
528
std::cout << std::endl << " - Try increasing the hash size or adding a regex to fix this problem." << std::endl;
620
529
}
621
- fflush (stdout);
622
-
623
- // ===========================================================================
624
- // Create a general summary of rankings of users by percentage match
625
-
626
- // create a single file of students ranked by highest percentage of code plagiarised
627
- boost::filesystem::path ranking_file = lichen_gradeable_path / " overall_ranking.txt" ;
628
- std::ofstream ranking_ostr (ranking_file.string ());
629
-
630
- // take the map of highest matches and convert it to a vector so we can sort it
631
- // by percent match and then save it to a file
632
- std::vector<StudentRanking> ranking;
633
- for (std::unordered_map<user_id, std::vector<std::pair<version_number, Score>>>::iterator itr
634
- = highest_matches.begin (); itr != highest_matches.end (); ++itr) {
635
-
636
- std::pair<version_number, Score> best_score = itr->second .front ();
637
- best_score.second .calculateScore (max_hashes_matched);
638
- for (unsigned int i=0 ; i < itr->second .size (); i++) {
639
- itr->second [i].second .calculateScore (max_hashes_matched);
640
- if (itr->second [i].second > best_score.second ) {
641
- best_score = itr->second [i];
642
- }
643
- }
644
- ranking.push_back (StudentRanking (itr->first , best_score.first , " " , best_score.second ));
645
- }
646
-
647
- std::sort (ranking.begin (), ranking.end (), ranking_sorter);
648
- for (unsigned int i = 0 ; i < ranking.size (); i++) {
649
- ranking_ostr
650
- << std::left << std::setw (20 ) << ranking[i].student << " "
651
- << std::setw (3 ) << ranking[i].version << " "
652
- << std::right << std::setw (4 ) << std::setprecision (1 ) << std::fixed << ranking[i].score .getPercent () << " % "
653
- << std::setw (5 ) << ranking[i].score .getHashesMatched () << std::endl;
654
- }
655
530
656
531
// ===========================================================================
657
532
// Done!
0 commit comments