Skip to content

Commit 8238f69

Browse files
authored
Common code ranges & Details about matching code blocks (#6)
* edits * matching ranges * positions & versions are now ints (not strings)
1 parent ed02455 commit 8238f69

File tree

2 files changed

+129
-35
lines changed

2 files changed

+129
-35
lines changed

bin/process_all.sh

+3-2
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@
33
semester=$1
44
course=$2
55
gradeable=$3
6+
window=$4
67

78
/usr/local/submitty/Lichen/bin/concatenate_all.py $semester $course $gradeable
89
/usr/local/submitty/Lichen/bin/tokenize_all.py $semester $course $gradeable --plaintext
9-
/usr/local/submitty/Lichen/bin/hash_all.py $semester $course $gradeable --window 5 --plaintext
10+
/usr/local/submitty/Lichen/bin/hash_all.py $semester $course $gradeable --window $window --plaintext
1011

11-
/usr/local/submitty/Lichen/bin/compare_hashes.out $semester $course $gradeable
12+
/usr/local/submitty/Lichen/bin/compare_hashes.out $semester $course $gradeable --window $window
1213

compare_hashes/compare_hashes.cpp

+126-33
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@
2121
// for a user.
2222
class Submission {
2323
public:
24-
// Removed line: the earlier constructor took the version as a string.
Submission(std::string u, std::string v) : username(u),version(v) {}
24+
// Added line: copies u into username and v into the integer version.
Submission(std::string u, int v) : username(u),version(v) {}
2525
// The user this submission is recorded under.
std::string username;
26-
// Removed line: version used to be stored as a string.
std::string version;
26+
// Added line: version is now stored as an int.
int version;
2727
};
2828

2929
// to allow sorting
@@ -37,7 +37,7 @@ bool operator<(const Submission &a, const Submission &b) {
3737
// the token) within a specific concatenated file (the Submission).
3838
class Sequence {
3939
public:
40-
// Removed line: the earlier constructor took the version as a string.
Sequence(std::string u, std::string v, int p) : submission(u,v),position(p) {}
40+
// Added line: builds the owning Submission from (username, version) and
// stores p as the position.
Sequence(std::string username, int version, int p) : submission(username,version),position(p) {}
4141
// The (username, version) pair this sequence was found in.
Submission submission;
4242
// Location of the sequence inside that submission
// (presumably a token index in the concatenated file -- confirm against the hasher).
int position;
4343
};
@@ -47,7 +47,7 @@ class Sequence {
4747
// helper typedefs
4848

4949

50-
// common sequence hash -> ( each user -> all match locations by that user across all versions )
50+
// matching sequence hash -> ( each user -> all match locations by that user across all versions )
5151
typedef std::map<std::string,std::map<std::string,std::vector<Sequence> > > hashed_sequences;
5252

5353

@@ -66,6 +66,53 @@ bool ranking_sorter(const std::pair<Submission,float> &a, const std::pair<Submis
6666
}
6767

6868

69+
// ===================================================================================
70+
// ===================================================================================
71+
void insert_others(std::map<Submission,std::set<int> > &others,
72+
const std::map<Submission,std::vector<Sequence> > &matches) {
73+
for (std::map<Submission,std::vector<Sequence> >::const_iterator itr = matches.begin(); itr!=matches.end();itr++) {
74+
//std::set<int> foo;
75+
for (int i = 0; i < itr->second.size(); i++) {
76+
others[itr->first].insert(itr->second[i].position);
77+
}
78+
//.insert(std::make_pair(itr->first,foo));
79+
}
80+
}
81+
82+
void convert(std::map<Submission,std::set<int> > &myset, nlohmann::json &obj) {
83+
for (std::map<Submission,std::set<int> >::iterator itr = myset.begin(); itr != myset.end(); itr++) {
84+
nlohmann::json me;
85+
me["username"] = itr->first.username;
86+
me["version"] = itr->first.version;
87+
88+
std::vector<nlohmann::json> foo;
89+
int start = -1;
90+
int end = -1;
91+
std::set<int>::iterator itr2 = itr->second.begin();
92+
while (true) {
93+
int pos = (itr2 == itr->second.end()) ? -1 : *itr2;
94+
if (pos != -1 && start == -1) {
95+
start = end = pos;
96+
} else if (pos != -1 && end+1 == pos) {
97+
end = pos;
98+
} else if (start != -1) {
99+
nlohmann::json range;
100+
range["start"] = start;
101+
range["end"] = end;
102+
start=end=-1;
103+
foo.push_back(range);
104+
}
105+
if (itr2 == itr->second.end()) {
106+
break;
107+
}
108+
itr2++;
109+
}
110+
111+
me["matchingpositions"] = foo;
112+
obj.push_back(me);
113+
}
114+
}
115+
69116
// ===================================================================================
70117
// ===================================================================================
71118
int main(int argc, char* argv[]) {
@@ -76,11 +123,13 @@ int main(int argc, char* argv[]) {
76123

77124
// ---------------------------------------------------------------------------
78125
// deal with command line arguments
79-
assert (argc == 4);
126+
assert (argc == 6);
80127
std::string semester = argv[1];
81128
std::string course = argv[2];
82129
std::string gradeable = argv[3];
83-
130+
assert (argv[4] == std::string("--window"));
131+
int window = std::stoi(std::string(argv[5]));
132+
assert (window >= 1);
84133

85134
// error checking, confirm there are hashes to work with
86135
std::string tmp = "/var/local/submitty/courses/"+semester+"/"+course+"/lichen/hashes/"+gradeable;
@@ -112,7 +161,9 @@ int main(int argc, char* argv[]) {
112161
for (boost::filesystem::directory_iterator username_itr( username_path ); username_itr != end_iter; ++username_itr) {
113162
boost::filesystem::path version_path = username_itr->path();
114163
assert (is_directory(version_path));
115-
std::string version = username_itr->path().filename().string();
164+
std::string str_version = username_itr->path().filename().string();
165+
int version = std::stoi(str_version);
166+
assert (version > 0);
116167
// load the hashes sequences from this submission
117168
boost::filesystem::path hash_file = version_path;
118169
hash_file /= "hashes.txt";
@@ -131,7 +182,7 @@ int main(int argc, char* argv[]) {
131182

132183
// label the parts of the file that are common to many
133184
// user,version -> set<position>
134-
std::map<Submission,std::vector<int> > common;
185+
std::map<Submission,std::set<int> > common;
135186

136187
// label the parts of the file that match the provided code
137188
// user,version -> vector<position>
@@ -152,7 +203,7 @@ int main(int argc, char* argv[]) {
152203
// common to many/all
153204
for (std::map<std::string,std::vector<Sequence> >::iterator itr2 = itr->second.begin(); itr2 != itr->second.end(); itr2++) {
154205
for (int i = 0; i < itr2->second.size(); i++) {
155-
common[itr2->second[i].submission].push_back(itr2->second[i].position);
206+
common[itr2->second[i].submission].insert(itr2->second[i].position);
156207
}
157208
}
158209
} else if (count > 1 && count < 20) {
@@ -161,15 +212,15 @@ int main(int argc, char* argv[]) {
161212
std::string username = itr2->first;
162213
for (int i = 0; i < itr2->second.size(); i++) {
163214
assert (itr2->second[i].submission.username == username);
164-
std::string version = itr2->second[i].submission.version;
215+
int version = itr2->second[i].submission.version;
165216
int position = itr2->second[i].position;
166217

167218
std::map<Submission, std::vector<Sequence> > matches;
168219

169220
for (std::map<std::string,std::vector<Sequence> >::iterator itr3 = itr->second.begin(); itr3 != itr->second.end(); itr3++) {
170221
std::string match_username = itr3->first;
171222
for (int j = 0; j < itr3->second.size(); j++) {
172-
std::string match_version = itr3->second[j].submission.version;
223+
int match_version = itr3->second[j].submission.version;
173224
Submission ms(match_username,match_version);
174225
matches[ms].push_back(itr3->second[j]);
175226
}
@@ -185,6 +236,7 @@ int main(int argc, char* argv[]) {
185236
// ---------------------------------------------------------------------------
186237
// prepare a sorted list of all users sorted by match percent
187238
std::vector<std::pair<Submission,float> > ranking;
239+
188240
for (std::map<Submission,std::map<int,std::map<Submission,std::vector<Sequence> > > >::iterator itr = suspicious.begin();
189241
itr != suspicious.end(); itr++) {
190242
int total = submission_length[itr->first];
@@ -194,58 +246,99 @@ int main(int argc, char* argv[]) {
194246
std::vector<nlohmann::json> info;
195247

196248
std::string username = itr->first.username;
197-
std::string version = itr->first.version;
249+
int version = itr->first.version;
198250

199251
ranking.push_back(std::make_pair(itr->first,percent));
200252

201253
// prepare the ranges of suspicious matching tokens
202254
int range_start=-1;
203255
int range_end=-1;
204-
for (std::map<int,std::map<Submission,std::vector<Sequence> > >::iterator itr2 = itr->second.begin(); itr2 != itr->second.end(); itr2++) {
205-
int pos = itr2->first;
206-
if (range_start==-1) {
256+
std::map<Submission, std::set<int> > others;
257+
std::map<int,std::map<Submission,std::vector<Sequence> > >::iterator itr2 = itr->second.begin();
258+
while (true) {
259+
int pos = (itr2 == itr->second.end()) ? -1 : itr2->first;
260+
if (pos != -1 && range_start==-1) {
207261
range_start = range_end = pos;
208-
} else if (range_end+1 == pos) {
262+
insert_others(others,itr2->second);
263+
} else if (pos != -1 && range_end+1 == pos) {
209264
range_end = pos;
210-
} else {
211-
std::map<std::string,std::string> info_data;
212-
info_data["start"]=std::to_string(range_start);
213-
info_data["end"]=std::to_string(range_end);
214-
info_data["type"]=std::string("match");
265+
insert_others(others,itr2->second);
266+
} else if (range_start != -1) {
267+
std::map<std::string,nlohmann::json> info_data;
268+
info_data["start"]=nlohmann::json(range_start);
269+
info_data["end"]=nlohmann::json(range_end);
270+
info_data["type"]=nlohmann::json(std::string("match"));
271+
nlohmann::json obj;
272+
convert(others,obj);
273+
info_data["others"]=obj;
215274
info.push_back(info_data);
216275
range_start=range_end=-1;
276+
others.clear();
277+
}
278+
if (itr2 == itr->second.end()) {
279+
break;
217280
}
281+
itr2++;
218282
}
219-
if (range_start != -1) {
220-
std::map<std::string,std::string> info_data;
221-
info_data["start"]=std::to_string(range_start);
222-
info_data["end"]=std::to_string(range_end);
223-
info_data["type"]=std::string("match");
224-
info.push_back(info_data);
225-
range_start=range_end=-1;
283+
284+
std::map<Submission,std::set<int> >::iterator itr3 = common.find(itr->first);
285+
if (itr3 != common.end()) {
286+
//std::cout << "HAS COMMON CODE" << std::endl;
287+
int range_start=-1;
288+
int range_end=-1;
289+
for (std::set<int>::iterator itr4 = itr3->second.begin(); itr4 != itr3->second.end(); itr4++) {
290+
//std::cout << "v=" << *itr4 << std::endl;
291+
if (range_start == -1) {
292+
range_start = range_end = *itr4;
293+
} else if (range_end+1 == *itr4) {
294+
range_end = *itr4;
295+
} else {
296+
std::map<std::string,nlohmann::json> info_data;
297+
info_data["start"]=nlohmann::json(range_start);
298+
info_data["end"]=nlohmann::json(range_end);
299+
info_data["type"]=std::string("common");
300+
info.push_back(info_data);
301+
range_start = range_end = -1;
302+
}
303+
}
304+
if (range_start != -1) {
305+
std::map<std::string,nlohmann::json> info_data;
306+
info_data["start"]=nlohmann::json(range_start);
307+
info_data["end"]=nlohmann::json(range_end);
308+
info_data["type"]=std::string("common");
309+
info.push_back(info_data);
310+
range_start=range_end=-1;
311+
}
226312
}
227313

228314
// save the file with matches per user
229315
nlohmann::json match_data = info;
230-
std::string matches_dir = "/var/local/submitty/courses/"+semester+"/"+course+"/lichen/matches/"+gradeable+"/"+username+"/"+version;
316+
std::string matches_dir = "/var/local/submitty/courses/"+semester+"/"+course+"/lichen/matches/"+gradeable+"/"+username+"/"+std::to_string(version);
231317
boost::filesystem::create_directories(matches_dir);
232318
std::string matches_file = matches_dir+"/matches.json";
233319
std::ofstream ostr(matches_file);
234320
assert (ostr.good());
235321
ostr << match_data.dump(4) << std::endl;
236322
}
237323

324+
std::set<std::string> users_already_ranked;
325+
238326
// save the rankings to a file
239327
std::string ranking_dir = "/var/local/submitty/courses/"+semester+"/"+course+"/lichen/ranking/";
240328
std::string ranking_file = ranking_dir+gradeable+".txt";
241329
boost::filesystem::create_directories(ranking_dir);
242330
std::ofstream ranking_ostr(ranking_file);
243331
std::sort(ranking.begin(),ranking.end(),ranking_sorter);
244332
for (int i = 0; i < ranking.size(); i++) {
245-
ranking_ostr
246-
<< std::setw(6) << std::setprecision(2) << std::fixed << 100.0*ranking[i].second << "% "
247-
<< std::setw(15) << std::left << ranking[i].first.username << " "
248-
<< std::setw(3) << std::right << ranking[i].first.version << std::endl;
333+
std::string username = ranking[i].first.username;
334+
if (users_already_ranked.insert(username).second != false) {
335+
// print each username at most once, only if insert was
336+
// successful (not already in the set)
337+
ranking_ostr
338+
<< std::setw(6) << std::setprecision(2) << std::fixed << 100.0*ranking[i].second << "% "
339+
<< std::setw(15) << std::left << ranking[i].first.username << " "
340+
<< std::setw(3) << std::right << ranking[i].first.version << std::endl;
341+
}
249342
}
250343

251344

0 commit comments

Comments
 (0)