
Commit 75bb320

[Feature:Plagiarism] compare with prior term gradeables (#42)
* Initial rewrite of process_all.sh
* Update process_all.sh
* Make modifications to file paths and add timers
* Overhaul concatenate_all.py
* Implement all versions/active version feature
* Fix python errors
* Progress: everything through tokenization finished
* Everything works
* Add timers
* Remove unnecessary code
* Little python changes
* William made an oopsie (forgot to deal with provided code)
* Fix minor bugs: fix the process_all.sh script, fix a spelling issue, and prevent hash_all.py from breaking when empty tokenized files are written
* Fix permissions issue with provided code editing
* Remove typo
* Remove unnecessary print statement
* Modify compare_hashes
* Add support for other gradeables to concatenate_all.py
* Add support for other gradeables to tokenize_all.py; check permissions in concatenate_all.py
* Implement hash_all
* Fix python error
* Fix another python error...
* Bug fixes
* Bug fixes + progress
* Fix individual ranking files
* Clarify console message when compare_hashes completes
* Change annoying separator
* Tiny formatting

Co-authored-by: sbelsk <[email protected]>
1 parent 39f7862 commit 75bb320

4 files changed: +249, -45 lines

bin/concatenate_all.py (+78, -1)

@@ -10,6 +10,7 @@
 import sys
 import time
 import fnmatch
+from pathlib import Path

 IGNORED_FILES = [
     ".submit.timestamp"
@@ -35,7 +36,7 @@ def getConcatFilesInDir(input_dir, regex_patterns):
             absolute_path = os.path.join(my_dir, my_file)
             # print a separator & filename
             with open(absolute_path, encoding='ISO-8859-1') as tmp:
-                result += f"=============== {my_file} ===============\n"
+                result += f"==== {my_file} ====\n"
                 # append the contents of the file
                 result += tmp.read() + "\n"
     return result
@@ -70,6 +71,7 @@ def main():
     users_to_ignore = config["ignore_submissions"]
     regex_patterns = config["regex"].split(',')
     regex_dirs = config["regex_dirs"]
+    prior_term_gradeables = config["prior_term_gradeables"]

     # ==========================================================================
     # Error checking
@@ -80,6 +82,22 @@ def main():
             print('ERROR! Invalid path component ".." in regex')
             exit(1)

+    for ptg in prior_term_gradeables:
+        for field in ptg:
+            if ".." in field:
+                print('ERROR! Invalid path component ".." in prior_term_gradeable field')
+                exit(1)
+
+    # check permissions to make sure we have access to the prior term gradeables
+    my_course_group_perms = Path(args.basepath).group()
+    for ptg in prior_term_gradeables:
+        if Path(args.datapath, ptg["prior_semester"], ptg["prior_course"]).group()\
+                != my_course_group_perms:
+            print(f"Error: Invalid permissions to access course {ptg['prior_semester']}"
+                  f"/{ptg['prior_course']}")
+            exit(1)
+
+    # make sure the regex directory is one of the acceptable directories
     for dir in regex_dirs:
         if dir not in ["submissions", "results", "checkout"]:
             print("ERROR! ", dir, " is not a valid input directory for Lichen")
@@ -124,6 +142,50 @@ def main():
                 concatenated_contents = getConcatFilesInDir(version_path, regex_patterns)
                 output_file.write(concatenated_contents)

+    # ==========================================================================
+    # loop over all of the other prior term gradeables and concatenate their submissions
+    for other_gradeable in prior_term_gradeables:
+        for dir in regex_dirs:
+            other_gradeable_path = os.path.join(args.datapath,
+                                                other_gradeable["prior_semester"],
+                                                other_gradeable["prior_course"],
+                                                dir,
+                                                other_gradeable["prior_gradeable"])
+            # loop over each user
+            for other_user in sorted(os.listdir(other_gradeable_path)):
+                other_user_path = os.path.join(other_gradeable_path, other_user)
+                if not os.path.isdir(other_user_path):
+                    continue
+
+                if version_mode == "active_version":
+                    # get the user's active version from their settings file
+                    other_submissions_details_path = os.path.join(other_user_path,
+                                                                  'user_assignment_settings.json')
+
+                    with open(other_submissions_details_path) as other_details_file:
+                        other_details_json = json.load(other_details_file)
+                        my_active_version = int(other_details_json["active_version"])
+
+                # loop over each version
+                for other_version in sorted(os.listdir(other_user_path)):
+                    other_version_path = os.path.join(other_user_path, other_version)
+                    if not os.path.isdir(other_version_path):
+                        continue
+
+                    other_output_file_path = os.path.join(args.basepath, "other_gradeables",
+                                                          f"{other_gradeable['prior_semester']}__{other_gradeable['prior_course']}__{other_gradeable['prior_gradeable']}",  # noqa: E501
+                                                          other_user, other_version,
+                                                          "submission.concatenated")
+
+                    if not os.path.exists(os.path.dirname(other_output_file_path)):
+                        os.makedirs(os.path.dirname(other_output_file_path))
+
+                    # append to concatenated file
+                    with open(other_output_file_path, "a") as other_output_file:
+                        other_concatenated_contents = getConcatFilesInDir(other_version_path,
+                                                                          regex_patterns)
+                        other_output_file.write(other_concatenated_contents)
+
     # ==========================================================================
     # iterate over all of the created submissions, checking to see if they are empty
     # and adding a message to the top if so (to differentiate empty files from errors in the UI)
@@ -136,6 +198,21 @@ def main():
                 if my_cf.read() == "":
                     my_cf.write("Error: No files matched provided regex in selected directories")

+    # do the same for the other gradeables
+    for other_gradeable in prior_term_gradeables:
+        other_gradeable_dir_name = f"{other_gradeable['prior_semester']}__{other_gradeable['prior_course']}__{other_gradeable['prior_gradeable']}"  # noqa: E501
+        for other_user in os.listdir(os.path.join(args.basepath, "other_gradeables",
+                                                  other_gradeable_dir_name)):
+            other_user_path = os.path.join(args.basepath, "other_gradeables",
+                                           other_gradeable_dir_name, other_user)
+            for other_version in os.listdir(other_user_path):
+                other_version_path = os.path.join(other_user_path, other_version)
+                my_concatenated_file = os.path.join(other_version_path, "submission.concatenated")
+                with open(my_concatenated_file, "r+") as my_cf:
+                    if my_cf.read() == "":
+                        my_cf.write("Error: No files matched provided regex in "
+                                    "selected directories")
+
     # ==========================================================================
     # concatenate provided code
     with open(os.path.join(args.basepath, "provided_code",

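For reference, a minimal sketch (not part of the commit) of the configuration shape these new loops assume. Only the key names -- prior_term_gradeables, prior_semester, prior_course, prior_gradeable, and the sibling regex/regex_dirs/ignore_submissions keys -- come from the diff above; every value below is an illustrative placeholder.

import json
from pathlib import Path

# Hypothetical example config: key names taken from the diff, values invented.
example_config = json.loads("""
{
    "regex": "*.cpp,*.h",
    "regex_dirs": ["submissions"],
    "ignore_submissions": [],
    "prior_term_gradeables": [
        {
            "prior_semester": "f20",
            "prior_course": "csci1200",
            "prior_gradeable": "hw01"
        }
    ]
}
""")

def has_access(basepath, datapath, ptg):
    # Mirrors the permission check in the diff: the prior course's data
    # directory must share a group with this gradeable's Lichen base path.
    return Path(datapath, ptg["prior_semester"], ptg["prior_course"]).group() \
        == Path(basepath).group()

Each matching prior submission is then appended to basepath/other_gradeables/<prior_semester>__<prior_course>__<prior_gradeable>/<user>/<version>/submission.concatenated, which the scripts below walk.
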
bin/hash_all.py (+30, -3)

@@ -60,8 +60,8 @@ def main():
     sys.stdout.write("HASH ALL...")
     sys.stdout.flush()

-    # =========================================================================
-    # walk the subdirectories
+    # ==========================================================================
+    # walk the subdirectories of this gradeable
     users_dir = os.path.join(args.basepath, "users")
     if not os.path.isdir(users_dir):
         print("Error: Unable to find users directory")
@@ -81,7 +81,34 @@ def main():
             my_hashes_file = os.path.join(my_dir, "hashes.txt")
             hasher(lichen_config_data, my_tokenized_file, my_hashes_file)

-    # ===========================================================================
+    # ==========================================================================
+    # walk the subdirectories of the other gradeables
+
+    other_gradeables_dir = os.path.join(args.basepath, "other_gradeables")
+    if not os.path.isdir(other_gradeables_dir):
+        print("Error: Unable to find other gradeables directory")
+        exit(1)
+
+    for other_gradeable in sorted(os.listdir(other_gradeables_dir)):
+        other_gradeable_dir = os.path.join(other_gradeables_dir, other_gradeable)
+        if not os.path.isdir(other_gradeable_dir):
+            continue
+
+        for other_user in sorted(os.listdir(other_gradeable_dir)):
+            other_user_dir = os.path.join(other_gradeable_dir, other_user)
+            if not os.path.isdir(other_user_dir):
+                continue
+
+            for other_version in sorted(os.listdir(other_user_dir)):
+                other_version_dir = os.path.join(other_user_dir, other_version)
+                if not os.path.isdir(other_version_dir):
+                    continue
+
+                other_tokenized_file = os.path.join(other_version_dir, "tokens.json")
+                other_hashes_file = os.path.join(other_version_dir, "hashes.txt")
+                hasher(lichen_config_data, other_tokenized_file, other_hashes_file)
+
+    # ==========================================================================
     # hash the provided code
     provided_code_tokenized = os.path.join(args.basepath, "provided_code", "tokens.json")
     provided_code_hashed = os.path.join(args.basepath, "provided_code", "hashes.txt")

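The nested walk added above is repeated almost verbatim in tokenize_all.py below; only the per-version filenames differ (tokens.json and hashes.txt here, submission.concatenated and tokens.json there). A hypothetical shared generator (not part of this commit) makes the common structure explicit, assuming the basepath/other_gradeables/<gradeable>/<user>/<version>/ layout produced by concatenate_all.py:

import os

def walk_other_versions(basepath):
    # Yield every <gradeable>/<user>/<version> directory under other_gradeables.
    other_gradeables_dir = os.path.join(basepath, "other_gradeables")
    for gradeable in sorted(os.listdir(other_gradeables_dir)):
        gradeable_dir = os.path.join(other_gradeables_dir, gradeable)
        if not os.path.isdir(gradeable_dir):
            continue
        for user in sorted(os.listdir(gradeable_dir)):
            user_dir = os.path.join(gradeable_dir, user)
            if not os.path.isdir(user_dir):
                continue
            for version in sorted(os.listdir(user_dir)):
                version_dir = os.path.join(user_dir, version)
                if os.path.isdir(version_dir):
                    yield version_dir

Both scripts could then iterate for other_version_dir in walk_other_versions(args.basepath): and apply hasher() or tokenize() to the files inside, instead of duplicating the three nested loops.
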
bin/tokenize_all.py (+27, -1)

@@ -53,7 +53,7 @@ def main():
         lichen_config_data = json.load(lichen_config)

     # ===========================================================================
-    # walk the subdirectories
+    # walk the subdirectories to tokenize this gradeable's submissions
     users_dir = os.path.join(args.basepath, "users")
     if not os.path.isdir(users_dir):
         print("Error: Unable to find users directory")
@@ -73,6 +73,32 @@ def main():
             my_tokenized_file = os.path.join(my_dir, "tokens.json")
             tokenize(lichen_config_data, my_concatenated_file, my_tokenized_file)

+    # ===========================================================================
+    # tokenize the other prior term gradeables' submissions
+    other_gradeables_dir = os.path.join(args.basepath, "other_gradeables")
+    if not os.path.isdir(other_gradeables_dir):
+        print("Error: Unable to find other gradeables directory")
+        exit(1)
+
+    for other_gradeable in sorted(os.listdir(other_gradeables_dir)):
+        other_gradeable_dir = os.path.join(other_gradeables_dir, other_gradeable)
+        if not os.path.isdir(other_gradeable_dir):
+            continue
+
+        for other_user in sorted(os.listdir(other_gradeable_dir)):
+            other_user_dir = os.path.join(other_gradeable_dir, other_user)
+            if not os.path.isdir(other_user_dir):
+                continue
+
+            for other_version in sorted(os.listdir(other_user_dir)):
+                other_version_dir = os.path.join(other_user_dir, other_version)
+                if not os.path.isdir(other_version_dir):
+                    continue
+
+                other_concatenated_file = os.path.join(other_version_dir, "submission.concatenated")
+                other_tokenized_file = os.path.join(other_version_dir, "tokens.json")
+                tokenize(lichen_config_data, other_concatenated_file, other_tokenized_file)
+
     # ===========================================================================
     # tokenize the provided code
     provided_code_concat = os.path.join(args.basepath, "provided_code", "submission.concatenated")

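Taken together, the commit message points at process_all.sh as the driver, and the data dependencies imply a fixed order: concatenation produces submission.concatenated, tokenization turns it into tokens.json, hashing turns that into hashes.txt, and compare_hashes presumably consumes the hashes last. A rough illustration of that ordering only (placeholder paths and arguments, not process_all.sh's real interface):

import subprocess

def run_pipeline(basepath, datapath):
    # Order matters: each stage reads the previous stage's output files.
    for script in ("concatenate_all.py", "tokenize_all.py", "hash_all.py"):
        subprocess.run(["python3", f"bin/{script}", basepath, datapath], check=True)
    # compare_hashes runs after hashing; its invocation is not shown in this diff.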