Skip to content

Commit 01f33bb

Browse files
Remove previously tokenized files if concat occurs (#21)
* fix concat on regex * temp git ignore change * remove dir * remove check internally * fix tokenize * fix filesystem tweaks * rm filesystem stuff * remove
1 parent 7d7a25a commit 01f33bb

File tree

3 files changed

+11
-13
lines changed

3 files changed

+11
-13
lines changed

bin/concatenate_all.py

+10-9
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import os
99
import json
1010
import sys
11+
import shutil
1112
import fnmatch
1213

1314
CONFIG_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..', 'config')
@@ -71,13 +72,8 @@ def main():
7172
if not os.path.isdir(my_concatenated_dir):
7273
os.makedirs(my_concatenated_dir)
7374
my_concatenated_file=os.path.join(my_concatenated_dir,"submission.concatenated")
75+
total_concat = 0
7476
with open(my_concatenated_file,'w') as my_cf:
75-
# print a brief header of information
76-
my_cf.write("SEMESTER: "+semester+"\n")
77-
my_cf.write("COURSE: "+course+"\n")
78-
my_cf.write("GRADEABLE: "+gradeable+"\n")
79-
my_cf.write("USER: "+user+"\n")
80-
my_cf.write("VERSION: "+version+"\n")
8177
# loop over all files in all subdirectories
8278
base_path = os.path.join(submission_dir,user,version)
8379
for my_dir,dirs,my_files in os.walk(base_path):
@@ -88,19 +84,24 @@ def main():
8884
for e in expressions:
8985
files_filtered.extend(fnmatch.filter(files, e.strip()))
9086
files = files_filtered
87+
total_concat += len(files)
9188
for my_file in files:
9289
# skip the timestamp
9390
if my_file == ".submit.timestamp":
9491
continue
9592
absolute_path=os.path.join(my_dir,my_file)
9693
relative_path=absolute_path[len(base_path):]
9794
# print a separator & filename
98-
my_cf.write("----------------------------------------------------\n")
99-
my_cf.write("FILE: "+relative_path+"\n\n")
10095
with open(absolute_path, encoding='ISO-8859-1') as tmp:
10196
# append the contents of the file
10297
my_cf.write(tmp.read())
103-
my_cf.write("\n")
98+
# Remove concat file if there is no content...
99+
if total_concat == 0:
100+
os.remove(my_concatenated_file)
101+
p2 = os.path.join(course_dir, "lichen", "tokenized", gradeable, user, version)
102+
if os.path.isdir(p2):
103+
shutil.rmtree(p2)
104+
os.rmdir(my_concatenated_dir)
104105

105106
print ("done")
106107

bin/hash_all.py

-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
import json
1414
import hashlib
1515

16-
1716
CONFIG_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..', 'config')
1817
with open(os.path.join(CONFIG_PATH, 'submitty.json')) as open_file:
1918
OPEN_JSON = json.load(open_file)

bin/tokenize_all.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,7 @@ def main():
8383
if not os.path.isdir(my_tokenized_dir):
8484
os.makedirs(my_tokenized_dir)
8585
my_tokenized_file=os.path.join(my_tokenized_dir,"tokens.json")
86-
# Check to see if we have already tokenized...
87-
if(not os.path.isfile(my_tokenized_file)):
88-
tokenize(args,my_concatenated_file,my_tokenized_file)
86+
tokenize(args,my_concatenated_file,my_tokenized_file)
8987

9088
print ("done")
9189

0 commit comments

Comments
 (0)