Skip to content

Commit 71cd432

Browse files
authored
Merge branch 'main' into react-app
2 parents 13b3018 + 243cf24 commit 71cd432

7 files changed

Lines changed: 369 additions & 103 deletions

File tree

src/backups/backup_config.json

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,17 @@
55
"offset": 0
66
},
77
"course": {
8-
"lab_start": 0,
9-
"lab_end": 11,
10-
"hw_start": 1,
11-
"hw_end": 10,
12-
"projects": ["maps", "ants"]
8+
"lab_start": 1,
9+
"lab_end": 1,
10+
"hw_start": 2,
11+
"hw_end": 2,
12+
"projects": ["maps"]
1313
},
1414
"data": {
1515
"in_roster": "../../data/private/data_c88c_sp25_gradescope_roster.csv",
1616
"out_roster": "../../data/private/data_c88c_sp25_emails.txt",
1717
"dump": "../../data/private/data_c88c_sp25_dump.json",
1818
"database": "../../data/private/data_c88c_sp25_backups.db"
19-
}
19+
},
20+
"deidentify": true
2021
}

src/backups/db.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# Statements that drop each table if it exists; IF EXISTS makes them no-ops
# on a database that does not have the table yet.
DROP_BACKUP_METADATA_TABLE_CMD = "DROP TABLE IF EXISTS backup_metadata"

DROP_OKPY_MESSAGES_TABLE_CMD = "DROP TABLE IF EXISTS okpy_messages"

# Schema of the backup_metadata table, keyed by backup_id.
# The boolean columns are stored as INTEGER and restricted to 0/1 with
# IN (0, 1) instead of the TRUE/FALSE keywords, because SQLite only
# recognizes those keywords from version 3.23.0 onward.
CREATE_BACKUP_METADATA_TABLE_CMD = """
CREATE TABLE backup_metadata (
    backup_id TEXT PRIMARY KEY,

    -- ISO8601 string
    created TEXT NOT NULL,

    -- okpy endpoint for course (includes semester, e.g. cal/cs88/sp25)
    course TEXT NOT NULL,

    -- okpy assignment endpoint (not including course endpoint prefix,
    -- e.g. lab00. to get full endpoint, do {course}/{assignment})
    assignment TEXT NOT NULL,

    student_email TEXT NOT NULL,

    is_late INTEGER NOT NULL CHECK (is_late IN (0, 1)),

    -- whether student used --submit flag (educated guess)
    submitted INTEGER NOT NULL CHECK (submitted IN (0, 1)),

    -- each backup has one or more kinds of okpy "messages"
    -- which contain different data about the student's work.
    -- see okpy_messages table for more information.
    -- the following columns contain the path to the file
    -- containing the contents of the okpy message, or NULL if it doesn't exist.
    autograder_output_location TEXT,
    grading_location TEXT,
    file_contents_location TEXT,
    analytics_location TEXT,
    scoring_location TEXT,
    unlock_location TEXT
);
"""

# Schema of the okpy_messages lookup table: one row per kind of okpy message.
CREATE_OKPY_MESSAGES_TABLE_CMD = """
CREATE TABLE okpy_messages (
    id INTEGER PRIMARY KEY,
    type TEXT NOT NULL,
    description TEXT NOT NULL
);
"""
47+
48+
# Insert one row into backup_metadata using named parameters. Bind with a
# mapping whose keys match the :placeholders below. Columns are listed
# explicitly so the statement does not depend on the table's physical
# column order.
INSERT_BACKUP_METADATA_CMD = """
INSERT INTO backup_metadata (
    backup_id,
    created,
    course,
    assignment,
    student_email,

    is_late,
    submitted,

    autograder_output_location,
    grading_location,
    file_contents_location,
    analytics_location,
    scoring_location,
    unlock_location
) VALUES (
    :backup_id,
    :created,
    :course,
    :assignment,
    :student_email,

    :is_late,
    :submitted,

    :autograder_output_location,
    :grading_location,
    :file_contents_location,
    :analytics_location,
    :scoring_location,
    :unlock_location
);
"""

# Insert one row into okpy_messages using named parameters
# (:id, :type, :description).
INSERT_OKPY_MESSAGES_TABLE_CMD = """
INSERT INTO okpy_messages (id, type, description) VALUES
    (:id, :type, :description)
"""
72+
73+
# Key names of each entry in OKPY_MESSAGES_VALUES, in order.
_OKPY_MESSAGE_FIELDS = ("id", "type", "description")

# One (id, type, description) row per known kind of okpy message.
_OKPY_MESSAGE_ROWS = (
    (1, "autograder_output", "OkPy autograder output string"),
    (2, "grading", "For each test, a count of how many were locked/passed/failed"),
    (3, "file_contents", "Source file names and their contents"),
    (
        4,
        "analytics",
        "Count of how many attempts student made on a problem and boolean of whether it was solved",
    ),
    # probably only occurs if --score was passed
    (5, "scoring", "Total score for that OkPy run"),
    (6, "unlock", "Unlocking test output"),
    # NOTE: there is another okpy message called "email" but that just contains the student's email
)

# List of dicts, one per okpy message kind, each with the keys
# "id", "type" and "description".
OKPY_MESSAGES_VALUES = [
    dict(zip(_OKPY_MESSAGE_FIELDS, row)) for row in _OKPY_MESSAGE_ROWS
]

src/backups/deidentify.py

Lines changed: 0 additions & 15 deletions
This file was deleted.

src/backups/main.py

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
from emails import process_roster
2020
from request import get_backups_for_all_users_all_assignments
21-
from storage import setup_db, PREFIX, responses_to_backups, store_all_backups
21+
from storage import setup_db, PREFIX, responses_to_backups
2222

2323
DEFAULT_CONFIG_FILE = "backup_config.json"
2424

@@ -209,6 +209,9 @@ def store(
209209
help="Name of sqlite database .db file where backups will be stored"
210210
),
211211
] = None,
212+
deidentify: Annotated[
213+
bool, typer.Option(help="Whether to deidentify student emails")
214+
] = False,
212215
config: Annotated[
213216
str, typer.Option(help="Configuration .json file")
214217
] = DEFAULT_CONFIG_FILE,
@@ -238,6 +241,10 @@ def store(
238241
database = config_dict["data"]["database"]
239242
assert database.endswith(".db"), "database must be a sqlite .db file"
240243

244+
deidentify = config_dict.get("deidentify", deidentify)
245+
if verbose and deidentify:
246+
print("Deidentifying student emails")
247+
241248
# take HTTP response data and persist it in the database
242249
if timeit:
243250
start = time()
@@ -253,18 +260,23 @@ def store(
253260

254261
with open(dump, "r") as f:
255262
emails_to_responses = json.load(f)
256-
backups = responses_to_backups(emails_to_responses, course_endpoint)
263+
264+
num_backups = responses_to_backups(
265+
emails_to_responses, course_endpoint, PREFIX, cur, deidentify
266+
)
257267
if verbose:
258-
print(f"Processed {len(backups)} backups from {dump}")
268+
print(f"Processed {num_backups} backups from {dump}")
259269

260-
store_all_backups(cur, backups)
261-
cur.execute("SELECT COUNT(*) FROM backups_metadata")
270+
cur.execute("SELECT COUNT(*) FROM backup_metadata")
262271
num_rows = cur.fetchone()[0]
272+
assert (
273+
num_backups == num_rows
274+
), "num_backups should match num_rows in backup_metadata table"
263275
if verbose:
264276
print(
265-
f"Wrote backup file contents to {storage_dir} and inserted {num_rows} rows into backups_metadata table"
277+
f"Wrote backup file contents to {storage_dir} and inserted {num_rows} rows into backup_metadata table"
266278
)
267-
cur.execute("SELECT * FROM backups_metadata LIMIT 10")
279+
cur.execute("SELECT * FROM backup_metadata LIMIT 10")
268280
rows = cur.fetchall()
269281
print("First 10 rows:")
270282
for r in rows:
@@ -275,12 +287,6 @@ def store(
275287
print(f"Finished storing backups in {database} in {end - start} seconds")
276288

277289

278-
@app.command()
279-
def deidentify():
280-
"""Not implemented yet"""
281-
pass
282-
283-
284290
@app.command()
285291
def query():
286292
"""Not implemented yet"""

src/backups/models.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
import json
from typing import Optional
2+
3+
4+
class Backup:
    """Metadata describing a single backup.

    The attribute names match the columns of the ``backup_metadata`` table.
    Each ``*_location`` attribute holds the path where the corresponding okpy
    message was written, or ``None`` when the backup has no such message.
    """

    def __init__(
        self,
        backup_id: str,
        created: str,
        course: str,
        assignment: str,
        student_email: str,
        is_late: bool,
        submitted: bool,
        autograder_output_location: Optional[str] = None,
        grading_location: Optional[str] = None,
        file_contents_location: Optional[str] = None,
        analytics_location: Optional[str] = None,
        scoring_location: Optional[str] = None,
        unlock_location: Optional[str] = None,
    ):
        """Store every argument unchanged on the instance under the same name."""
        # identifying fields
        self.backup_id = backup_id
        self.created = created
        self.course = course
        self.assignment = assignment
        self.student_email = student_email

        # boolean flags
        self.is_late = is_late
        self.submitted = submitted

        # per-message storage locations (None when the message is absent)
        self.autograder_output_location = autograder_output_location
        self.grading_location = grading_location
        self.file_contents_location = file_contents_location
        self.analytics_location = analytics_location
        self.scoring_location = scoring_location
        self.unlock_location = unlock_location
36+
37+
38+
class OkPyMessage:
    """Base class for one kind of okpy message belonging to a backup.

    Subclasses say where the message is stored (``location``) and how its
    contents are serialized (``write``).
    """

    def __init__(self, contents):
        # Raw message payload; its shape depends on the concrete subclass.
        self.contents = contents

    @staticmethod
    def location(directory):
        """Return the path under *directory* where this message kind is stored.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError("subclasses must implement location()")

    def write(self, directory):
        """Persist ``self.contents`` under *directory*.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError("subclasses must implement write()")
41+
42+
43+
class AutograderOutputMessage(OkPyMessage):
    """Okpy autograder output, stored as a plain-text file."""

    @staticmethod
    def location(directory):
        """Return the path of ``autograder_output.txt`` inside *directory*."""
        return f"{directory}/autograder_output.txt"

    def write(self, directory):
        """Write ``self.contents`` to ``autograder_output.txt`` in *directory*.

        *directory* must already exist, and ``self.contents`` must be a
        ``str`` because the file is opened in text mode.
        """
        # Resolve the path through self.location so the path and the writer
        # cannot drift apart. An explicit UTF-8 encoding makes the output
        # independent of the platform's default locale.
        with open(self.location(directory), "w", encoding="utf-8") as f:
            f.write(self.contents)
51+
52+
53+
class GradingMessage(OkPyMessage):
    """Okpy grading message, stored as a JSON file."""

    @staticmethod
    def location(directory):
        """Return the path of ``grading.json`` inside *directory*."""
        return f"{directory}/grading.json"

    def write(self, directory):
        """Serialize ``self.contents`` as indented JSON to ``grading.json``.

        *directory* must already exist, and ``self.contents`` must be JSON
        serializable.
        """
        # Resolve the path through self.location rather than a hard-coded
        # class name, and fix the encoding so output does not depend on locale.
        with open(self.location(directory), "w", encoding="utf-8") as f:
            json.dump(self.contents, f, indent=2)
61+
62+
63+
class FileContentsMessage(OkPyMessage):
    """Okpy file-contents message: a mapping of source file name to contents."""

    @staticmethod
    def location(directory):
        """Return *directory* itself."""
        # NOTE: a file content message's location is a DIRECTORY rather than a file
        # since there may be multiple source files in a student's backup
        return directory

    def write(self, directory):
        """Write each entry of ``self.contents`` to its own file in *directory*.

        ``self.contents`` must be a mapping of file name to contents. Each
        value is converted with ``str()`` before it is written. *directory*
        must already exist.
        """
        target_dir = self.location(directory)
        for src_file_name, src_file_contents in self.contents.items():
            # NOTE(review): file names are used verbatim when building the path;
            # confirm upstream that they cannot contain path separators or "..".
            # An explicit UTF-8 encoding keeps non-ASCII source text from
            # failing on platforms whose default encoding is not UTF-8.
            with open(f"{target_dir}/{src_file_name}", "w", encoding="utf-8") as f:
                f.write(str(src_file_contents))
76+
77+
78+
class AnalyticsMessage(OkPyMessage):
    """Okpy analytics message, stored as a JSON file."""

    @staticmethod
    def location(directory):
        """Return the path of ``analytics.json`` inside *directory*."""
        return f"{directory}/analytics.json"

    def write(self, directory):
        """Serialize ``self.contents`` as indented JSON to ``analytics.json``.

        *directory* must already exist, and ``self.contents`` must be JSON
        serializable.
        """
        # Resolve the path through self.location rather than a hard-coded
        # class name, and fix the encoding so output does not depend on locale.
        with open(self.location(directory), "w", encoding="utf-8") as f:
            json.dump(self.contents, f, indent=2)
86+
87+
88+
class ScoringMessage(OkPyMessage):
    """Okpy scoring message, stored as a JSON file."""

    @staticmethod
    def location(directory):
        """Return the path of ``scoring.json`` inside *directory*."""
        return f"{directory}/scoring.json"

    def write(self, directory):
        """Serialize ``self.contents`` as indented JSON to ``scoring.json``.

        *directory* must already exist, and ``self.contents`` must be JSON
        serializable.
        """
        # Resolve the path through self.location rather than a hard-coded
        # class name, and fix the encoding so output does not depend on locale.
        with open(self.location(directory), "w", encoding="utf-8") as f:
            json.dump(self.contents, f, indent=2)
96+
97+
98+
class UnlockMessage(OkPyMessage):
    """Okpy unlock message, stored as a JSON file."""

    @staticmethod
    def location(directory):
        """Return the path of ``unlock.json`` inside *directory*."""
        return f"{directory}/unlock.json"

    def write(self, directory):
        """Serialize ``self.contents`` as indented JSON to ``unlock.json``.

        *directory* must already exist, and ``self.contents`` must be JSON
        serializable.
        """
        # Write to this class's own location. The previous code opened
        # GradingMessage.location(directory), which overwrote grading.json
        # and never created unlock.json.
        with open(self.location(directory), "w", encoding="utf-8") as f:
            json.dump(self.contents, f, indent=2)
106+
107+
108+
# Maps each okpy message kind (a string key) to the OkPyMessage subclass
# that knows where that kind is stored and how to write it.
MESSAGE_KIND_TO_CLASS = {
    "autograder_output": AutograderOutputMessage,
    "grading": GradingMessage,
    "file_contents": FileContentsMessage,
    "analytics": AnalyticsMessage,
    "scoring": ScoringMessage,
    "unlock": UnlockMessage,
}

0 commit comments

Comments
 (0)