Skip to content

Commit a0bfed1

Browse files
authored
Remaining dev work before Formative Study 2 session P2 (#116)
* Determine good candidates for study * Use 61A FA25 when seeding db * Fix backend bugs * Implement print statements * Adjust histogram buckets manually * Misc bugfixes/todos * Add TODO about EC problems * Run rubocop * ruff check --fix * Run ruff format on src/notebooks * Run ruff format on src/backups * Remove Rails.logger statements
1 parent 8512d11 commit a0bfed1

15 files changed

Lines changed: 1955 additions & 221 deletions

File tree

src/backups/README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,17 +93,18 @@ sqlite3 data/private/out/dev/c88c_fa25.db .dump > data/private/out/dev/c88c_fa25
9393
3. Update the SQL file:
9494
1. Remove `../../data/private/` prefix from paths. **IMPORTANT:** Make sure you are removing the trailing `/`.
9595
2. Remove/comment out `CREATE TABLE` statements since that will interfere with the Rails database migrations (Rails will already handle table creation on its own end, so if you have a duplicate `CREATE TABLE` statement Rails will error).
96+
3. Also remove/comment out `CREATE INDEX` statements (the indexes may already have been created through the data track).
9697
4. **Optional if not done already:** [Generate corresponding Rails model(s)](https://guides.rubyonrails.org/command_line.html#generating-models) **in the `src/snapshots-app` directory** by running the following command. If you are an internal contributor working with the toy data from `data.zip`, skip this step.
9798
```sh
9899
rails generate model <model_name> <column_name:data_type> ...
99100
```
100101
> [!CAUTION]
101102
> THE FOLLOWING STEP WILL RESET (e.g. delete everything) AND RE-MIGRATE THE RAILS DB. BE CAREFUL!
102-
5. Run the following command **in the `src/snapshots-app` directory**:
103+
5. Run the following command **in the `src/snapshots-app` directory**:
103104
```sh
104105
rails db:migrate:reset
105106
```
106-
6. Run the following command **in the root directory** of the repository to execute commands from output `.sql` dump into the Rails app `development.sqlite3` database. Replace `$PATH_TO_SQL_FILE` with the same value from steps 1 and 2:
107+
6. Run the following command **in the root directory** of the repository to execute commands from output `.sql` dump into the Rails app `development.sqlite3` database. Replace `$PATH_TO_SQL_FILE` with the same value from steps 1 and 2:
107108
```sh
108109
# General command
109110
sqlite3 src/snapshots-app/storage/development.sqlite3 < $PATH_TO_SQL_FILE

src/backups/configs/dev/dev2.json

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
{
2+
"okpy_api": {
3+
"course_endpoint": "cal/cs88/fa25",
4+
"limit": 150,
5+
"offset": 0
6+
},
7+
"course": {
8+
"lab_start": 1,
9+
"lab_end": 0,
10+
"hw_start": 1,
11+
"hw_end": 0,
12+
"projects": ["ants"],
13+
"assignment_files": {
14+
"ants": ["ants.py"]
15+
}
16+
},
17+
"data": {
18+
"in_roster": "../../data/private/out/dev2/roster.csv",
19+
"out_roster": "../../data/private/out/dev2/emails.txt",
20+
"dump": "../../data/private/out/dev2/okpy_api_output.json",
21+
"database": "../../data/private/out/dev2/snapshots.db",
22+
"lint_json": "../../data/private/out/dev2/lint_errors.json"
23+
},
24+
"deidentify": true
25+
}

src/backups/configs/dev/dev3.json

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
{
2+
"okpy_api": {
3+
"course_endpoint": "cal/cs61a/fa25",
4+
"limit": 150,
5+
"offset": 0
6+
},
7+
"course": {
8+
"lab_start": 1,
9+
"lab_end": 0,
10+
"hw_start": 1,
11+
"hw_end": 0,
12+
"projects": ["ants"],
13+
"assignment_files": {
14+
"ants": ["ants.py"]
15+
}
16+
},
17+
"data": {
18+
"in_roster": "../../data/private/out/dev3/roster.csv",
19+
"out_roster": "../../data/private/out/dev3/emails.txt",
20+
"dump": "../../data/private/out/dev3/okpy_api_output.json",
21+
"database": "../../data/private/out/dev3/snapshots.db",
22+
"lint_json": "../../data/private/out/dev3/lint_errors.json"
23+
},
24+
"deidentify": true
25+
}

src/backups/email_hash_lookup.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import hashlib
2+
3+
4+
def sha256_short(s: str, length: int = 8) -> str:
    """Return the first `length` hex characters of the SHA-256 digest of `s`.

    Args:
        s: Text to hash. It is encoded as UTF-8 before hashing.
        length: Number of leading hex characters to keep (1-64). Defaults
            to 8, the prefix length this script has always used.

    Returns:
        The truncated lowercase hexadecimal digest.

    Raises:
        ValueError: If `length` is outside the 1-64 range of a SHA-256
            hex digest.
    """
    if not 1 <= length <= 64:
        raise ValueError(f"length must be between 1 and 64, got {length}")
    return hashlib.sha256(s.encode("utf-8")).hexdigest()[:length]
7+
8+
9+
def find_emails_by_hashes(input_file, output_file, target_hashes):
    """Recover the emails whose short hashes appear in `target_hashes`.

    Reads one email per line from `input_file`, hashes each non-blank line
    with `sha256_short`, and writes every email whose hash is listed in
    `target_hashes` to `output_file`, one per line, in the order the hashes
    were given. A warning is printed for each target hash with no match.

    Args:
        input_file: Path to a text file containing one email per line.
        output_file: Path the recovered emails are written to (overwritten).
        target_hashes: Iterable of short hash strings to look up.

    Returns:
        The list of recovered emails, or an empty list if one of the files
        could not be opened. Missing files are reported on stdout rather
        than raised.
    """
    # Maps each short hash back to the email that produced it.
    hash_map = {}

    try:
        # Read as UTF-8 explicitly: the hash is computed from the decoded
        # text, so a platform-dependent codec could change the result.
        with open(input_file, "r", encoding="utf-8") as f:
            for line in f:
                email = line.strip()
                if not email:
                    continue
                h = sha256_short(email)
                # Truncated hashes can collide; surface it instead of
                # silently attributing the hash to the wrong address.
                if h in hash_map and hash_map[h] != email:
                    print(
                        f"Warning: Hash {h} matches more than one email; "
                        "using the last one read"
                    )
                hash_map[h] = email

        # Keep the caller's ordering so output lines line up with the targets.
        matched_emails = []
        for h in target_hashes:
            email = hash_map.get(h)
            if email is None:
                print(f"Warning: No email found for hash {h}")
            else:
                matched_emails.append(email)

        with open(output_file, "w", encoding="utf-8") as f:
            f.writelines(f"{email}\n" for email in matched_emails)

        print(f"Successfully recovered {len(matched_emails)} emails to {output_file}")
        return matched_emails

    except FileNotFoundError as err:
        # err.filename names the path that actually failed (the input file,
        # or the output file when its parent directory is missing).
        print(f"Error: The file '{err.filename}' was not found.")
        return []
40+
41+
42+
# Command-line entry point. Running the script with no arguments looks up the
# built-in hash list using the default input/output paths below.
if __name__ == "__main__":
    import argparse

    # Hashes looked up when none are passed on the command line.
    default_hashes = [
        "e3384165",
        "1faf1492",
        "0757b4af",
        "5e0b5dff",
        "55d9e0b2",
        "4349b29d",
        "27f16a00",
        "1bcf17a8",
        "bbb281e4",
        "18e36d10",
        "d0d1b4b0",
        "94d2cb91",
        "09e6bcbc",
        "08a08a79",
        "1a3aee97",
        "3ff28b43",
        "4972bef4",
        "a8faf137",
        "d6797b5b",
        "fc1888f1",
        "c2b307c8",
        "395b6a1a",
        "f0cd1289",
        "90cfed97",
        "41a86dbb",
    ]

    parser = argparse.ArgumentParser(
        description="Recover emails from their short SHA-256 hashes."
    )
    parser.add_argument(
        "hashes",
        nargs="*",
        default=default_hashes,
        help="short hashes to look up (defaults to the built-in list)",
    )
    parser.add_argument(
        "--input-file",
        default="../../data/private/out/cs61a/fa25/emails.txt",
        help="text file with one email per line",
    )
    parser.add_argument(
        "--output-file",
        default="../../data/private/out/dev3/emails.txt",
        help="file the recovered emails are written to",
    )
    args = parser.parse_args()

    find_emails_by_hashes(
        input_file=args.input_file,
        output_file=args.output_file,
        target_hashes=args.hashes,
    )

src/backups/main.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,7 @@ def store(
298298
print(f"Finished storing backups in {database} in {end - start} seconds")
299299

300300

301+
# TODO: Re-run lint with the new schema for 61A and C88C FA25.
301302
@app.command()
302303
def lint(
303304
database: Annotated[

0 commit comments

Comments
 (0)