Skip to content

Commit 3861f21

Browse files
committed
Merge branch 'develop' of github.com-work:datakind/sst-app-api into develop
2 parents 4a789cf + 7034c13 commit 3861f21

File tree

5 files changed

+904
-1
lines changed

5 files changed

+904
-1
lines changed

src/webapp/database.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ class Base(DeclarativeBase):
5959
LOCAL_PASSWORD = "tester_password"
6060
DATETIME_TESTING = datetime.datetime(2024, 12, 26, 19, 37, 59, 753357)
6161

62+
# Test institution and batch - reuse the DEV USC Beaufort IDs so local test data matches DEV
63+
TEST_INST_UUID = uuid.UUID("942d4b0e-12e7-4d2a-9187-9508ae3cef7c")
64+
TEST_BATCH_UUID = uuid.UUID("3182f472-e079-4678-a0a1-9ca5ead6c49a")
65+
6266

6367
@event.listens_for(Mapper, "before_insert")
6468
@event.listens_for(Mapper, "before_update")
@@ -106,6 +110,19 @@ def init_db(env: str) -> None:
106110
updated_at=DATETIME_TESTING,
107111
)
108112
)
113+
# USC Beaufort - matches DEV for testing
114+
session.merge(
115+
InstTable(
116+
id=TEST_INST_UUID,
117+
name="University of South Carolina - Beaufort",
118+
state="SC",
119+
pdp_id="345000",
120+
schemas=["COURSE", "STUDENT"],
121+
created_at=DATETIME_TESTING,
122+
updated_at=DATETIME_TESTING,
123+
created_by=LOCAL_USER_UUID,
124+
)
125+
)
109126
session.merge(
110127
ApiKeyTable(
111128
id=LOCAL_APIKEY_UUID,
@@ -118,6 +135,94 @@ def init_db(env: str) -> None:
118135
valid=True,
119136
)
120137
)
138+
# Create test files and batches for LOCAL environment
139+
if env == "LOCAL":
140+
# Create test files
141+
test_file_1 = FileTable(
142+
id=uuid.UUID("f0bb3a20-6d92-4254-afed-6a72f43c562a"),
143+
inst_id=LOCAL_INST_UUID,
144+
name="test_course_file.csv",
145+
source="MANUAL_UPLOAD",
146+
uploader=LOCAL_USER_UUID,
147+
sst_generated=False,
148+
valid=True,
149+
schemas=["COURSE"], # Using string literal to avoid circular import
150+
created_at=DATETIME_TESTING,
151+
updated_at=DATETIME_TESTING,
152+
)
153+
test_file_2 = FileTable(
154+
id=uuid.UUID("cb02d06c-2a59-486a-9bdd-d394a4fcb833"),
155+
inst_id=LOCAL_INST_UUID,
156+
name="test_cohort_file.csv",
157+
source="MANUAL_UPLOAD",
158+
uploader=LOCAL_USER_UUID,
159+
sst_generated=False,
160+
valid=True,
161+
schemas=[
162+
"STUDENT"
163+
], # Using string literal to avoid circular import
164+
created_at=DATETIME_TESTING,
165+
updated_at=DATETIME_TESTING,
166+
)
167+
# Create test batch for LOCAL_INST_UUID (using a different ID)
168+
test_batch = BatchTable(
169+
id=uuid.UUID("f0bb3a20-6d92-4254-afed-6a72f43c562b"),
170+
inst_id=LOCAL_INST_UUID,
171+
name="test_batch_1",
172+
created_by=LOCAL_USER_UUID,
173+
created_at=DATETIME_TESTING,
174+
updated_at=DATETIME_TESTING,
175+
)
176+
# Associate files with batch
177+
test_batch.files.add(test_file_1)
178+
test_batch.files.add(test_file_2)
179+
session.merge(test_file_1)
180+
session.merge(test_file_2)
181+
session.merge(test_batch)
182+
183+
# Create test files for EDA test institution (TEST_INST_UUID)
184+
# Real files from DEV batch 3182f472e0794678a0a19ca5ead6c49a
185+
test_file_student = FileTable(
186+
id=uuid.UUID("f1d7c0a4-5211-459f-a79a-a1c2752f45c5"),
187+
inst_id=TEST_INST_UUID,
188+
name="1762967705679_AO1600pdp_AO1600_AR_DEIDENTIFIED_STUDYID_20250522120554.csv",
189+
source="MANUAL_UPLOAD",
190+
uploader=uuid.UUID("c8b57138-2529-4e1f-9e89-07399d165f85"),
191+
sst_generated=False,
192+
valid=True,
193+
schemas=["STUDENT"],
194+
created_at=DATETIME_TESTING,
195+
updated_at=DATETIME_TESTING,
196+
)
197+
test_file_course = FileTable(
198+
id=uuid.UUID("d19d0129-96de-464c-98e9-694996965c7b"),
199+
inst_id=TEST_INST_UUID,
200+
name="1762967705683_AO1600pdp_AO1600_COURSE_LEVEL_AR_DEIDENTIFIED_STUDYID_20250522120554.csv",
201+
source="MANUAL_UPLOAD",
202+
uploader=uuid.UUID("c8b57138-2529-4e1f-9e89-07399d165f85"),
203+
sst_generated=False,
204+
valid=True,
205+
schemas=["COURSE"],
206+
created_at=DATETIME_TESTING,
207+
updated_at=DATETIME_TESTING,
208+
)
209+
210+
# Test batch - matches DEV USC Beaufort
211+
test_batch = BatchTable(
212+
id=TEST_BATCH_UUID,
213+
inst_id=TEST_INST_UUID,
214+
name="Batch_2025-11-12_1762967767400",
215+
completed=True,
216+
created_by=uuid.UUID("c8b57138-2529-4e1f-9e89-07399d165f85"),
217+
created_at=DATETIME_TESTING,
218+
updated_at=DATETIME_TESTING,
219+
)
220+
# Associate files with batch
221+
test_batch.files.add(test_file_student)
222+
test_batch.files.add(test_file_course)
223+
session.merge(test_file_student)
224+
session.merge(test_file_course)
225+
session.merge(test_batch)
121226
session.commit()
122227
except Exception as e:
123228
session.rollback()

src/webapp/gcsutil.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,3 +358,28 @@ def get_file_contents(self, bucket_name: str, file_name: str) -> Any:
358358
blob = bucket.blob(file_name)
359359
res = blob.download_as_bytes()
360360
return res
361+
362+
def read_csv_as_dataframe(self, bucket_name: str, file_name: str) -> Any:
    """Read a CSV file from GCS and return it as a pandas DataFrame.

    Args:
        bucket_name: GCS bucket name.
        file_name: Full blob path (e.g., 'validated/filename.csv').

    Returns:
        pandas DataFrame parsed from the blob's contents.

    Raises:
        ValueError: If the bucket or the file does not exist.
    """
    # Function-scope import: pandas is loaded only when this method runs.
    import pandas as pd

    storage_client = storage.Client()
    # lookup_bucket returns None for a missing bucket (get_bucket would raise
    # the library's NotFound), so the documented ValueError contract holds.
    bucket = storage_client.lookup_bucket(bucket_name)
    if bucket is None:
        raise ValueError(f"Bucket not found: {bucket_name}")

    blob = bucket.blob(file_name)
    if not blob.exists():
        raise ValueError(f"File not found: {file_name}")

    # Open in text mode and let pandas parse directly from the file handle.
    with blob.open("r") as fh:
        return pd.read_csv(fh)

0 commit comments

Comments
 (0)