Skip to content

Commit b04545c

Browse files
committed
Allow supplemental API data to be passed to ColdfrontFetchProcessor
The logic for what API data files are fetched has changed slightly. If local API data files are provided through `COLDFRONT_API_FILEPATHS`, the processor loads all of them. Seperately, if `keycloak_client_id` is set, also fetches from remote Coldfront server Added unit test and updated e2e test
1 parent 72b6e76 commit b04545c

9 files changed

Lines changed: 148 additions & 17 deletions

File tree

process_report/process_report.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def validate_required_env_vars(required_env_vars):
5252
def main():
5353
"""Remove non-billable PIs and projects"""
5454
required_env_vars = []
55-
if not invoice_settings.coldfront_api_filepath:
55+
if not invoice_settings.coldfront_api_filepaths:
5656
required_env_vars.extend(["KEYCLOAK_CLIENT_ID", "KEYCLOAK_CLIENT_SECRET"])
5757
validate_required_env_vars(required_env_vars)
5858

process_report/processors/coldfront_fetch_processor.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import sys
33
import functools
44
import logging
5-
import json
5+
import yaml
66
from dataclasses import dataclass, field
77

88
import requests
@@ -32,7 +32,7 @@ class ColdfrontFetchProcessor(processor.Processor):
3232
nonbillable_projects: pandas.DataFrame = field(
3333
default_factory=loader.get_nonbillable_projects
3434
)
35-
coldfront_data_filepath: str = invoice_settings.coldfront_api_filepath
35+
coldfront_data_filepaths: tuple[str] = invoice_settings.coldfront_api_filepaths
3636

3737
@functools.cached_property
3838
def coldfront_client(self):
@@ -84,18 +84,22 @@ def _fetch_coldfront_allocation_api(self):
8484
return r.json()
8585

8686
def _get_coldfront_api_data(self):
87-
if self.coldfront_data_filepath:
88-
logger.info(
89-
f"Using Coldfront data from {self.coldfront_data_filepath} instead of API"
90-
)
91-
with open(self.coldfront_data_filepath, "r") as f:
92-
return json.load(f)
93-
else:
94-
return self._fetch_coldfront_allocation_api()
87+
api_data = []
88+
for api_data_file in self.coldfront_data_filepaths:
89+
logger.info(f"Using Coldfront data from {api_data_file}")
90+
with open(api_data_file) as f:
91+
api_data += yaml.safe_load(f)
92+
93+
if invoice_settings.keycloak_client_id:
94+
logger.info("Loading Coldfront data from remote server")
95+
api_data += self._fetch_coldfront_allocation_api()
96+
97+
return api_data
9598

9699
def _get_allocation_data(self, coldfront_api_data):
97100
"""Returns a mapping of (project ID, cluster name) tupels to a dict of project name, PI name, and institution code."""
98101
allocation_data = {}
102+
duplicate_projects = []
99103
for project_dict in coldfront_api_data:
100104
try:
101105
# Allow allocation to not have institute code
@@ -115,6 +119,8 @@ def _get_allocation_data(self, coldfront_api_data):
115119
project_dict["attributes"].get(CF_ATTR_IS_COURSE, "No").lower()
116120
== "yes"
117121
)
122+
if (project_id, cluster_name) in allocation_data:
123+
duplicate_projects.append((project_id, cluster_name))
118124
allocation_data[(project_id, cluster_name)] = {
119125
invoice.PROJECT_FIELD: project_name,
120126
invoice.PI_FIELD: pi_name,
@@ -125,6 +131,11 @@ def _get_allocation_data(self, coldfront_api_data):
125131
except KeyError:
126132
continue
127133

134+
if duplicate_projects:
135+
raise ValueError(
136+
f"Found allocations matched more than once in API data: {duplicate_projects}"
137+
)
138+
128139
return allocation_data
129140

130141
def _validate_allocation_data(self, allocation_data):

process_report/settings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
class Settings(BaseSettings):
99
# Coldfront info
10-
coldfront_api_filepath: str | None = None
10+
coldfront_api_filepaths: tuple[str, ...] = ()
1111
keycloak_client_id: str | None = None
1212
keycloak_client_secret: str | None = None
1313

process_report/tests/e2e/test_data/test_PI.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
PI,First Invoice Month,Initial Credits,1st Month Used,2nd Month Used
2+
pi4@harvard.edu,2025-06,1000.00,400.00,0.00
23
pi5@harvard.edu,2025-06,0.00,0.00,0.00
34
pi6@mit.edu,2025-06,0.00,0.00,0.00
45
pi4@example.edu,2025-06,0.00,0.00,0.00

process_report/tests/e2e/test_data/test_invoices/test_NERC OpenShift 2025-04.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ Invoice Month,Project - Allocation,Project - Allocation ID,Manager (PI),Cluster
33
2024-01,P1ID,P1ID,,shift,,,,,280,OpenStack GPUA100SXM4,0.013,100
44
2024-01,P2ID,P2ID,,shift,,,,,280,OpenShift CPU,0.013,200
55
2024-01,P3ID,P3ID,,shift,,,,,280,OpenShift CPU,0.013,300
6+
2024-01,P4-supplement,P4-supplement,,shift,,,,,300,OpenShift CPU,0.013,400
67
2024-01,P9ID,P9ID,,shift,,,,,280,OpenShift CPU,0.013,3000
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
- project:
2+
pi: pi4@harvard.edu
3+
attributes:
4+
Allocated Project ID: P4-supplement
5+
Allocated Project Name: P4-supplement-name
6+
resource:
7+
name: shift

process_report/tests/e2e/test_e2e_pipeline.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import json
23
from pathlib import Path
34
import pandas as pd
45
import pytest
@@ -112,7 +113,14 @@ def _prepare_pipeline_execution(
112113
# Environment setup for subprocess execution
113114
env = os.environ.copy()
114115
env["INVOICE_MONTH"] = INVOICE_MONTH
115-
env["COLDFRONT_API_FILEPATH"] = str(test_files["test_coldfront_api_data.json"])
116+
117+
# pydantic_settings parses complex types as JSON-encoded strings: https://pydantic.dev/docs/validation/latest/concepts/pydantic_settings/#parsing-environment-variable-values
118+
env["COLDFRONT_API_FILEPATHS"] = json.dumps(
119+
(
120+
str(test_files["test_coldfront_api_data.json"]),
121+
str(test_files["test_supplement_api_data.yaml"]),
122+
)
123+
)
116124
env["FETCH_FROM_S3"] = "false"
117125
env["UPLOAD_TO_S3"] = "false"
118126
env["invoice_path_template"] = str(test_files["test_invoice_dir"])

process_report/tests/unit/processors/test_coldfront_fetch_processor.py

Lines changed: 101 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,20 @@
1-
from unittest import TestCase, mock
1+
from unittest import mock
2+
import yaml
3+
24
import pandas
35
import pytest
46

7+
from process_report import settings
58
from process_report.tests import util as test_utils
9+
from process_report.tests.base import BaseTestCaseWithTempDir
10+
611

12+
class TestColdfrontFetchProcessor(BaseTestCaseWithTempDir):
13+
def setUp(self):
14+
# To trigger fetch from Coldfront
15+
settings.invoice_settings.keycloak_client_id = "foo"
16+
return super().setUp()
717

8-
class TestColdfrontFetchProcessor(TestCase):
918
def _get_test_invoice(
1019
self,
1120
allocation_project_id,
@@ -259,3 +268,93 @@ def test_missing_project_cluster_tuples(self, mock_get_allocation_data):
259268
assert str(cm.value) == (
260269
f"Projects {expected_missing} not found in Coldfront and are billable! Please check the project names"
261270
)
271+
272+
@mock.patch(
273+
"process_report.processors.coldfront_fetch_processor.ColdfrontFetchProcessor._fetch_coldfront_allocation_api",
274+
)
275+
def test_supplement_api_data_used_when_coldfront_missing(
276+
self, mock_get_allocation_data
277+
):
278+
"""Supplement API rows are applied to invoice in _get_allocation_data()."""
279+
mock_get_allocation_data.return_value = self._get_mock_allocation_data(
280+
["P1"],
281+
["PI1"],
282+
["IC1"],
283+
["stack"],
284+
)
285+
286+
# Supplemental data should follow same structure as Coldfront data,
287+
# only missing "Is Course?" and "Institution-Specific Code" fields
288+
supplemental_df = self._get_mock_allocation_data(
289+
["P2"],
290+
["PI2"],
291+
["Delete Institude Code"],
292+
["stack"],
293+
)
294+
del supplemental_df[0]["attributes"]["Institution-Specific Code"]
295+
296+
supplemental_filepath = self.tempdir / "supplement.yaml"
297+
with open(supplemental_filepath, "w") as f:
298+
yaml.safe_dump(supplemental_df, f)
299+
300+
test_invoice = self._get_test_invoice(
301+
["P1", "P2"], cluster_name=["stack", "stack"]
302+
)
303+
304+
expected_invoice = self._get_test_invoice(
305+
["P1", "P2"],
306+
["P1-name", "P2-name"],
307+
["PI1", "PI2"],
308+
["IC1", "N/A"],
309+
["stack", "stack"],
310+
[False, False],
311+
)
312+
313+
test_coldfront_fetch_proc = test_utils.new_coldfront_fetch_processor(
314+
data=test_invoice, coldfront_data_filepaths=[supplemental_filepath]
315+
)
316+
test_coldfront_fetch_proc.process()
317+
output_invoice = test_coldfront_fetch_proc.data
318+
319+
assert output_invoice.equals(expected_invoice)
320+
321+
@mock.patch(
322+
"process_report.processors.coldfront_fetch_processor.ColdfrontFetchProcessor._fetch_coldfront_allocation_api",
323+
)
324+
def test_duplicate_allocation_cluster_in_api_data(self, mock_get_allocation_data):
325+
"""Test that a ValueError is raised when API data contains duplicate (allocation, cluster) pairs."""
326+
mock_data = [
327+
{
328+
"resource": {"name": "stack"},
329+
"project": {"pi": "PI1"},
330+
"attributes": {
331+
"Allocated Project ID": "P1",
332+
"Allocated Project Name": "P1-name",
333+
"Institution-Specific Code": "IC1",
334+
},
335+
},
336+
{
337+
"resource": {"name": "stack"},
338+
"project": {"pi": "PI1"},
339+
"attributes": {
340+
"Allocated Project ID": "P1",
341+
"Allocated Project Name": "P1-name",
342+
"Institution-Specific Code": "IC1",
343+
},
344+
},
345+
]
346+
mock_get_allocation_data.return_value = mock_data
347+
348+
test_invoice = self._get_test_invoice(["P1"], cluster_name=["stack"])
349+
350+
test_coldfront_fetch_proc = test_utils.new_coldfront_fetch_processor(
351+
data=test_invoice
352+
)
353+
354+
with pytest.raises(ValueError) as cm:
355+
test_coldfront_fetch_proc.process()
356+
357+
assert (
358+
str(cm.value)
359+
== "Found allocations matched more than once in API data: [('P1', 'stack')]"
360+
)

process_report/tests/util.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ def new_coldfront_fetch_processor(
6262
invoice_month="0000-00",
6363
data=None,
6464
nonbillable_projects=None,
65-
coldfront_data_filepath=None,
65+
coldfront_data_filepaths=(),
6666
):
6767
if data is None:
6868
data = pandas.DataFrame()
@@ -71,7 +71,11 @@ def new_coldfront_fetch_processor(
7171
columns=["Project Name", "Cluster", "Is Timed", "Is Billable Override"]
7272
)
7373
return coldfront_fetch_processor.ColdfrontFetchProcessor(
74-
invoice_month, data, name, nonbillable_projects, coldfront_data_filepath
74+
invoice_month,
75+
data,
76+
name,
77+
nonbillable_projects,
78+
coldfront_data_filepaths,
7579
)
7680

7781

0 commit comments

Comments
 (0)