CCI-MOC · marcoagonzales007 · Apr 16, 2026 · Jun 11, 2026 · Jun 24, 2026 · Jun 24, 2026
diff --git a/process_report/loader.py b/process_report/loader.py
@@ -9,6 +9,7 @@
 from process_report import util
 from process_report.settings import invoice_settings
 from process_report.invoices import invoice
+from process_report.models.nonbillable_models import ExcludedProjectList, PIList
 
 # List of service invoices processed by pipeline. Change if new services are added.
 # Cannot simply filter by suffix because S3 can't do it
@@ -120,27 +121,18 @@ def load_prepay_credits(self) -> pandas.DataFrame:
         )
 
     @functools.lru_cache
-    def _load_pi_config(self, filepath: str) -> list[dict]:
-        with open(filepath) as file:
+    def _load_pi_config(self) -> PIList:
+        with open(invoice_settings.nonbillable_pis_filepath) as file:
             pi_list = yaml.safe_load(file)
 
-        if not isinstance(pi_list, list):
-            raise ValueError("pi.yaml must contain a YAML list")
-
-        return pi_list
+        return PIList.model_validate(pi_list)
 
     def get_nonbillable_pis(self) -> list[str]:
-        pi_list = self._load_pi_config(invoice_settings.nonbillable_pis_filepath)
-        return [pi["username"] for pi in pi_list if "non_billed_su_types" not in pi]
+        return self._load_pi_config().get_nonbillable_pis()
 
     def get_pi_non_billed_su_types(self) -> dict[str, list[str]]:
         """PI usernames -> list of SU types that receive credit (zeroed out)."""
-        pi_list = self._load_pi_config(invoice_settings.nonbillable_pis_filepath)
-        return {
-            pi["username"]: [su["name"] for su in pi["non_billed_su_types"]]
-            for pi in pi_list
-            if "non_billed_su_types" in pi
-        }
+        return self._load_pi_config().get_pi_non_billed_su_types()
 
     @functools.lru_cache
     def get_nonbillable_projects(self) -> pandas.DataFrame:
@@ -154,48 +146,10 @@ def get_nonbillable_projects(self) -> pandas.DataFrame:
            indicating whether matching projects should be treated as billable
         """
 
-        def _is_in_time_range(timed_object) -> bool:
-            # Leveraging inherent lexicographical order of YYYY-MM strings
-            return (
-                timed_object["start"] <= invoice_settings.invoice_month
-                and invoice_settings.invoice_month <= timed_object["end"]
-            )
-
-        project_list = []
         with open(invoice_settings.nonbillable_projects_filepath) as file:
-            projects_dict = yaml.safe_load(file)
-
-        for project in projects_dict:
-            project_name = project["name"]
-            cluster_list = project.get("clusters")
-            is_billable = project.get("is_billable", False)
-
-            if project.get("start"):
-                if not _is_in_time_range(project):
-                    continue
-
-                if cluster_list:
-                    for cluster in cluster_list:
-                        project_list.append(
-                            (project_name, cluster["name"], True, is_billable)
-                        )
-                else:
-                    project_list.append((project_name, None, True, is_billable))
-            elif cluster_list:
-                for cluster in cluster_list:
-                    cluster_start_time = cluster.get("start")
-                    if cluster_start_time:
-                        if _is_in_time_range(cluster):
-                            project_list.append(
-                                (project_name, cluster["name"], True, is_billable)
-                            )
-                    elif not cluster_start_time:
-                        project_list.append(
-                            (project_name, cluster["name"], False, is_billable)
-                        )
-            else:
-                project_list.append((project_name, None, False, is_billable))
-
+            data = yaml.safe_load(file)
+        projects = ExcludedProjectList.model_validate(data)
+        project_list = projects.get_nonbillable_projects(invoice_settings.invoice_month)
         return pandas.DataFrame(
             project_list,
             columns=[

diff --git a/process_report/models/cluster_names.txt b/process_report/models/cluster_names.txt
@@ -0,0 +1,6 @@
+ocp-prod
+virt
+ocp-test
+stack
+academic
+bm
diff --git a/process_report/models/nonbillable_models.py b/process_report/models/nonbillable_models.py
@@ -0,0 +1,168 @@
+import datetime
+import pydantic
+from typing import Annotated, TypeVar
+from functools import lru_cache
+from pathlib import Path
+
+_MODELS_DIR = Path(__file__).parent  # holds cluster_names.txt and su_types.txt
+
+
+@lru_cache
+def get_allowed_clusters() -> set[str]:
+    with open(_MODELS_DIR / "cluster_names.txt") as f:  # one cluster name per line
+        return {ln.strip() for ln in f if ln.strip()}  # trim whitespace, skip blanks
+
+
+@lru_cache
+def get_allowed_su_types() -> set[str]:
+    with open(_MODELS_DIR / "su_types.txt") as f:  # one SU type per line
+        return {ln.strip() for ln in f if ln.strip()}  # trim whitespace, skip blanks
+
+
+def validate_date(v: str) -> datetime.date:  # "YYYY-MM" -> first day of that month
+    return datetime.datetime.strptime(str(v), "%Y-%m").date()  # non-str -> ValueError
+
+
+DateField = Annotated[datetime.date, pydantic.BeforeValidator(validate_date)]
+
+
+class NamedObject(pydantic.BaseModel):  # base model for entries identified by `name`
+    name: str
+
+
+T = TypeVar("T", bound=NamedObject)  # item type held by UniqueObjectList
+
+
+class UniqueObjectList(pydantic.RootModel[list[T]]):
+    root: list[T]
+
+    @pydantic.model_validator(mode="after")
+    def validate_unique_names(self):
+        """Raise ValueError on the first item whose ``name`` was already seen."""
+        names_so_far: set[str] = set()
+        for entry in self.root:
+            if entry.name in names_so_far:
+                raise ValueError(f"{entry.name}: found duplicate name")
+            names_so_far.add(entry.name)
+        return self
+
+
+class ExcludedCluster(NamedObject):
+    start: DateField | None = None
+    end: DateField | None = None
+    reason: str | None = None
+
+    @pydantic.field_validator("name")
+    def only_allowed_cluster_names(cls, v):
+        """Reject names that are not listed in cluster_names.txt."""
+        if v not in get_allowed_clusters():
+            raise ValueError(f"'{v}' is not a valid cluster name")
+        return v
+
+
+ExcludedClusterList = UniqueObjectList[ExcludedCluster]  # clusters with unique names
+
+
+class ExcludedProject(NamedObject):
+    clusters: ExcludedClusterList = ExcludedClusterList([])
+    start: DateField | None = None
+    end: DateField | None = None
+    reason: str | None = None
+    is_billable: bool = False
+
+    @pydantic.model_validator(mode="after")
+    def validate_time_periods(self):
+        """Require start/end in pairs with end >= start (project and clusters)."""
+
+        def check_range(
+            start: datetime.date | None, end: datetime.date | None, scope: str
+        ) -> None:
+            if (start is None) != (end is None):
+                raise ValueError(
+                    f"{self.name}: Start and end dates must be provided together or not at all"
+                )
+            if start is not None and end is not None and end < start:
+                raise ValueError(
+                    f"{self.name}: End date must be after start date for {scope}"
+                )
+
+        check_range(self.start, self.end, "project")
+        for cluster in self.clusters.root:
+            # Name the cluster so its errors are not reported against the project.
+            check_range(cluster.start, cluster.end, f"cluster {cluster.name}")
+
+        return self
+
+
+class NonBilledSUType(NamedObject):
+    @pydantic.field_validator("name")
+    def only_allowed_su_types(cls, v):
+        """Reject names that are not listed in su_types.txt."""
+        if v not in get_allowed_su_types():
+            raise ValueError(f"'{v}' is not a valid SU type")
+        return v
+
+
+NonBilledSUTypeList = UniqueObjectList[NonBilledSUType]  # SU types with unique names
+
+
+class PIParticipant(NamedObject):  # NamedObject satisfies UniqueObjectList's T bound
+    name: str = pydantic.Field(alias="username")  # input key is `username`
+    non_billed_su_types: NonBilledSUTypeList | None = None
+
+    model_config = pydantic.ConfigDict(populate_by_name=True)
+
+
+class PIList(UniqueObjectList[PIParticipant]):
+    def get_nonbillable_pis(self) -> list[str]:
+        """Names of the PIs that have no ``non_billed_su_types`` entry."""
+        return [p.name for p in self.root if p.non_billed_su_types is None]
+
+    def get_pi_non_billed_su_types(self) -> dict[str, list[str]]:
+        """Map each PI that has ``non_billed_su_types`` to those SU type names."""
+        listed = [p for p in self.root if p.non_billed_su_types is not None]
+        names = ([su.name for su in p.non_billed_su_types.root] for p in listed)
+        return dict(zip((p.name for p in listed), names))
+
+
+class ExcludedProjectList(UniqueObjectList[ExcludedProject]):
+    def get_nonbillable_projects(
+        self, invoice_month: str
+    ) -> list[tuple[str, str | None, bool, bool]]:
+        """Flatten the entries that apply to ``invoice_month`` into tuples.
+
+        Args:
+            invoice_month: month being invoiced, formatted as "YYYY-MM".
+
+        Returns:
+            Tuples of (project name, cluster name or None, flag, is_billable).
+            The flag is True when the tuple was kept because a start/end range
+            contains the invoice month, False when the entry carries no dates.
+            Entries whose range does not contain the month are left out.
+        """
+        month = datetime.datetime.strptime(invoice_month, "%Y-%m").date()
+        rows: list[tuple[str, str | None, bool, bool]] = []
+
+        for project in self.root:
+            clusters = project.clusters.root
+            billable = project.is_billable
+
+            if project.start:
+                # A project-level range applies to every cluster it lists.
+                if project.start <= month <= project.end:
+                    names = [c.name for c in clusters] or [None]
+                    rows.extend((project.name, n, True, billable) for n in names)
+                continue
+
+            if not clusters:
+                rows.append((project.name, None, False, billable))
+                continue
+
+            # No project-level dates: each cluster is judged by its own range.
+            for cluster in clusters:
+                if not cluster.start:
+                    rows.append((project.name, cluster.name, False, billable))
+                elif cluster.start <= month <= cluster.end:
+                    rows.append((project.name, cluster.name, True, billable))
+
+        return rows
diff --git a/process_report/models/su_types.txt b/process_report/models/su_types.txt
@@ -0,0 +1,22 @@
+OpenStack GPUK80
+OpenShift Unknown GPU
+BM FC430
+OpenStack GPUA100SXM4
+OpenStack GPUV100
+OpenStack Storage
+OpenStack GPUA100
+OpenStack Object Storage
+OpenStack Volume Storage
+OpenShift GPUV100
+OpenShift GPUH100
+OpenShift NESE Storage
+BM FC830
+ESI GPUH100SXM5
+BM GPUH100SXM5
+BM R740XDAMD
+OpenShift GPUA100SXM4
+OpenShift CPU
+ESI A100SXM4
+OpenShift Storage
+BM GPUA100SXM4
+OpenStack CPU
diff --git a/process_report/tests/e2e/test_data/test_pi.yaml b/process_report/tests/e2e/test_data/test_pi.yaml
@@ -1,4 +1,4 @@
 - username: PI9
 - username: pi2@harvard.edu
   non_billed_su_types:
-    - name: Free CPU
+    - name: OpenStack CPU
diff --git a/process_report/tests/test-requirements.txt b/process_report/tests/test-requirements.txt
@@ -1,2 +1,3 @@
 pytest
+pytest-mock
 coverage