Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 9 additions & 55 deletions process_report/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from process_report import util
from process_report.settings import invoice_settings
from process_report.invoices import invoice
from process_report.models.nonbillable_models import ExcludedProjectList, PIList

# List of service invoices processed by pipeline. Change if new services are added.
# Cannot simply filter by suffix because S3 can't do it
Expand Down Expand Up @@ -120,27 +121,18 @@ def load_prepay_credits(self) -> pandas.DataFrame:
)

@functools.lru_cache
def _load_pi_config(self, filepath: str) -> list[dict]:
with open(filepath) as file:
def _load_pi_config(self) -> PIList:
with open(invoice_settings.nonbillable_pis_filepath) as file:
pi_list = yaml.safe_load(file)

if not isinstance(pi_list, list):
raise ValueError("pi.yaml must contain a YAML list")

return pi_list
return PIList.model_validate(pi_list)

def get_nonbillable_pis(self) -> list[str]:
pi_list = self._load_pi_config(invoice_settings.nonbillable_pis_filepath)
return [pi["username"] for pi in pi_list if "non_billed_su_types" not in pi]
return self._load_pi_config().get_nonbillable_pis()

def get_pi_non_billed_su_types(self) -> dict[str, list[str]]:
"""PI usernames -> list of SU types that receive credit (zeroed out)."""
pi_list = self._load_pi_config(invoice_settings.nonbillable_pis_filepath)
return {
pi["username"]: [su["name"] for su in pi["non_billed_su_types"]]
for pi in pi_list
if "non_billed_su_types" in pi
}
return self._load_pi_config().get_pi_non_billed_su_types()

@functools.lru_cache
def get_nonbillable_projects(self) -> pandas.DataFrame:
Expand All @@ -154,48 +146,10 @@ def get_nonbillable_projects(self) -> pandas.DataFrame:
indicating whether matching projects should be treated as billable
"""

def _is_in_time_range(timed_object) -> bool:
# Leveraging inherent lexicographical order of YYYY-MM strings
return (
timed_object["start"] <= invoice_settings.invoice_month
and invoice_settings.invoice_month <= timed_object["end"]
)

project_list = []
with open(invoice_settings.nonbillable_projects_filepath) as file:
projects_dict = yaml.safe_load(file)

for project in projects_dict:
project_name = project["name"]
cluster_list = project.get("clusters")
is_billable = project.get("is_billable", False)

if project.get("start"):
if not _is_in_time_range(project):
continue

if cluster_list:
for cluster in cluster_list:
project_list.append(
(project_name, cluster["name"], True, is_billable)
)
else:
project_list.append((project_name, None, True, is_billable))
elif cluster_list:
for cluster in cluster_list:
cluster_start_time = cluster.get("start")
if cluster_start_time:
if _is_in_time_range(cluster):
project_list.append(
(project_name, cluster["name"], True, is_billable)
)
elif not cluster_start_time:
project_list.append(
(project_name, cluster["name"], False, is_billable)
)
else:
project_list.append((project_name, None, False, is_billable))

data = yaml.safe_load(file)
projects = ExcludedProjectList.model_validate(data)
project_list = projects.get_nonbillable_projects(invoice_settings.invoice_month)
return pandas.DataFrame(
project_list,
columns=[
Expand Down
6 changes: 6 additions & 0 deletions process_report/models/cluster_names.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
ocp-prod
virt
ocp-test
stack
academic
bm
168 changes: 168 additions & 0 deletions process_report/models/nonbillable_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import datetime
import pydantic
from typing import Annotated, TypeVar
from functools import lru_cache
from pathlib import Path

# Directory containing this module; the allow-list text files opened below are resolved against it.
_MODELS_DIR = Path(__file__).parent


@lru_cache
def get_allowed_clusters() -> set[str]:
    """Return the set of cluster names listed in ``cluster_names.txt``.

    The file is read from ``_MODELS_DIR`` and holds one name per line.
    Surrounding whitespace is stripped and blank lines are skipped, so an
    empty file yields an empty set (not ``{""}``) and a stray blank or padded
    line never turns into a bogus allowed name.

    The result is cached, so every caller receives the same set object;
    treat it as read-only.
    """
    with open(_MODELS_DIR / "cluster_names.txt", encoding="utf-8") as f:
        return {name for line in f if (name := line.strip())}


@lru_cache
def get_allowed_su_types() -> set[str]:
    """Return the set of SU type names listed in ``su_types.txt``.

    The file is read from ``_MODELS_DIR`` and holds one SU type per line
    (names may contain internal spaces). Surrounding whitespace is stripped
    and blank lines are skipped, so an empty file yields an empty set (not
    ``{""}``) and a stray blank or padded line never becomes an allowed name.

    The result is cached, so every caller receives the same set object;
    treat it as read-only.
    """
    with open(_MODELS_DIR / "su_types.txt", encoding="utf-8") as f:
        return {name for line in f if (name := line.strip())}


def validate_date(v: str) -> datetime.date:
return datetime.datetime.strptime(v, "%Y-%m").date()


# Date field type for the models below: validate_date runs on the raw input
# before pydantic's own datetime.date validation.
DateField = Annotated[datetime.date, pydantic.BeforeValidator(validate_date)]


class NamedObject(pydantic.BaseModel):
    """Base model for any configuration entry identified by a ``name`` string."""

    name: str


# Element type for generic list models; bounded to NamedObject so items expose ``name``.
T = TypeVar("T", bound=NamedObject)


class UniqueObjectList(pydantic.RootModel[list[T]]):
    """Root-model list whose items must all carry distinct ``name`` values."""

    root: list[T]

    @pydantic.model_validator(mode="after")
    def validate_unique_names(self):
        """Reject the list if any item name occurs more than once.

        Items are scanned in order; a ``ValueError`` naming the first
        repeated entry is raised as soon as a repeat is encountered.
        """
        names_so_far: set[str] = set()
        for entry in self.root:
            name = entry.name
            if name not in names_so_far:
                names_so_far.add(name)
                continue
            raise ValueError(f"{name}: found duplicate name")

        return self


class ExcludedCluster(NamedObject):
    """A cluster entry of an excluded project, optionally limited to a date range.

    ``name`` must be one of the names returned by ``get_allowed_clusters()``.
    This model does not check that ``start`` and ``end`` are paired or
    ordered; ``ExcludedProject`` performs that check for its clusters.
    """

    # Optional "YYYY-MM" bounds, parsed into dates by DateField.
    start: DateField | None = None
    end: DateField | None = None
    # Free-form explanation; not interpreted by the models in this module.
    reason: str | None = None

    @pydantic.field_validator("name")
    @classmethod  # explicit, as the pydantic docs recommend for field validators
    def only_allowed_cluster_names(cls, v: str) -> str:
        """Return ``v`` unchanged if it is a known cluster name.

        Raises:
            ValueError: If ``v`` is not in the allowed cluster set.
        """
        if v not in get_allowed_clusters():
            raise ValueError(f"'{v}' is not a valid cluster name")
        return v


# List of ExcludedCluster entries; UniqueObjectList rejects duplicate cluster names.
ExcludedClusterList = UniqueObjectList[ExcludedCluster]


class ExcludedProject(NamedObject):
    """A project entry from the excluded-projects configuration.

    A date range may be given on the project itself and/or on individual
    clusters. Every range must have both ``start`` and ``end`` or neither,
    and ``end`` may not precede ``start``.
    """

    clusters: ExcludedClusterList = ExcludedClusterList([])
    # Optional "YYYY-MM" bounds, parsed into dates by DateField.
    start: DateField | None = None
    end: DateField | None = None
    # Free-form explanation; not interpreted by the models in this module.
    reason: str | None = None
    is_billable: bool = False

    @pydantic.model_validator(mode="after")
    def validate_time_periods(self):
        """Check the project's date range and each cluster's date range.

        Raises:
            ValueError: If a range has only one of ``start``/``end`` or its
                ``end`` is earlier than its ``start``. Errors caused by a
                cluster's range keep the original message and additionally
                name the offending cluster.
        """

        def check_date_range(
            start: datetime.date | None,
            end: datetime.date | None,
            context: str = "",
        ) -> None:
            # `context` is appended to the message so cluster-level failures
            # can be told apart from project-level ones.
            if start and end:
                if end < start:
                    raise ValueError(
                        f"{self.name}: End date must be after start date for project{context}"
                    )
            elif start or end:
                raise ValueError(
                    f"{self.name}: Start and end dates must be provided together or not at all{context}"
                )

        check_date_range(self.start, self.end)
        # Iterating an empty cluster list is a no-op, so no guard is needed.
        for excluded_cluster in self.clusters.root:
            check_date_range(
                excluded_cluster.start,
                excluded_cluster.end,
                f" (cluster '{excluded_cluster.name}')",
            )

        return self


class NonBilledSUType(NamedObject):
    """An SU type entry whose ``name`` must be a known SU type.

    Known names are those returned by ``get_allowed_su_types()``.
    """

    @pydantic.field_validator("name")
    @classmethod  # explicit, as the pydantic docs recommend for field validators
    def only_allowed_su_types(cls, v: str) -> str:
        """Return ``v`` unchanged if it is a known SU type.

        Raises:
            ValueError: If ``v`` is not in the allowed SU type set.
        """
        if v not in get_allowed_su_types():
            raise ValueError(f"'{v}' is not a valid SU type")
        return v


# List of NonBilledSUType entries; UniqueObjectList rejects duplicate SU type names.
NonBilledSUTypeList = UniqueObjectList[NonBilledSUType]


class PIParticipant(NamedObject):
    """A PI entry from the PI configuration.

    Inherits from ``NamedObject`` so that it satisfies the ``NamedObject``
    bound of the ``UniqueObjectList`` type parameter. The inherited ``name``
    field is redeclared with the ``username`` alias, so input may supply
    either ``username`` or (via ``populate_by_name``) ``name``.
    """

    name: str = pydantic.Field(alias="username")
    # None means no SU-type list was given for this PI.
    non_billed_su_types: NonBilledSUTypeList | None = None

    model_config = pydantic.ConfigDict(populate_by_name=True)


class PIList(UniqueObjectList[PIParticipant]):
    """List of PI entries with helpers that split them by SU-type configuration."""

    def get_nonbillable_pis(self) -> list[str]:
        """Return the names of PIs that have no ``non_billed_su_types`` set."""
        usernames: list[str] = []
        for participant in self.root:
            if participant.non_billed_su_types is None:
                usernames.append(participant.name)
        return usernames

    def get_pi_non_billed_su_types(self) -> dict[str, list[str]]:
        """Map each PI that has ``non_billed_su_types`` to its SU type names."""
        su_types_by_pi: dict[str, list[str]] = {}
        for participant in self.root:
            su_types = participant.non_billed_su_types
            if su_types is None:
                continue
            su_types_by_pi[participant.name] = [su.name for su in su_types.root]
        return su_types_by_pi


class ExcludedProjectList(UniqueObjectList[ExcludedProject]):
    """List of excluded projects that can be flattened into per-cluster rows."""

    def get_nonbillable_projects(
        self, invoice_month: str
    ) -> list[tuple[str, str | None, bool, bool]]:
        """Build ``(project, cluster, is_timed, is_billable)`` rows for a month.

        Args:
            invoice_month: Invoice month as a ``"YYYY-MM"`` string.

        Returns:
            One tuple per emitted project/cluster pair, in configuration
            order. The cluster is ``None`` when the project lists no
            clusters. The third element is True when the row comes from a
            date-bounded entry covering ``invoice_month`` and False when the
            entry has no date range. Date-bounded entries that do not cover
            the month produce no row.
        """
        invoice_date = datetime.datetime.strptime(invoice_month, "%Y-%m").date()

        def _covers_invoice_month(start: datetime.date, end: datetime.date) -> bool:
            return not (invoice_date < start or end < invoice_date)

        rows = []

        for project in self.root:
            name = project.name
            clusters = project.clusters.root
            billable = project.is_billable

            if project.start:
                # Project-level range applies to the project and all its clusters.
                if not _covers_invoice_month(project.start, project.end):
                    continue
                cluster_names = [cluster.name for cluster in clusters] or [None]
                rows.extend(
                    (name, cluster_name, True, billable)
                    for cluster_name in cluster_names
                )
                continue

            if not clusters:
                rows.append((name, None, False, billable))
                continue

            # No project-level range: each cluster is judged on its own range.
            for cluster in clusters:
                if not cluster.start:
                    rows.append((name, cluster.name, False, billable))
                elif _covers_invoice_month(cluster.start, cluster.end):
                    rows.append((name, cluster.name, True, billable))

        return rows
22 changes: 22 additions & 0 deletions process_report/models/su_types.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
OpenStack GPUK80
OpenShift Unknown GPU
BM FC430
OpenStack GPUA100SXM4
OpenStack GPUV100
OpenStack Storage
OpenStack GPUA100
OpenStack Object Storage
OpenStack Volume Storage
OpenShift GPUV100
OpenShift GPUH100
OpenShift NESE Storage
BM FC830
ESI GPUH100SXM5
BM GPUH100SXM5
BM R740XDAMD
OpenShift GPUA100SXM4
OpenShift CPU
ESI A100SXM4
OpenShift Storage
BM GPUA100SXM4
OpenStack CPU
2 changes: 1 addition & 1 deletion process_report/tests/e2e/test_data/test_pi.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
- username: PI9
- username: pi2@harvard.edu
non_billed_su_types:
- name: Free CPU
- name: OpenStack CPU
1 change: 1 addition & 0 deletions process_report/tests/test-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pytest
pytest-mock
coverage
Loading
Loading