Skip to content

Commit f2bdb8d

Browse files
Move nonbillable pydantic models from invoicing-private-data
1 parent 72b6e76 commit f2bdb8d

8 files changed

Lines changed: 411 additions & 136 deletions

File tree

process_report/loader.py

Lines changed: 9 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from process_report import util
1010
from process_report.settings import invoice_settings
1111
from process_report.invoices import invoice
12+
from process_report.models.nonbillable_models import ExcludedProjectList, PIList
1213

1314
# List of service invoices processed by pipeline. Change if new services are added.
1415
# Cannot simply filter by suffix because S3 can't do it
@@ -113,27 +114,18 @@ def load_dataframe(self, filepath: str) -> pandas.DataFrame:
113114
return pandas.read_csv(filepath)
114115

115116
@functools.lru_cache
116-
def _load_pi_config(self, filepath: str) -> list[dict]:
117-
with open(filepath) as file:
117+
def _load_pi_config(self) -> PIList:
118+
with open(invoice_settings.nonbillable_pis_filepath) as file:
118119
pi_list = yaml.safe_load(file)
119120

120-
if not isinstance(pi_list, list):
121-
raise ValueError("pi.yaml must contain a YAML list")
122-
123-
return pi_list
121+
return PIList.model_validate(pi_list)
124122

125123
def get_nonbillable_pis(self) -> list[str]:
126-
pi_list = self._load_pi_config(invoice_settings.nonbillable_pis_filepath)
127-
return [pi["username"] for pi in pi_list if "non_billed_su_types" not in pi]
124+
return self._load_pi_config().get_nonbillable_pis()
128125

129126
def get_pi_non_billed_su_types(self) -> dict[str, list[str]]:
130127
"""PI usernames -> list of SU types that receive credit (zeroed out)."""
131-
pi_list = self._load_pi_config(invoice_settings.nonbillable_pis_filepath)
132-
return {
133-
pi["username"]: [su["name"] for su in pi["non_billed_su_types"]]
134-
for pi in pi_list
135-
if "non_billed_su_types" in pi
136-
}
128+
return self._load_pi_config().get_pi_non_billed_su_types()
137129

138130
@functools.lru_cache
139131
def get_nonbillable_projects(self) -> pandas.DataFrame:
@@ -147,48 +139,10 @@ def get_nonbillable_projects(self) -> pandas.DataFrame:
147139
indicating whether matching projects should be treated as billable
148140
"""
149141

150-
def _is_in_time_range(timed_object) -> bool:
151-
# Leveraging inherent lexicographical order of YYYY-MM strings
152-
return (
153-
timed_object["start"] <= invoice_settings.invoice_month
154-
and invoice_settings.invoice_month <= timed_object["end"]
155-
)
156-
157-
project_list = []
158142
with open(invoice_settings.nonbillable_projects_filepath) as file:
159-
projects_dict = yaml.safe_load(file)
160-
161-
for project in projects_dict:
162-
project_name = project["name"]
163-
cluster_list = project.get("clusters")
164-
is_billable = project.get("is_billable", False)
165-
166-
if project.get("start"):
167-
if not _is_in_time_range(project):
168-
continue
169-
170-
if cluster_list:
171-
for cluster in cluster_list:
172-
project_list.append(
173-
(project_name, cluster["name"], True, is_billable)
174-
)
175-
else:
176-
project_list.append((project_name, None, True, is_billable))
177-
elif cluster_list:
178-
for cluster in cluster_list:
179-
cluster_start_time = cluster.get("start")
180-
if cluster_start_time:
181-
if _is_in_time_range(cluster):
182-
project_list.append(
183-
(project_name, cluster["name"], True, is_billable)
184-
)
185-
elif not cluster_start_time:
186-
project_list.append(
187-
(project_name, cluster["name"], False, is_billable)
188-
)
189-
else:
190-
project_list.append((project_name, None, False, is_billable))
191-
143+
data = yaml.safe_load(file)
144+
projects = ExcludedProjectList.model_validate(data)
145+
project_list = projects.get_nonbillable_projects(invoice_settings.invoice_month)
192146
return pandas.DataFrame(
193147
project_list,
194148
columns=[
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
ocp-prod
2+
virt
3+
ocp-test
4+
stack
5+
academic
6+
bm
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
import datetime
2+
import pydantic
3+
from typing import Annotated, TypeVar
4+
from functools import lru_cache
5+
from pathlib import Path
6+
7+
_MODELS_DIR = Path(__file__).parent
8+
9+
10+
@lru_cache
def get_allowed_clusters() -> set[str]:
    """Return the set of cluster names listed in ``cluster_names.txt``.

    The file sits next to this module and holds one name per line. Lines are
    split with ``splitlines()`` so CRLF endings are handled, surrounding
    whitespace is removed, and blank lines are ignored so the empty string
    can never count as a valid cluster name. The result is cached, so the
    file is read at most once per process.
    """
    with open(_MODELS_DIR / "cluster_names.txt", encoding="utf-8") as f:
        return {line.strip() for line in f.read().splitlines() if line.strip()}
14+
15+
16+
@lru_cache
def get_allowed_su_types() -> set[str]:
    """Return the set of SU type names listed in ``su_types.txt``.

    The file sits next to this module and holds one SU type per line. Lines
    are split with ``splitlines()`` so CRLF endings are handled, surrounding
    whitespace is removed, and blank lines are ignored so the empty string
    can never count as a valid SU type. The result is cached, so the file is
    read at most once per process.
    """
    with open(_MODELS_DIR / "su_types.txt", encoding="utf-8") as f:
        return {line.strip() for line in f.read().splitlines() if line.strip()}
20+
21+
22+
def validate_date(v: str) -> datetime.date:
23+
return datetime.datetime.strptime(v, "%Y-%m").date()
24+
25+
26+
DateField = Annotated[datetime.date, pydantic.BeforeValidator(validate_date)]
27+
28+
29+
class NamedObject(pydantic.BaseModel):
    """Base model for a configuration entry that is identified by its name."""

    # Identifier of the entry.
    name: str
31+
32+
33+
T = TypeVar("T", bound=NamedObject)
34+
35+
36+
class UniqueObjectList(pydantic.RootModel[list[T]]):
    """Root model wrapping a list of named entries whose names must be unique."""

    root: list[T]

    @pydantic.model_validator(mode="after")
    def validate_unique_names(self):
        """Raise ``ValueError`` on the first entry whose name was already seen."""
        names_so_far: set[str] = set()
        for entry in self.root:
            if entry.name in names_so_far:
                raise ValueError(f"{entry.name}: found duplicate name")
            names_so_far.add(entry.name)
        return self
48+
49+
50+
class ExcludedCluster(NamedObject):
    """A cluster on which a project is excluded, optionally for a date range.

    ``start`` and ``end`` are ``YYYY-MM`` months parsed by ``DateField``.
    """

    start: DateField | None = None
    end: DateField | None = None
    reason: str | None = None

    @pydantic.field_validator("name")
    @classmethod
    def only_allowed_cluster_names(cls, v: str) -> str:
        """Accept only names returned by ``get_allowed_clusters()``.

        Raises:
            ValueError: if ``v`` is not one of the allowed cluster names.
        """
        allowed = get_allowed_clusters()
        if v not in allowed:
            raise ValueError(f"'{v}' is not a valid cluster name")
        return v
61+
62+
63+
ExcludedClusterList = UniqueObjectList[ExcludedCluster]
64+
65+
66+
class ExcludedProject(NamedObject):
    """A project entry from the nonbillable projects list.

    The entry may be restricted to specific clusters and/or to a date range.
    ``start`` and ``end`` are ``YYYY-MM`` months parsed by ``DateField``.
    """

    clusters: ExcludedClusterList = ExcludedClusterList([])
    start: DateField | None = None
    end: DateField | None = None
    reason: str | None = None
    is_billable: bool = False

    @pydantic.model_validator(mode="after")
    def validate_time_periods(self):
        """Check the date range of the project and of each of its clusters.

        A range is rejected when only one of its bounds is given or when its
        end lies before its start.
        """

        def check_range(start: datetime.date | None, end: datetime.date | None) -> None:
            if start and end:
                if end < start:
                    raise ValueError(
                        f"{self.name}: End date must be after start date for project"
                    )
                return
            if start or end:
                raise ValueError(
                    f"{self.name}: Start and end dates must be provided together or not at all"
                )

        # Project-level range first, then every cluster in declaration order.
        ranges = [(self.start, self.end)]
        ranges.extend((cluster.start, cluster.end) for cluster in self.clusters.root)
        for range_start, range_end in ranges:
            check_range(range_start, range_end)

        return self
95+
96+
97+
class NonBilledSUType(NamedObject):
    """An SU type, identified by name, that is not billed for a PI."""

    @pydantic.field_validator("name")
    @classmethod
    def only_allowed_su_types(cls, v: str) -> str:
        """Accept only names returned by ``get_allowed_su_types()``.

        Raises:
            ValueError: if ``v`` is not one of the allowed SU types.
        """
        allowed = get_allowed_su_types()
        if v not in allowed:
            raise ValueError(f"'{v}' is not a valid SU type")
        return v
104+
105+
106+
NonBilledSUTypeList = UniqueObjectList[NonBilledSUType]
107+
108+
109+
class PIParticipant(pydantic.BaseModel):
    """A PI entry from the nonbillable PI list.

    The PI's identifier is read from the ``username`` key and stored as
    ``name``; ``populate_by_name`` lets the field also be set as ``name``.
    """

    model_config = pydantic.ConfigDict(populate_by_name=True)

    name: str = pydantic.Field(alias="username")
    # None means no SU types were listed for this PI.
    non_billed_su_types: NonBilledSUTypeList | None = None
114+
115+
116+
class PIList(UniqueObjectList[PIParticipant]):
    """List of PI entries with unique names."""

    def get_nonbillable_pis(self) -> list[str]:
        """Return the names of PIs that list no ``non_billed_su_types``."""
        return [
            participant.name
            for participant in self.root
            if participant.non_billed_su_types is None
        ]

    def get_pi_non_billed_su_types(self) -> dict[str, list[str]]:
        """Map each PI that lists ``non_billed_su_types`` to those SU type names."""
        su_types_by_pi = {}
        for participant in self.root:
            su_types = participant.non_billed_su_types
            if su_types is None:
                continue
            su_types_by_pi[participant.name] = [su.name for su in su_types.root]
        return su_types_by_pi
126+
127+
128+
class ExcludedProjectList(UniqueObjectList[ExcludedProject]):
    """List of excluded project entries with unique names."""

    def get_nonbillable_projects(
        self, invoice_month: str
    ) -> list[tuple[str, str | None, bool, bool]]:
        """Flatten the project entries that apply to ``invoice_month`` into rows.

        Args:
            invoice_month: The invoice month as a ``YYYY-MM`` string.

        Returns:
            One ``(project name, cluster name or None, dated, is_billable)``
            tuple per project/cluster pair. ``dated`` is True when the row
            comes from a project or cluster that carries a date range
            containing the invoice month, and False when no date range
            applies. Dated entries whose range does not contain the invoice
            month produce no row.
        """
        invoice_date = datetime.datetime.strptime(invoice_month, "%Y-%m").date()

        def covers_invoice_month(start: datetime.date, end: datetime.date) -> bool:
            return start <= invoice_date <= end

        rows = []

        for project in self.root:
            clusters = project.clusters.root
            billable = project.is_billable

            if project.start:
                # A project-level range applies to all of the project's clusters.
                if not covers_invoice_month(project.start, project.end):
                    continue
                if clusters:
                    rows.extend(
                        (project.name, cluster.name, True, billable)
                        for cluster in clusters
                    )
                else:
                    rows.append((project.name, None, True, billable))
                continue

            if not clusters:
                rows.append((project.name, None, False, billable))
                continue

            for cluster in clusters:
                if not cluster.start:
                    rows.append((project.name, cluster.name, False, billable))
                elif covers_invoice_month(cluster.start, cluster.end):
                    rows.append((project.name, cluster.name, True, billable))

        return rows

process_report/models/su_types.txt

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
OpenStack GPUK80
2+
OpenShift Unknown GPU
3+
BM FC430
4+
OpenStack GPUA100SXM4
5+
OpenStack GPUV100
6+
OpenStack Storage
7+
OpenStack GPUA100
8+
OpenStack Object Storage
9+
OpenStack Volume Storage
10+
OpenShift GPUV100
11+
OpenShift GPUH100
12+
OpenShift NESE Storage
13+
BM FC830
14+
ESI GPUH100SXM5
15+
BM GPUH100SXM5
16+
BM R740XDAMD
17+
OpenShift GPUA100SXM4
18+
OpenShift CPU
19+
ESI A100SXM4
20+
OpenShift Storage
21+
BM GPUA100SXM4
22+
OpenStack CPU
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
- username: PI9
22
- username: PI10
33
non_billed_su_types:
4-
- name: SU1
4+
- name: OpenStack CPU
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
pytest
2+
pytest-mock

0 commit comments

Comments
 (0)