Skip to content

Commit dee51ba

Browse files
committed
As part of our ongoing lifetime-alerting discussion, we have decided to alert based on the "Cost" column.
I have made the necessary code changes, switching from summing "Balance" to summing "Cost".
1 parent 7ef0bf9 commit dee51ba

2 files changed

Lines changed: 17 additions & 17 deletions

File tree

process_report/data_tools/costs.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
_LIFETIME_COLS = [
1515
invoice.PROJECT_ID_FIELD,
1616
invoice.CLUSTER_NAME_FIELD,
17-
invoice.BALANCE_FIELD,
17+
invoice.COST_FIELD,
1818
]
1919

2020

@@ -140,13 +140,13 @@ def aggregate_by(
140140

141141

142142
def calculate_lifetime_costs(**filters: FilterValue) -> pd.DataFrame:
143-
"""Group invoice data by project and cluster, summing balance per group.
143+
"""Group invoice data by project and cluster, summing the COST column per group.
144144
145145
Args:
146146
**filters: Column names as keys, values to filter by. Values must be str, int, or float.
147147
148148
Returns:
149-
DataFrame with columns: Project - Allocation, Cluster Name, lifetime_allocation_balance.
149+
DataFrame with columns: Project - Allocation, Cluster Name, lifetime_allocation_cost.
150150
151151
Example:
152152
>>> filters = {invoice.PROJECT_ID_FIELD: "vllm-test"}
@@ -156,7 +156,7 @@ def calculate_lifetime_costs(**filters: FilterValue) -> pd.DataFrame:
156156
return aggregate_by(
157157
tuple(_LIFETIME_COLS),
158158
(invoice.PROJECT_ID_FIELD, invoice.CLUSTER_NAME_FIELD),
159-
agg_col=invoice.BALANCE_FIELD,
160-
agg_name="lifetime_allocation_balance",
159+
agg_col=invoice.COST_FIELD,
160+
agg_name="lifetime_allocation_cost",
161161
**filters,
162162
)

process_report/tests/unit/data_tools/test_data_tools.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# These are the column names in the iceberg table using string literals instead of the invoice module to test column name correctness
77
PID = "Project - Allocation ID"
88
CLUSTER = "Cluster Name"
9-
BALANCE = "Balance"
9+
COST = "Cost"
1010

1111

1212
@pytest.fixture(autouse=True)
@@ -22,7 +22,7 @@ def sample_invoice_dataframe() -> pd.DataFrame:
2222
{
2323
PID: ["vllm-test", "vllm-test", "webrca-1b021a"],
2424
CLUSTER: ["ocp-test", "ocp-test", "ocp-prod"],
25-
BALANCE: [1.234, 2.345, None],
25+
COST: [1.234, 2.345, None],
2626
}
2727
)
2828

@@ -58,15 +58,15 @@ def _fake_loader(cols=None, **filters):
5858
monkeypatch.setattr(costs, "get_invoice_dataframe", _fake_loader)
5959

6060
result = costs.aggregate_by(
61-
(BALANCE,),
61+
(COST,),
6262
(PID, CLUSTER),
63-
agg_col=BALANCE,
63+
agg_col=COST,
6464
agg_name="lifetime_allocation_balance",
6565
**{PID: "vllm-test"},
6666
)
6767

6868
assert captured["filters"] == {PID: "vllm-test"}
69-
assert captured["cols"] == (BALANCE, PID, CLUSTER)
69+
assert captured["cols"] == (COST, PID, CLUSTER)
7070

7171
values = sorted(result["lifetime_allocation_balance"].tolist())
7272
assert values == [costs.Decimal("0.00"), costs.Decimal("3.58")]
@@ -77,7 +77,7 @@ def test_group_and_sum_is_pure_transform(sample_invoice_dataframe: pd.DataFrame)
7777
result = costs.group_and_sum(
7878
sample_invoice_dataframe,
7979
(PID, CLUSTER),
80-
agg_col=BALANCE,
80+
agg_col=COST,
8181
agg_name="lifetime_allocation_balance",
8282
)
8383

@@ -97,7 +97,7 @@ def test_group_and_sum_is_pure_transform(sample_invoice_dataframe: pd.DataFrame)
9797
def test_calculate_lifetime_costs_invalid_queries_return_empty(
9898
monkeypatch: pytest.MonkeyPatch, invalid_filters: dict[str, str]
9999
):
100-
empty_df = pd.DataFrame(columns=[PID, CLUSTER, BALANCE])
100+
empty_df = pd.DataFrame(columns=[PID, CLUSTER, COST])
101101
monkeypatch.setattr(costs, "get_invoice_dataframe", lambda cols=None, **f: empty_df)
102102

103103
result = costs.calculate_lifetime_costs(**invalid_filters)
@@ -125,18 +125,18 @@ def to_pandas(self):
125125
def test_get_invoice_dataframe_warns_when_no_rows_match(
126126
monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture
127127
):
128-
table = _FakeIcebergTable(pd.DataFrame(columns=[PID, BALANCE]))
128+
table = _FakeIcebergTable(pd.DataFrame(columns=[PID, COST]))
129129
monkeypatch.setattr(costs, "get_table", lambda: table)
130130

131131
with caplog.at_level("WARNING", logger=costs.__name__):
132-
result = costs.get_invoice_dataframe((PID, BALANCE), **{PID: "does-not-exist"})
132+
result = costs.get_invoice_dataframe((PID, COST), **{PID: "does-not-exist"})
133133

134134
assert result.empty
135135
assert "No invoice rows matched filters" in caplog.text
136136

137137

138138
def test_get_invoice_dataframe_caches_repeated_query(monkeypatch: pytest.MonkeyPatch):
139-
table = _FakeIcebergTable(pd.DataFrame({PID: ["vllm-test"], BALANCE: [1.0]}))
139+
table = _FakeIcebergTable(pd.DataFrame({PID: ["vllm-test"], COST: [1.0]}))
140140
call_counter = {"count": 0}
141141

142142
def _fake_get_table():
@@ -145,8 +145,8 @@ def _fake_get_table():
145145

146146
monkeypatch.setattr(costs, "get_table", _fake_get_table)
147147

148-
first = costs.get_invoice_dataframe((PID, BALANCE), **{PID: "vllm-test"})
149-
second = costs.get_invoice_dataframe((PID, BALANCE), **{PID: "vllm-test"})
148+
first = costs.get_invoice_dataframe((PID, COST), **{PID: "vllm-test"})
149+
second = costs.get_invoice_dataframe((PID, COST), **{PID: "vllm-test"})
150150

151151
assert call_counter["count"] == 1
152152
assert first is second

0 commit comments

Comments
 (0)