As part of our ongoing lifetime-alerting discussion, we have decided to alert based on the "Cost" column.

jimmysway · jimmysway · commit dee51ba3820f · 2026-02-24T10:30:37.000-05:00
I have made the necessary code changes, switching from summing the "Balance" column to summing the "Cost" column.
diff --git a/process_report/data_tools/costs.py b/process_report/data_tools/costs.py
@@ -14,7 +14,7 @@
 _LIFETIME_COLS = [
     invoice.PROJECT_ID_FIELD,
     invoice.CLUSTER_NAME_FIELD,
-    invoice.BALANCE_FIELD,
+    invoice.COST_FIELD,
 ]
 
 
@@ -140,13 +140,13 @@ def aggregate_by(
 
 
 def calculate_lifetime_costs(**filters: FilterValue) -> pd.DataFrame:
-    """Group invoice data by project and cluster, summing balance per group.
+    """Group invoice data by project and cluster, summing the COST column per group.
 
     Args:
         **filters: Column names as keys, values to filter by. Values must be str, int, or float.
 
     Returns:
-        DataFrame with columns: Project - Allocation, Cluster Name, lifetime_allocation_balance.
+        DataFrame with columns: Project - Allocation, Cluster Name, lifetime_allocation_cost.
 
     Example:
         >>> filters = {invoice.PROJECT_ID_FIELD: "vllm-test"}
@@ -156,7 +156,7 @@ def calculate_lifetime_costs(**filters: FilterValue) -> pd.DataFrame:
     return aggregate_by(
         tuple(_LIFETIME_COLS),
         (invoice.PROJECT_ID_FIELD, invoice.CLUSTER_NAME_FIELD),
-        agg_col=invoice.BALANCE_FIELD,
-        agg_name="lifetime_allocation_balance",
+        agg_col=invoice.COST_FIELD,
+        agg_name="lifetime_allocation_cost",
         **filters,
     )
diff --git a/process_report/tests/unit/data_tools/test_data_tools.py b/process_report/tests/unit/data_tools/test_data_tools.py
@@ -6,7 +6,7 @@
 # These are the column names in the iceberg table using string literals instead of the invoice module to test column name correctness
 PID = "Project - Allocation ID"
 CLUSTER = "Cluster Name"
-BALANCE = "Balance"
+COST = "Cost"
 
 
 @pytest.fixture(autouse=True)
@@ -22,7 +22,7 @@ def sample_invoice_dataframe() -> pd.DataFrame:
         {
             PID: ["vllm-test", "vllm-test", "webrca-1b021a"],
             CLUSTER: ["ocp-test", "ocp-test", "ocp-prod"],
-            BALANCE: [1.234, 2.345, None],
+            COST: [1.234, 2.345, None],
         }
     )
 
@@ -58,15 +58,15 @@ def _fake_loader(cols=None, **filters):
     monkeypatch.setattr(costs, "get_invoice_dataframe", _fake_loader)
 
     result = costs.aggregate_by(
-        (BALANCE,),
+        (COST,),
         (PID, CLUSTER),
-        agg_col=BALANCE,
+        agg_col=COST,
         agg_name="lifetime_allocation_balance",
         **{PID: "vllm-test"},
     )
 
     assert captured["filters"] == {PID: "vllm-test"}
-    assert captured["cols"] == (BALANCE, PID, CLUSTER)
+    assert captured["cols"] == (COST, PID, CLUSTER)
 
     values = sorted(result["lifetime_allocation_balance"].tolist())
     assert values == [costs.Decimal("0.00"), costs.Decimal("3.58")]
@@ -77,7 +77,7 @@ def test_group_and_sum_is_pure_transform(sample_invoice_dataframe: pd.DataFrame)
     result = costs.group_and_sum(
         sample_invoice_dataframe,
         (PID, CLUSTER),
-        agg_col=BALANCE,
+        agg_col=COST,
         agg_name="lifetime_allocation_balance",
     )
 
@@ -97,7 +97,7 @@ def test_group_and_sum_is_pure_transform(sample_invoice_dataframe: pd.DataFrame)
 def test_calculate_lifetime_costs_invalid_queries_return_empty(
     monkeypatch: pytest.MonkeyPatch, invalid_filters: dict[str, str]
 ):
-    empty_df = pd.DataFrame(columns=[PID, CLUSTER, BALANCE])
+    empty_df = pd.DataFrame(columns=[PID, CLUSTER, COST])
     monkeypatch.setattr(costs, "get_invoice_dataframe", lambda cols=None, **f: empty_df)
 
     result = costs.calculate_lifetime_costs(**invalid_filters)
@@ -125,18 +125,18 @@ def to_pandas(self):
 def test_get_invoice_dataframe_warns_when_no_rows_match(
     monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture
 ):
-    table = _FakeIcebergTable(pd.DataFrame(columns=[PID, BALANCE]))
+    table = _FakeIcebergTable(pd.DataFrame(columns=[PID, COST]))
     monkeypatch.setattr(costs, "get_table", lambda: table)
 
     with caplog.at_level("WARNING", logger=costs.__name__):
-        result = costs.get_invoice_dataframe((PID, BALANCE), **{PID: "does-not-exist"})
+        result = costs.get_invoice_dataframe((PID, COST), **{PID: "does-not-exist"})
 
     assert result.empty
     assert "No invoice rows matched filters" in caplog.text
 
 
 def test_get_invoice_dataframe_caches_repeated_query(monkeypatch: pytest.MonkeyPatch):
-    table = _FakeIcebergTable(pd.DataFrame({PID: ["vllm-test"], BALANCE: [1.0]}))
+    table = _FakeIcebergTable(pd.DataFrame({PID: ["vllm-test"], COST: [1.0]}))
     call_counter = {"count": 0}
 
     def _fake_get_table():
@@ -145,8 +145,8 @@ def _fake_get_table():
 
     monkeypatch.setattr(costs, "get_table", _fake_get_table)
 
-    first = costs.get_invoice_dataframe((PID, BALANCE), **{PID: "vllm-test"})
-    second = costs.get_invoice_dataframe((PID, BALANCE), **{PID: "vllm-test"})
+    first = costs.get_invoice_dataframe((PID, COST), **{PID: "vllm-test"})
+    second = costs.get_invoice_dataframe((PID, COST), **{PID: "vllm-test"})
 
     assert call_counter["count"] == 1
     assert first is second