@dataclass
class BMInvoice(invoice.Invoice):
    """Invoice named ``bm_projects`` built from the rows of the ``bm`` cluster."""

    name: str = "bm_projects"

    # Column constants listed for this invoice, in this order. The name is
    # left unannotated, so @dataclass keeps it a plain class attribute.
    # NOTE(review): presumably read by invoice.Invoice when exporting; the
    # base class is not visible here -- confirm.
    export_columns_list = [
        invoice.INVOICE_DATE_FIELD,
        invoice.PROJECT_FIELD,
        invoice.PROJECT_ID_FIELD,
        invoice.PI_FIELD,
        invoice.INVOICE_EMAIL_FIELD,
        invoice.INVOICE_ADDRESS_FIELD,
        invoice.INSTITUTION_FIELD,
        invoice.INSTITUTION_ID_FIELD,
        invoice.SU_HOURS_FIELD,
        invoice.SU_TYPE_FIELD,
        invoice.RATE_FIELD,
        invoice.COST_FIELD,
        invoice.CREDIT_FIELD,
        invoice.CREDIT_CODE_FIELD,
        invoice.BALANCE_FIELD,
    ]

    def _prepare_export(self):
        """Set ``export_data`` to the rows of ``data`` whose cluster name is ``"bm"``."""
        on_bm_cluster = self.data[invoice.CLUSTER_NAME_FIELD] == "bm"
        self.export_data = self.data[on_bm_cluster]
@dataclass
class BMUsageProcessor(processor.Processor):
    """Relabel the invoice rows that belong to the ``bm`` cluster.

    For every row of ``self.data`` whose cluster name equals
    ``BM_CLUSTER_NAME``:

    * ``BM_PROJECT_SUFFIX`` is appended to the project name,
    * the project ID is overwritten with ``BM_PROJECT_ID``,
    * the invoice email is overwritten with ``BM_INVOICE_EMAIL``.

    Rows of every other cluster are left untouched.
    """

    # Deliberately unannotated: @dataclass only turns annotated names into
    # fields, so these remain plain class constants and the constructor
    # signature is unchanged.
    BM_CLUSTER_NAME = "bm"
    BM_PROJECT_SUFFIX = " BM Usage"
    BM_PROJECT_ID = "ESI Bare Metal"
    BM_INVOICE_EMAIL = "nclinton@bu.edu"

    def _get_bm_project_mask(self):
        """Return a boolean Series that is True for rows on the bm cluster."""
        return self.data[invoice.CLUSTER_NAME_FIELD] == self.BM_CLUSTER_NAME

    def _process(self):
        """Rewrite project name, project ID and invoice email of bm rows in place.

        NOTE(review): presumably invoked through ``Processor.process()``; the
        base class is not visible here -- confirm.
        """
        bm_rows = self._get_bm_project_mask()

        # Vectorised concatenation instead of a per-element lambda: a missing
        # project name (None/NaN) stays missing rather than raising TypeError,
        # and no Python-level function runs per row.
        project_names = self.data.loc[bm_rows, invoice.PROJECT_FIELD]
        self.data.loc[bm_rows, invoice.PROJECT_FIELD] = (
            project_names + self.BM_PROJECT_SUFFIX
        )

        self.data.loc[bm_rows, invoice.PROJECT_ID_FIELD] = self.BM_PROJECT_ID
        self.data.loc[bm_rows, invoice.INVOICE_EMAIL_FIELD] = self.BM_INVOICE_EMAIL
class TestBMUsageProcessor(TestCase):
    """Unit test for the row rewriting done by the BM usage processor."""

    def test_process_bm_usage(self):
        """The two "bm" rows are relabelled; the "ocp" row is left unchanged."""
        input_frame = pandas.DataFrame(
            {
                "Project - Allocation": ["test", "test bm-bm", "not-bm"],
                "Project - Allocation ID": [None, None, None],
                "Invoice Email": [None, None, None],
                "Cluster Name": ["bm", "bm", "ocp"],
            }
        )

        # Expected result: suffix, project ID and email applied to the first
        # two rows only.
        expected_frame = pandas.DataFrame(
            {
                "Project - Allocation": [
                    "test BM Usage",
                    "test bm-bm BM Usage",
                    "not-bm",
                ],
                "Project - Allocation ID": ["ESI Bare Metal", "ESI Bare Metal", None],
                "Invoice Email": ["nclinton@bu.edu", "nclinton@bu.edu", None],
                "Cluster Name": ["bm", "bm", "ocp"],
            }
        )

        proc = test_utils.new_bm_usage_processor(data=input_frame)
        proc.process()

        # Cast the expectation to the processed frame's dtypes before
        # comparing, so only the values decide the outcome of equals().
        expected_frame = expected_frame.astype(proc.data.dtypes)
        self.assertTrue(proc.data.equals(expected_frame))
def test_exclude_bm_cluster(self):
    """Projects in the 'bm' cluster should be excluded from BU subsidy calculation."""
    subsidy_amount = 100

    # The same PI listed twice: one PI with two allocations, the first on
    # the "bm" cluster and the second on "ocp".
    test_invoice = self._get_test_invoice(
        ["PI", "PI"],
        pi_balances=[60, 60],
        project_names=["P1", "P2"],
        clusters=["bm", "ocp"],
    )

    expected_invoice = test_invoice.copy()
    expected_columns = {
        "Project": expected_invoice["Project - Allocation"],
        # bm row: no subsidy, PI balance stays 60.
        # ocp row: subsidy of 60, PI balance drops to 0.
        "Subsidy": [0, 60],
        "PI Balance": [60, 0],
    }
    for column, values in expected_columns.items():
        expected_invoice[column] = values

    self._assert_result_invoice(subsidy_amount, test_invoice, expected_invoice)
b/process_report/tests/unit/processors/test_new_pi_credit_processor.py @@ -97,6 +97,7 @@ def _get_test_invoice( is_billable=None, missing_pi=None, institution=None, + clusters=None, ): if not su_type: su_type = ["CPU" for _ in range(len(pi))] @@ -109,6 +110,10 @@ def _get_test_invoice( if not institution: institution = ["Foo University" for _ in range(len(pi))] + + if not clusters: + clusters = ["" for _ in range(len(pi))] + return pandas.DataFrame( { "Manager (PI)": pi, @@ -117,15 +122,18 @@ def _get_test_invoice( "Is Billable": is_billable, "Missing PI": missing_pi, "Institution": institution, + "Cluster Name": clusters, } ) + def setUp(self): + super().setUp() + self.test_old_pi_file = self.tempdir / "old_pi_file.csv" + def test_no_new_pi(self): test_invoice = self._get_test_invoice( ["PI" for _ in range(3)], [100 for _ in range(3)] ) - test_old_pi_file = self.tempdir / "old_pi.csv" - # Other fields of old PI file not accessed if PI is no longer # eligible for new-PI credit test_old_pi_df = pandas.DataFrame( @@ -137,7 +145,7 @@ def test_no_new_pi(self): "2nd Month Used": [None], } ) - test_old_pi_df.to_csv(test_old_pi_file, index=False) + test_old_pi_df.to_csv(self.test_old_pi_file, index=False) answer_invoice = pandas.concat( [ @@ -159,7 +167,7 @@ def test_no_new_pi(self): self._assert_result_invoice_and_old_pi_file( "2024-06", test_invoice, - str(test_old_pi_file), + str(self.test_old_pi_file), answer_invoice, answer_old_pi_df, ) @@ -170,7 +178,6 @@ def test_one_new_pi(self): # One allocation invoice_month = "2024-06" test_invoice = self._get_test_invoice(["PI"], [100]) - test_old_pi_file = self.tempdir / "old_pi.csv" test_old_pi_df = pandas.DataFrame( columns=[ "PI", @@ -180,7 +187,7 @@ def test_one_new_pi(self): "2nd Month Used", ] ) - test_old_pi_df.to_csv(test_old_pi_file, index=False) + test_old_pi_df.to_csv(self.test_old_pi_file, index=False) answer_invoice = pandas.concat( [ @@ -210,7 +217,7 @@ def test_one_new_pi(self): 
self._assert_result_invoice_and_old_pi_file( invoice_month, test_invoice, - str(test_old_pi_file), + str(self.test_old_pi_file), answer_invoice, answer_old_pi_df, ) @@ -246,7 +253,7 @@ def test_one_new_pi(self): self._assert_result_invoice_and_old_pi_file( invoice_month, test_invoice, - str(test_old_pi_file), + str(self.test_old_pi_file), answer_invoice, answer_old_pi_df, ) @@ -282,7 +289,7 @@ def test_one_new_pi(self): self._assert_result_invoice_and_old_pi_file( invoice_month, test_invoice, - str(test_old_pi_file), + str(self.test_old_pi_file), answer_invoice, answer_old_pi_df, ) @@ -293,7 +300,6 @@ def test_one_month_pi(self): # Remaining credits completely covers costs invoice_month = "2024-07" test_invoice = self._get_test_invoice(["PI"], [200]) - test_old_pi_file = self.tempdir / "old_pi.csv" test_old_pi_df = pandas.DataFrame( { "PI": ["PI"], @@ -303,7 +309,7 @@ def test_one_month_pi(self): "2nd Month Used": [0], } ) - test_old_pi_df.to_csv(test_old_pi_file, index=False) + test_old_pi_df.to_csv(self.test_old_pi_file, index=False) answer_invoice = pandas.concat( [ @@ -333,7 +339,7 @@ def test_one_month_pi(self): self._assert_result_invoice_and_old_pi_file( invoice_month, test_invoice, - str(test_old_pi_file), + str(self.test_old_pi_file), answer_invoice, answer_old_pi_df, ) @@ -369,7 +375,7 @@ def test_one_month_pi(self): self._assert_result_invoice_and_old_pi_file( invoice_month, test_invoice, - str(test_old_pi_file), + str(self.test_old_pi_file), answer_invoice, answer_old_pi_df, ) @@ -380,7 +386,6 @@ def test_two_new_pi(self): # Costs partially and completely covered invoice_month = "2024-07" test_invoice = self._get_test_invoice(["PI1", "PI1", "PI2"], [800, 500, 500]) - test_old_pi_file = self.tempdir / "old_pi.csv" test_old_pi_df = pandas.DataFrame( { "PI": ["PI1"], @@ -390,7 +395,7 @@ def test_two_new_pi(self): "2nd Month Used": [0], } ) - test_old_pi_df.to_csv(test_old_pi_file, index=False) + test_old_pi_df.to_csv(self.test_old_pi_file, index=False) 
answer_invoice = pandas.concat( [ @@ -420,7 +425,7 @@ def test_two_new_pi(self): self._assert_result_invoice_and_old_pi_file( invoice_month, test_invoice, - str(test_old_pi_file), + str(self.test_old_pi_file), answer_invoice, answer_old_pi_df, ) @@ -431,7 +436,6 @@ def test_old_pi_file_overwritten(self): invoice_month = "2024-06" test_invoice = self._get_test_invoice(["PI", "PI"], [500, 500]) - test_old_pi_file = self.tempdir / "old_pi.csv" test_old_pi_df = pandas.DataFrame( { "PI": ["PI"], @@ -441,7 +445,7 @@ def test_old_pi_file_overwritten(self): "2nd Month Used": [0], } ) - test_old_pi_df.to_csv(test_old_pi_file, index=False) + test_old_pi_df.to_csv(self.test_old_pi_file, index=False) answer_invoice = pandas.concat( [ @@ -473,7 +477,7 @@ def test_old_pi_file_overwritten(self): self._assert_result_invoice_and_old_pi_file( invoice_month, test_invoice, - str(test_old_pi_file), + str(self.test_old_pi_file), answer_invoice, answer_old_pi_df, credit_amount=200, # Test that old PI entry is overwritten with new initial credit amount @@ -493,7 +497,6 @@ def test_excluded_su_types(self): "OpenStack GPUH100", ], ) - test_old_pi_file = self.tempdir / "old_pi.csv" test_old_pi_df = pandas.DataFrame( columns=[ "PI", @@ -503,7 +506,7 @@ def test_excluded_su_types(self): "2nd Month Used", ] ) - test_old_pi_df.to_csv(test_old_pi_file, index=False) + test_old_pi_df.to_csv(self.test_old_pi_file, index=False) answer_invoice = pandas.concat( [ @@ -534,7 +537,7 @@ def test_excluded_su_types(self): self._assert_result_invoice_and_old_pi_file( invoice_month, test_invoice, - str(test_old_pi_file), + str(self.test_old_pi_file), answer_invoice, answer_old_pi_df, ) @@ -598,7 +601,6 @@ def test_newly_eligible_pi_existing_old_pi_entry(self): invoice_month = "2024-07" test_invoice = self._get_test_invoice(["PI"], [800]) # Eligible institution - test_old_pi_file = self.tempdir / "old_pi.csv" test_old_pi_df = pandas.DataFrame( { "PI": ["PI"], @@ -610,7 +612,7 @@ def 
test_newly_eligible_pi_existing_old_pi_entry(self): "2nd Month Used": [0], } ) - test_old_pi_df.to_csv(test_old_pi_file, index=False) + test_old_pi_df.to_csv(self.test_old_pi_file, index=False) answer_invoice = pandas.concat( [ @@ -642,7 +644,7 @@ def test_newly_eligible_pi_existing_old_pi_entry(self): self._assert_result_invoice_and_old_pi_file( invoice_month, test_invoice, - str(test_old_pi_file), + str(self.test_old_pi_file), answer_invoice, answer_old_pi_df, ) @@ -656,3 +658,61 @@ def test_apply_credit_error(self): test_invoice = test_utils.new_new_pi_credit_processor() with pytest.raises(SystemExit): test_invoice._get_pi_age(old_pi_df, "PI1", invoice_month) + + def test_excluded_bm_cluster(self): + """Projects in the 'bm' cluster should not receive New-PI credits.""" + invoice_month = "2024-06" + # Single PI with two allocations: one in 'bm' (should be excluded), one eligible + test_invoice = self._get_test_invoice( + ["PI", "PI"], + [600, 600], + clusters=["bm", "compute"], + ) + + # Start with empty old PI file to simulate a new PI + test_old_pi_df = pandas.DataFrame( + columns=[ + "PI", + "First Invoice Month", + "Initial Credits", + "1st Month Used", + "2nd Month Used", + ] + ) + test_old_pi_df.to_csv(self.test_old_pi_file, index=False) + + answer_invoice = pandas.concat( + [ + test_invoice, + pandas.DataFrame( + { + # 'bm' row excluded -> no credit; eligible row gets credit equal to its cost + "Credit": [None, 600], + "Credit Code": [None, "0002"], + # follow existing tests' convention where excluded row keeps its cost as PI Balance + "PI Balance": [600, 0], + "Balance": [600, 0], + } + ), + ], + axis=1, + ) + + # Old PI file should now record the new PI and used amount for the first month + answer_old_pi_df = pandas.DataFrame( + { + "PI": ["PI"], + "First Invoice Month": ["2024-06"], + "Initial Credits": [1000], + "1st Month Used": [600], + "2nd Month Used": [0], + } + ) + + self._assert_result_invoice_and_old_pi_file( + invoice_month, + test_invoice, + 
def new_bm_usage_processor(
    name="",
    invoice_month="0000-00",
    data=None,
):
    """Build a ``BMUsageProcessor`` for tests.

    ``data`` defaults to an empty ``pandas.DataFrame`` when it is ``None``.
    The processor is constructed positionally as
    ``(invoice_month, data, name)``.
    """
    frame = pandas.DataFrame() if data is None else data
    return bm_usage_processor.BMUsageProcessor(invoice_month, frame, name)