diff --git a/process_report/invoices/MOCA_group_specific_invoice.py b/process_report/invoices/MOCA_group_specific_invoice.py new file mode 100644 index 00000000..fbf9326e --- /dev/null +++ b/process_report/invoices/MOCA_group_specific_invoice.py @@ -0,0 +1,134 @@ +import os +from dataclasses import dataclass +import tempfile + +import pandas + +from process_report.invoices import invoice, pdf_invoice + + +@dataclass +class MOCAGroupInvoice(pdf_invoice.PDFInvoice): + CREDIT_COLUMN_COPY_LIST = [ + invoice.INVOICE_DATE_FIELD, + invoice.INVOICE_EMAIL_FIELD, + invoice.GROUP_NAME_FIELD, + invoice.GROUP_INSTITUTION_FIELD, + ] + TOTAL_COLUMN_LIST = [ + invoice.COST_FIELD, + invoice.GROUP_BALANCE_USED_FIELD, + invoice.CREDIT_FIELD, + invoice.BALANCE_FIELD, + ] + + DOLLAR_COLUMN_LIST = [ + invoice.RATE_FIELD, + invoice.GROUP_BALANCE_FIELD, + invoice.COST_FIELD, + invoice.GROUP_BALANCE_USED_FIELD, + invoice.CREDIT_FIELD, + invoice.BALANCE_FIELD, + ] + + export_columns_list = [ + invoice.INVOICE_DATE_FIELD, + invoice.PROJECT_FIELD, + invoice.PROJECT_ID_FIELD, + invoice.PI_FIELD, + invoice.INVOICE_EMAIL_FIELD, + invoice.INVOICE_ADDRESS_FIELD, + invoice.INSTITUTION_FIELD, + invoice.INSTITUTION_ID_FIELD, + invoice.SU_HOURS_FIELD, + invoice.SU_TYPE_FIELD, + invoice.RATE_FIELD, + invoice.GROUP_NAME_FIELD, + invoice.GROUP_INSTITUTION_FIELD, + invoice.GROUP_BALANCE_FIELD, + invoice.COST_FIELD, + invoice.GROUP_BALANCE_USED_FIELD, + invoice.CREDIT_FIELD, + invoice.CREDIT_CODE_FIELD, + invoice.BALANCE_FIELD, + ] + + prepay_credits: pandas.DataFrame + + def _prepare(self): + self.export_data = self.data[ + self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD] + ] + self.export_data = self.export_data[ + ~self.export_data[invoice.GROUP_NAME_FIELD].isna() + ] + self.group_list = self.export_data[invoice.GROUP_NAME_FIELD].unique() + + def _get_group_dataframe(self, data, group): + group_projects = ( + data[data[invoice.GROUP_NAME_FIELD] == 
group].copy().reset_index(drop=True) + ) + + # Append one row per prepay credit recorded for this group in the invoice month + group_credit_mask = ( + self.prepay_credits[invoice.PREPAY_MONTH_FIELD] == self.invoice_month + ) & (self.prepay_credits[invoice.PREPAY_GROUP_NAME_FIELD] == group) + group_credit_info = self.prepay_credits[group_credit_mask] + for _, credit_info in group_credit_info.iterrows(): + group_credit = credit_info[invoice.PREPAY_CREDIT_FIELD] + group_projects.loc[len(group_projects)] = None + + # In this "credit row", the columns in CREDIT_COLUMN_COPY_LIST carry the + # same value as the group's first row (e.g. Invoice Month, Group Name, etc.) + for column_name in self.CREDIT_COLUMN_COPY_LIST: + if column_name in group_projects.columns: + group_projects.loc[group_projects.index[-1], column_name] = ( + group_projects.loc[0, column_name] + ) + + # Group is billed for the credit amount, so it is both the row's cost and balance + group_projects.loc[ + group_projects.index[-1], [invoice.COST_FIELD, invoice.BALANCE_FIELD] + ] = [group_credit] * 2 + + # Append a "Total" row holding the sum of each TOTAL_COLUMN_LIST column present + column_sums = [] + sum_columns_list = [] + for column_name in self.TOTAL_COLUMN_LIST: + if column_name in group_projects.columns: + column_sums.append(group_projects[column_name].sum()) + sum_columns_list.append(column_name) + group_projects.loc[len(group_projects)] = ( + None # Appends a new row to the end of the dataframe, initialized with None + ) + group_projects.loc[group_projects.index[-1], invoice.INVOICE_DATE_FIELD] = ( + "Total" + ) + group_projects.loc[group_projects.index[-1], sum_columns_list] = column_sums + + # Prefix every non-missing value in the DOLLAR_COLUMN_LIST columns with "$" + for column_name in self.DOLLAR_COLUMN_LIST: + if column_name in group_projects.columns: + group_projects[column_name] = group_projects[column_name].apply( + lambda data: data if pandas.isna(data) else f"${data}" + ) + + group_projects.fillna("", inplace=True) + + return group_projects + + def export(self): + self._filter_columns() + + if not os.path.exists(self.name): + os.mkdir(self.name) + + for group in self.group_list: + group_dataframe = 
self._get_group_dataframe(self.export_data, group) + group_instituition = group_dataframe[invoice.GROUP_INSTITUTION_FIELD].iat[0] + group_contact_email = group_dataframe[invoice.INVOICE_EMAIL_FIELD].iat[0] + group_invoice_path = f"{self.name}/{group_instituition}_{group_contact_email}_{self.invoice_month}.pdf" + + with tempfile.NamedTemporaryFile(mode="w", suffix=".html") as temp_fd: + self._create_html_invoice(temp_fd, group_dataframe, "pi_invoice.html") + self._create_pdf_invoice(temp_fd.name, group_invoice_path) diff --git a/process_report/invoices/pdf_invoice.py b/process_report/invoices/pdf_invoice.py new file mode 100644 index 00000000..48920fae --- /dev/null +++ b/process_report/invoices/pdf_invoice.py @@ -0,0 +1,59 @@ +import os +import sys +from dataclasses import dataclass +import subprocess + +import pandas +from jinja2 import Environment, FileSystemLoader + +import process_report.invoices.invoice as invoice +import process_report.util as util + + +TEMPLATE_DIR_PATH = "process_report/templates" + + +@dataclass +class PDFInvoice(invoice.Invoice): + @staticmethod + def _create_html_invoice(temp_fd, data: pandas.DataFrame, template_filename: str): + environment = Environment(loader=FileSystemLoader(TEMPLATE_DIR_PATH)) + template = environment.get_template(template_filename) + content = template.render( + data=data, + ) + temp_fd.write(content) + temp_fd.flush() + + @staticmethod + def _create_pdf_invoice(html_filepath: str, output_pdf_path: str): + chrome_binary_location = os.environ.get("CHROME_BIN_PATH", "/usr/bin/chromium") + if not os.path.exists(chrome_binary_location): + sys.exit( + f"Chrome binary does not exist at {chrome_binary_location}. 
Make sure the env var CHROME_BIN_PATH is set correctly or that Google Chrome is installed" + ) + + subprocess.run( + [ + chrome_binary_location, + "--headless", + "--no-sandbox", + f"--print-to-pdf={output_pdf_path}", + "--no-pdf-header-footer", + "file://" + html_filepath, + ], + capture_output=True, + ) + + def export_s3(self, s3_bucket): + def _export_s3_group_invoice(invoice): + invoice_path = os.path.join(self.name, invoice) + striped_invoice_path = os.path.splitext(invoice_path)[0] + output_s3_path = f"Invoices/{self.invoice_month}/{striped_invoice_path}.pdf" + output_s3_archive_path = f"Invoices/{self.invoice_month}/Archive/{striped_invoice_path} {util.get_iso8601_time()}.pdf" + s3_bucket.upload_file(invoice_path, output_s3_path) + s3_bucket.upload_file(invoice_path, output_s3_archive_path) + + # self.name is name of folder storing PDF invoices + for invoice_filename in os.listdir(self.name): + _export_s3_group_invoice(invoice_filename) diff --git a/process_report/invoices/pi_specific_invoice.py b/process_report/invoices/pi_specific_invoice.py index c5d94823..9dbcd66f 100644 --- a/process_report/invoices/pi_specific_invoice.py +++ b/process_report/invoices/pi_specific_invoice.py @@ -1,15 +1,11 @@ import os -import sys from dataclasses import dataclass -import subprocess import tempfile import logging import pandas -from jinja2 import Environment, FileSystemLoader -import process_report.invoices.invoice as invoice -import process_report.util as util +from process_report.invoices import invoice, pdf_invoice TEMPLATE_DIR_PATH = "process_report/templates" @@ -20,7 +16,7 @@ @dataclass -class PIInvoice(invoice.Invoice): +class PIInvoice(pdf_invoice.PDFInvoice): """ This invoice operates on data processed by these Processors: - ValidateBillablePIsProcessor @@ -110,39 +106,6 @@ def _get_pi_dataframe(self, data, pi): return pi_projects def export(self): - def _create_html_invoice(temp_fd): - environment = Environment(loader=FileSystemLoader(TEMPLATE_DIR_PATH)) - 
template = environment.get_template("pi_invoice.html") - content = template.render( - data=pi_dataframe, - ) - temp_fd.write(content) - temp_fd.flush() - - def _create_pdf_invoice(temp_fd_name): - chrome_binary_location = os.environ.get( - "CHROME_BIN_PATH", "/usr/bin/chromium" - ) - if not os.path.exists(chrome_binary_location): - sys.exit( - f"Chrome binary does not exist at {chrome_binary_location}. Make sure the env var CHROME_BIN_PATH is set correctly and that Google Chrome is installed" - ) - - invoice_pdf_path = ( - f"{self.name}/{pi_instituition}_{pi}_{self.invoice_month}.pdf" - ) - subprocess.run( - [ - chrome_binary_location, - "--headless", - "--no-sandbox", - f"--print-to-pdf={invoice_pdf_path}", - "--no-pdf-header-footer", - f"file://{temp_fd_name}", - ], - capture_output=True, - ) - self._filter_columns() # self.name is name of folder storing invoices @@ -154,19 +117,10 @@ def _create_pdf_invoice(temp_fd_name): pi_dataframe = self._get_pi_dataframe(self.export_data, pi) pi_instituition = pi_dataframe[invoice.INSTITUTION_FIELD].iat[0] + invoice_pdf_path = ( + f"{self.name}/{pi_instituition}_{pi}_{self.invoice_month}.pdf" + ) with tempfile.NamedTemporaryFile(mode="w", suffix=".html") as temp_fd: - _create_html_invoice(temp_fd) - _create_pdf_invoice(temp_fd.name) - - def export_s3(self, s3_bucket): - def _export_s3_pi_invoice(pi_invoice): - pi_invoice_path = os.path.join(self.name, pi_invoice) - striped_invoice_path = os.path.splitext(pi_invoice_path)[0] - output_s3_path = f"Invoices/{self.invoice_month}/{striped_invoice_path}.pdf" - output_s3_archive_path = f"Invoices/{self.invoice_month}/Archive/{striped_invoice_path} {util.get_iso8601_time()}.pdf" - s3_bucket.upload_file(pi_invoice_path, output_s3_path) - s3_bucket.upload_file(pi_invoice_path, output_s3_archive_path) - - for pi_invoice in os.listdir(self.name): - _export_s3_pi_invoice(pi_invoice) + self._create_html_invoice(temp_fd, pi_dataframe, "pi_invoice.html") + 
self._create_pdf_invoice(temp_fd.name, invoice_pdf_path) diff --git a/process_report/process_report.py b/process_report/process_report.py index 31893f0f..39732a9f 100644 --- a/process_report/process_report.py +++ b/process_report/process_report.py @@ -20,6 +20,7 @@ MOCA_prepaid_invoice, prepay_credits_snapshot, ocp_test_invoice, + MOCA_group_specific_invoice, ) from process_report.processors import ( coldfront_fetch_processor, @@ -184,6 +185,12 @@ def main(): default="pi_invoices", help="Name of output folder containing pi-specific invoice csvs", ) + parser.add_argument( + "--prepay-groups-output-folder", + required=False, + default="group_invoices", + help="Name of output folder containing prepay-group-specific invoice PDFs", + ) parser.add_argument( "--BU-invoice-file", required=False, @@ -391,6 +398,13 @@ def main(): prepay_contacts=prepay_info, ) + moca_group_inv = MOCA_group_specific_invoice.MOCAGroupInvoice( + name=args.prepay_groups_output_folder, + invoice_month=invoice_month, + data=processed_data, + prepay_credits=prepay_credits, + ) + ocp_test_inv = ocp_test_invoice.OcpTestInvoice( name="", invoice_month=invoice_month, data=processed_data.copy() ) @@ -406,6 +420,7 @@ def main(): moca_prepaid_inv, prepay_credits_snap, ocp_test_inv, + moca_group_inv, ], args.upload_to_s3, ) diff --git a/process_report/tests/unit/invoices/test_moca_group_specific_invoice.py b/process_report/tests/unit/invoices/test_moca_group_specific_invoice.py new file mode 100644 index 00000000..4582a7fc --- /dev/null +++ b/process_report/tests/unit/invoices/test_moca_group_specific_invoice.py @@ -0,0 +1,233 @@ +from unittest import TestCase, mock +import tempfile +import pandas + +from process_report.tests import util as test_utils + + +class TestMOCAGroupSpecificInvoice(TestCase): + @staticmethod + def _add_dollar_sign(data): + if pandas.isna(data): + return data + else: + return "$" + str(data) + + def _get_test_invoice( + self, + project, + balance=0, + is_billable=True, + 
missing_pi=False, + group_name=None, + group_contact=None, + group_institution=None, + cost=0, + group_balance_used=0, + credits=0, + ): + return pandas.DataFrame( + { + "Invoice Email": group_contact, + "Project - Allocation": project, + "Is Billable": is_billable, + "Missing PI": missing_pi, + "Prepaid Group Name": group_name, + "Prepaid Group Institution": group_institution, + "Cost": cost, + "Prepaid Group Used": group_balance_used, + "Credit": credits, + "Balance": balance, + } + ) + + def _get_answer_invoice(self, test_invoice, group_name, sum_row): + answer_invoice = ( + test_invoice[test_invoice["Prepaid Group Name"] == group_name] + .copy() + .reset_index(drop=True) + ) + answer_invoice.loc[len(answer_invoice)] = None + answer_invoice.loc[ + answer_invoice.index[-1], + ["Invoice Month", "Cost", "Prepaid Group Used", "Credit", "Balance"], + ] = sum_row + for column_name in [ + "Cost", + "Prepaid Group Used", + "Credit", + "Balance", + ]: + answer_invoice[column_name] = answer_invoice[column_name].apply( + lambda data: data if pandas.isna(data) else f"${data}" + ) + answer_invoice.fillna("", inplace=True) + return answer_invoice + + def _add_credit_row(self, invoice, credit_row): + """Modify input dataframe to add a row for prepay credit.""" + invoice.loc[len(invoice)] = None + invoice.loc[invoice.index[-1], ["Prepaid Group Name", "Cost", "Balance"]] = ( + credit_row + ) + return invoice + + def _get_test_prepay_credits(self, months, group_names, credits): + return pandas.DataFrame( + {"Month": months, "Group Name": group_names, "Credit": credits} + ) + + def test_filter_rows(self): + """Are nonbillables and non-group projects correctly filtered out?""" + test_invoice = self._get_test_invoice( + ["P1", "P2", "P3", "P4", "P5"], + is_billable=[True, False, True, True, True], + missing_pi=[False, False, True, False, False], + group_name=["G1", None, None, None, "G1"], + ) + answer_invoice = test_invoice.copy() + answer_invoice = answer_invoice.iloc[[0, 4]] + + 
group_inv = test_utils.new_MOCA_group_specific_invoice(data=test_invoice) + group_inv._prepare() + + self.assertTrue(answer_invoice.equals(group_inv.export_data)) + + def test_get_dataframe_one_group(self): + """One prepay group with three projects, with balance used and credits""" + invoice_month = "2024-01" + group_name = "G1" + test_invoice = self._get_test_invoice( + ["P1", "P2", "P3"], + group_name=[group_name] * 3, + balance=[100, 200, 300], + group_balance_used=[1000, 2000, 0], + credits=[50, None, 50], + ) + test_prepay_credits = self._get_test_prepay_credits( + [invoice_month], [group_name], [5000] + ) + answer_invoice = test_invoice.copy() + answer_invoice = self._add_credit_row(answer_invoice, [group_name, 5000, 5000]) + answer_invoice = self._get_answer_invoice( + answer_invoice, group_name, ["Total", 5000, 3000, 100, 5600] + ) + + group_inv = test_utils.new_MOCA_group_specific_invoice( + invoice_month=invoice_month, prepay_credits=test_prepay_credits + ) + output_invoice = group_inv._get_group_dataframe(test_invoice, group_name) + self.assertTrue(answer_invoice.equals(output_invoice)) + + def test_get_group_dataframe(self): + """Two prepay groups with one project each, given credits at different times""" + + # Neither groups have credits on current invoice month + invoice_month = "2024-01" + group_names = ["G1", "G2"] + test_prepay_credits = self._get_test_prepay_credits( + ["2023-12", "2024-02", "2024-03", "2024-03", "2024-03"], + ["G1", "G1", "G2", "G2", "G1"], + [1000, 2000, 3000, 4000, 2000], + ) + test_invoice = self._get_test_invoice( + ["P1", "P2"], + group_name=group_names, + balance=[100, 200], + group_balance_used=[1000, 2000], + ) + + answer_invoice_G1 = self._get_answer_invoice( + test_invoice, "G1", sum_row=["Total", 0, 1000, 0, 100] + ) + answer_invoice_G2 = self._get_answer_invoice( + test_invoice, "G2", sum_row=["Total", 0, 2000, 0, 200] + ) + + group_inv = test_utils.new_MOCA_group_specific_invoice( + invoice_month=invoice_month, 
prepay_credits=test_prepay_credits + ) + output_invoice = group_inv._get_group_dataframe(test_invoice, "G1") + self.assertTrue(answer_invoice_G1.equals(output_invoice)) + + output_invoice = group_inv._get_group_dataframe(test_invoice, "G2") + self.assertTrue(answer_invoice_G2.equals(output_invoice)) + + # One group has a credit on invoice month. Invoice for G2 is unchanged + invoice_month = "2024-02" + + answer_invoice_G1 = test_invoice.copy() + answer_invoice_G1 = self._add_credit_row( + answer_invoice_G1, credit_row=["G1", 2000, 2000] + ) + answer_invoice_G1 = self._get_answer_invoice( + answer_invoice_G1, "G1", sum_row=["Total", 2000, 1000, 0, 2100] + ) + + group_inv.invoice_month = invoice_month + output_invoice = group_inv._get_group_dataframe(test_invoice, "G1") + self.assertTrue(answer_invoice_G1.equals(output_invoice)) + + output_invoice = group_inv._get_group_dataframe(test_invoice, "G2") + self.assertTrue(answer_invoice_G2.equals(output_invoice)) + + # G2 has two credits, G1 has one credit (should be unchanged) + invoice_month = "2024-03" + + answer_invoice_G2 = test_invoice.copy() + answer_invoice_G2 = self._add_credit_row( + answer_invoice_G2, credit_row=["G2", 3000, 3000] + ) + answer_invoice_G2 = self._add_credit_row( + answer_invoice_G2, credit_row=["G2", 4000, 4000] + ) + answer_invoice_G2 = self._get_answer_invoice( + answer_invoice_G2, "G2", sum_row=["Total", 7000, 2000, 0, 7200] + ) + + group_inv.invoice_month = invoice_month + output_invoice = group_inv._get_group_dataframe(test_invoice, "G1") + self.assertTrue(answer_invoice_G1.equals(output_invoice)) + + output_invoice = group_inv._get_group_dataframe(test_invoice, "G2") + self.assertTrue(answer_invoice_G2.equals(output_invoice)) + + @mock.patch("process_report.invoices.invoice.Invoice._filter_columns") + @mock.patch("os.path.exists") + @mock.patch("subprocess.run") + def test_export(self, mock_subprocess_run, mock_path_exists, mock_filter_cols): + """Are PDFs exported as desired?""" + 
invoice_month = "2024-10" + test_invoice = self._get_test_invoice( + ["P1", "P2", "P3", "P4"], + group_name=["G1", "G1", "G2", None], + group_contact=["G1@bu.edu", "G1@bu.edu", "G2@hu.edu", None], + group_institution=["BU", "BU", "HU", None], + ) + test_prepay_credits = self._get_test_prepay_credits([], [], []) + + mock_filter_cols.return_value = test_invoice + mock_path_exists.return_value = True + + with tempfile.TemporaryDirectory() as test_dir: + group_inv = test_utils.new_MOCA_group_specific_invoice( + test_dir, + invoice_month, + data=test_invoice, + prepay_credits=test_prepay_credits, + ) + group_inv.process() + group_inv.export() + group_pdf_1 = f"{test_dir}/BU_G1@bu.edu_{invoice_month}.pdf" + group_pdf_2 = f"{test_dir}/HU_G2@hu.edu_{invoice_month}.pdf" + + for i, group_pdf_path in enumerate([group_pdf_1, group_pdf_2]): + chrome_arglist, _ = mock_subprocess_run.call_args_list[i] + answer_arglist = [ + "/usr/bin/chromium", + "--headless", + "--no-sandbox", + f"--print-to-pdf={group_pdf_path}", + "--no-pdf-header-footer", + ] + self.assertTrue(answer_arglist == chrome_arglist[0][:-1]) diff --git a/process_report/tests/util.py b/process_report/tests/util.py index 5b8abac3..a64c1e34 100644 --- a/process_report/tests/util.py +++ b/process_report/tests/util.py @@ -6,6 +6,7 @@ pi_specific_invoice, prepay_credits_snapshot, NERC_total_invoice, + MOCA_group_specific_invoice, ) from process_report.processors import ( @@ -97,6 +98,18 @@ def new_coldfront_fetch_processor( ) +def new_MOCA_group_specific_invoice( + name="", invoice_month="0000-00", data=None, prepay_credits=None +): + if data is None: + data = pandas.DataFrame() + if prepay_credits is None: + prepay_credits = pandas.DataFrame() + return MOCA_group_specific_invoice.MOCAGroupInvoice( + name, invoice_month, data, prepay_credits + ) + + def new_add_institution_processor( name="", invoice_month="0000-00",