Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions process_report/invoices/invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ class InvoiceColumn:
CREDIT_CODE_FIELD = "Credit Code"
SUBSIDY_FIELD = "Subsidy"
BALANCE_FIELD = "Balance"
ROYALTY_FIELD = "Royalty"
###

### Internally used field names
Expand All @@ -86,6 +87,7 @@ class InvoiceColumn:
GROUP_MANAGED_FIELD = "MGHPCC Managed"
CLUSTER_NAME_FIELD = "Cluster Name"
IS_COURSE_FIELD = "Is Course"
IS_EXTERNALLY_FUNDED_FIELD = "Is Externally Funded"
###

### Initialized Column objects
Expand Down Expand Up @@ -142,6 +144,10 @@ class InvoiceColumn:
IS_COURSE_COLUMN = InvoiceColumn(
name=IS_COURSE_FIELD, dtype=BOOL_FIELD_TYPE, default_value=False
)
# Boolean column whose default value is False.
# TODO: Confirm that False is the right default. With it, a project whose
# ColdFront data does not specify "Is Externally Funded" is presumably treated
# as internally funded -- verify against the ColdFront fetch processor.
IS_EXTERNALLY_FUNDED_COLUMN = InvoiceColumn(
name=IS_EXTERNALLY_FUNDED_FIELD, dtype=BOOL_FIELD_TYPE, default_value=False
)
# Royalty amount column; declared with the balance dtype and no explicit default value.
ROYALTY_COLUMN = InvoiceColumn(name=ROYALTY_FIELD, dtype=BALANCE_FIELD_TYPE)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@joachimweyl @naved001 @knikolla Are we fine with False as the default for this field? This means that if a ColdFront project does not specify the Is Externally Funded attribute, invoicing will assume it is internally funded.

###


Expand Down
21 changes: 21 additions & 0 deletions process_report/invoices/royalty_invoice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from dataclasses import dataclass

import process_report.invoices.invoice as invoice


@dataclass
class RoyaltyInvoice(invoice.Invoice):
    """Invoice named "Royalties" that exports the royalty-related columns."""

    name: str = "Royalties"

    # TODO: Confirm list of information we want to include in royalty report
    export_columns_list = [
        invoice.INVOICE_DATE_FIELD,
        invoice.PROJECT_FIELD,
        invoice.PI_FIELD,
        invoice.CLUSTER_NAME_FIELD,
        invoice.INSTITUTION_FIELD,
        invoice.SU_TYPE_FIELD,
        invoice.BALANCE_FIELD,
        invoice.ROYALTY_FIELD,
    ]
Comment on lines +9 to +18

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@joachimweyl @knikolla @naved001 Are we fine with this set of columns for now for the royalty invoice? Is there any other info we want to provide to RH?


def _prepare_export(self):
    """Keep only the rows whose royalty value is present (not NA) for export."""
    has_royalty = self.data[invoice.ROYALTY_FIELD].notna()
    self.export_data = self.data[has_royalty]
5 changes: 5 additions & 0 deletions process_report/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,5 +215,10 @@ def get_nonbillable_timed_projects(self) -> list[tuple[str, str]]:
].itertuples(index=False, name=None)
)

@functools.lru_cache
def get_royalty_exempt_institutions_list(self) -> tuple[str, ...]:
    """Return the royalty-exempt institution names read from the settings file.

    The file at ``invoice_settings.royalty_exempt_institutions_filepath`` is
    expected to hold one institution name per line. Surrounding whitespace is
    stripped and blank lines are skipped, so a trailing newline or stray
    spaces cannot produce an empty or never-matching entry.

    The result is memoized by ``functools.lru_cache`` (keyed on ``self``), so
    the file is read once per loader instance.

    Returns:
        A tuple of institution names, in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Explicit encoding keeps the result independent of the platform default.
    with open(
        invoice_settings.royalty_exempt_institutions_filepath, encoding="utf-8"
    ) as f:
        stripped_lines = (line.strip() for line in f.read().splitlines())
        return tuple(name for name in stripped_lines if name)


loader = Loader()
4 changes: 4 additions & 0 deletions process_report/process_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
MOCA_prepaid_invoice,
prepay_credits_snapshot,
ocp_test_invoice,
royalty_invoice,
)
from process_report.processors import (
coldfront_fetch_processor,
Expand All @@ -31,6 +32,7 @@
bu_subsidy_processor,
prepayment_processor,
validate_cluster_name_processor,
royalty_processor,
)

PROCESSING_ORDER = [
Expand All @@ -45,6 +47,7 @@
new_pi_credit_processor.NewPICreditProcessor,
bu_subsidy_processor.BUSubsidyProcessor,
prepayment_processor.PrepaymentProcessor,
royalty_processor.RoyaltyProcessor,
]


Expand Down Expand Up @@ -97,6 +100,7 @@ def main():
MOCA_prepaid_invoice.MOCAPrepaidInvoice,
prepay_credits_snapshot.PrepayCreditsSnapshot,
ocp_test_invoice.OcpTestInvoice,
royalty_invoice.RoyaltyInvoice,
],
invoice_settings.upload_to_s3,
)
Expand Down
16 changes: 15 additions & 1 deletion process_report/processors/coldfront_fetch_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
CF_ATTR_ALLOCATED_PROJECT_ID = "Allocated Project ID"
CF_ATTR_INSTITUTION_SPECIFIC_CODE = "Institution-Specific Code"
CF_ATTR_IS_COURSE = "Is Course?"
CF_ATTR_IS_EXTERNALLY_FUNDED = "Is Externally Funded"


@dataclass
Expand All @@ -34,7 +35,10 @@ class ColdfrontFetchProcessor(processor.Processor):
)
coldfront_data_filepath: str = invoice_settings.coldfront_api_filepath

initializes_columns = (invoice.IS_COURSE_COLUMN,)
initializes_columns = (
invoice.IS_COURSE_COLUMN,
invoice.IS_EXTERNALLY_FUNDED_COLUMN,
)
operates_on_columns = (
*initializes_columns,
invoice.PROJECT_COLUMN,
Expand Down Expand Up @@ -125,12 +129,19 @@ def _get_allocation_data(self, coldfront_api_data):
project_dict["attributes"].get(CF_ATTR_IS_COURSE, "No").lower()
== "yes"
)
is_externally_funded = (
project_dict["project"]["attributes"]
.get(CF_ATTR_IS_EXTERNALLY_FUNDED, "No")
.lower()
== "yes"
)
allocation_data[(project_id, cluster_name)] = {
invoice.PROJECT_FIELD: project_name,
invoice.PI_FIELD: pi_name,
invoice.INSTITUTION_ID_FIELD: institute_code,
invoice.CLUSTER_NAME_FIELD: cluster_name,
invoice.IS_COURSE_FIELD: is_course,
invoice.IS_EXTERNALLY_FUNDED_FIELD: is_externally_funded,
}
except KeyError:
continue
Expand Down Expand Up @@ -164,6 +175,9 @@ def _apply_allocation_data(self, allocation_data):
invoice.INSTITUTION_ID_FIELD
]
self.data.loc[mask, invoice.IS_COURSE_FIELD] = data[invoice.IS_COURSE_FIELD]
self.data.loc[mask, invoice.IS_EXTERNALLY_FUNDED_FIELD] = data[
invoice.IS_EXTERNALLY_FUNDED_FIELD
]

def _process(self):
api_data = self._get_coldfront_api_data()
Expand Down
2 changes: 1 addition & 1 deletion process_report/processors/prepayment_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

@dataclass
class PrepaymentProcessor(discount_processor.DiscountProcessor):
IS_DISCOUNT_BY_NERC = True
IS_DISCOUNT_BY_NERC = False

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change addresses the fact that prepayments are still subject to the royalty. With this change, the prepay discount will no longer impact the Balance column, which we've used to indicate the money we (the MOC) expect to obtain, which is our revenue, which is the actual value subject to the royalty, as opposed to the money PIs are expected to pay (PI Balance), which is not subject to the royalty.

This change will impact how the PI-specific invoices are exported, and as mentioned by @knikolla in slack, we may want to review how we handle these balance columns and refactor them.

@knikolla @naved001 Given that we have no prepayments yet, are we fine with postponing the refactoring of the balance fields until after this PR is merged?

PREPAY_DEBITS_S3_FILEPATH = "Prepay/prepay_debits.csv"

initializes_columns = (
Expand Down
44 changes: 44 additions & 0 deletions process_report/processors/royalty_processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from decimal import Decimal
import logging
from dataclasses import dataclass, field

from process_report.loader import loader
from process_report.settings import invoice_settings
from process_report.invoices import invoice
from process_report.processors import processor


# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Sets up root logging at INFO level as a side effect of importing this module.
# NOTE: basicConfig does nothing if the root logger already has handlers.
logging.basicConfig(level=logging.INFO)


@dataclass
class RoyaltyProcessor(processor.Processor):
    """
    Given a percentage royalty rate and a list of exempt institutions, creates a
    new `Royalty` column equal to `Balance` * royalty_rate.

    The royalty is filled in for a row when either:
      - its institution is NOT in the exempt institution list, or
      - its institution is in the exempt list but the project is externally funded.

    Every other row gets an empty (NA) `Royalty` value.
    """

    royalty_rate: Decimal = invoice_settings.royalty_rate
    # Variable-length tuple of institution names; loaded through the shared loader
    royalty_exempt_institution_list: tuple[str, ...] = field(
        default_factory=loader.get_royalty_exempt_institutions_list
    )

    initializes_columns = (invoice.ROYALTY_COLUMN,)
    operates_on_columns = (
        *initializes_columns,
        invoice.INSTITUTION_COLUMN,
        invoice.IS_EXTERNALLY_FUNDED_COLUMN,
        invoice.BALANCE_COLUMN,
    )

    def _process(self):
        """Populate the `Royalty` column on `self.data`."""
        # Membership in the exempt list is computed once and reused by both masks
        exempt_institution_mask = self.data[invoice.INSTITUTION_FIELD].isin(
            self.royalty_exempt_institution_list
        )
        externally_funded_mask = exempt_institution_mask & (
            self.data[invoice.IS_EXTERNALLY_FUNDED_FIELD] == True  # noqa: E712
        )
        royalty_applies_mask = ~exempt_institution_mask | externally_funded_mask

        # `where` keeps the computed royalty only on rows selected by the mask
        self.data[invoice.ROYALTY_FIELD] = (
            self.data[invoice.BALANCE_FIELD] * self.royalty_rate
        ).where(royalty_applies_mask)
4 changes: 4 additions & 0 deletions process_report/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ class Settings(BaseSettings):
prepay_credits_filepath: str = "prepaid_credits.csv"
prepay_contacts_filepath: str = "prepaid_contacts.csv"

# Royalty configuration
royalty_rate: Decimal = Decimal("0.00")
royalty_exempt_institutions_filepath: str = "royalty_exempt_institutions.txt"

# nerc_rates info
new_pi_credit_amount: Decimal | None = None
limit_new_pi_credit_to_partners: bool | None = None
Expand Down
2 changes: 2 additions & 0 deletions process_report/tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
"MGHPCC Managed": BOOL_FIELD_TYPE,
"Cluster Name": STRING_FIELD_TYPE,
"Is Course": BOOL_FIELD_TYPE,
"Is Externally Funded": BOOL_FIELD_TYPE,
"Royalty": BALANCE_FIELD_TYPE,
}


Expand Down
11 changes: 8 additions & 3 deletions process_report/tests/e2e/test_data/test_coldfront_api_data.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
{
"id": 1,
"project": {
"pi": "pi1@bu.edu"
"pi": "pi1@bu.edu",
"attributes": {
"Is Externally Funded": "Yes"
}
},
"resource": {
"name": "shift"
Expand All @@ -15,7 +18,8 @@
{
"id": 1,
"project": {
"pi": "pi1@bu.edu"
"pi": "pi1@bu.edu",
"attributes": {}
},
"resource": {
"name": "shift"
Expand All @@ -28,7 +32,8 @@
{
"id": 1,
"project": {
"pi": "pi2@harvard.edu"
"pi": "pi2@harvard.edu",
"attributes": {}
},
"resource": {
"name": "shift"
Expand Down
3 changes: 3 additions & 0 deletions process_report/tests/e2e/test_e2e_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"MOCA-A_Prepaid_Groups-2025-06-Invoice.csv",
"NERC_Prepaid_Group-Credits-2025-06.csv",
"OCP_TEST 2025-06.csv",
"Royalties 2025-06.csv",
]

EXPECTED_DIRECTORIES = ["pi_invoices"]
Expand Down Expand Up @@ -127,6 +128,8 @@ def _prepare_pipeline_execution(
env["nonbillable_pis_filepath"] = str(test_files["test_pi.yaml"])
env["nonbillable_projects_filepath"] = str(test_files["test_projects.yaml"])

env["ROYALTY_RATE"] = "0.1"

# Fallback ensures test works even when CI environment doesn't set Chrome path
env.setdefault("CHROME_BIN_PATH", "/usr/bin/chromium")
env["PYTHONPATH"] = str(project_root) + ":" + env.get("PYTHONPATH", "")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def _get_test_invoice(
institute_code=None,
cluster_name=None,
is_course=None,
externally_funded=None,
):
if not pi:
pi = [""] * len(allocation_project_id)
Expand All @@ -31,6 +32,9 @@ def _get_test_invoice(
if not is_course:
is_course = [False] * len(allocation_project_id)

if not externally_funded:
externally_funded = [False] * len(allocation_project_id)

return self.create_test_invoice(
{
"Manager (PI)": pi,
Expand All @@ -39,6 +43,7 @@ def _get_test_invoice(
"Institution - Specific Code": institute_code,
"Cluster Name": cluster_name,
"Is Course": is_course,
"Is Externally Funded": externally_funded,
}
)

Expand All @@ -49,6 +54,7 @@ def _get_mock_allocation_data(
institute_code_list,
cluster_list,
is_course_list=None,
externally_funded_list=None,
):
mock_data = []
for i, project in enumerate(project_id_list):
Expand All @@ -58,6 +64,7 @@ def _get_mock_allocation_data(
},
"project": {
"pi": pi_list[i],
"attributes": {},
},
"attributes": {
"Allocated Project ID": project,
Expand All @@ -69,6 +76,11 @@ def _get_mock_allocation_data(
if is_course_list:
mock_project_dict["attributes"]["Is Course?"] = is_course_list[i]

if externally_funded_list:
mock_project_dict["project"]["attributes"]["Is Externally Funded"] = (
externally_funded_list[i]
)

mock_data.append(mock_project_dict)

return mock_data
Expand All @@ -93,6 +105,7 @@ def test_coldfront_fetch(self, mock_get_allocation_data):
["IC1", "IC1", "", "", "IC2"],
["stack"] * 5,
is_course=[False] * 5,
externally_funded=[False] * 5,
)
test_coldfront_fetch_proc = test_utils.new_coldfront_fetch_processor(
data=test_invoice
Expand Down Expand Up @@ -231,6 +244,65 @@ def test_is_course_values(self, mock_get_allocation_data):
output_invoice = test_coldfront_fetch_proc.data
assert output_invoice.equals(answer_invoice)

@mock.patch(
    "process_report.processors.coldfront_fetch_processor.ColdfrontFetchProcessor._fetch_coldfront_allocation_api",
)
def test_is_externally_funded_default(self, mock_get_allocation_data):
    """When the API data omits 'Is Externally Funded', the column stays False"""
    project_ids = ["P1", "P2", "P3"]
    pis = ["PI1", "PI2", "PI3"]
    institute_codes = ["IC1", "IC2", "IC3"]
    clusters = ["stack"] * 3

    # No externally_funded_list is passed, so the attribute is absent from the mock
    mock_get_allocation_data.return_value = self._get_mock_allocation_data(
        project_ids, pis, institute_codes, clusters
    )
    input_invoice = self._get_test_invoice(project_ids, cluster_name=clusters)
    expected_invoice = self._get_test_invoice(
        project_ids,
        [f"{project_id}-name" for project_id in project_ids],
        pis,
        institute_codes,
        clusters,
        externally_funded=[False] * 3,
    )

    fetch_proc = test_utils.new_coldfront_fetch_processor(data=input_invoice)
    fetch_proc.process()

    assert fetch_proc.data.equals(expected_invoice)

@mock.patch(
    "process_report.processors.coldfront_fetch_processor.ColdfrontFetchProcessor._fetch_coldfront_allocation_api",
)
def test_is_externally_funded_values(self, mock_get_allocation_data):
    """'Yes'/'No'/'yes' values from the API data map to True/False/True in the output column"""
    project_ids = ["P1", "P2", "P3"]
    pis = ["PI1", "PI2", "PI3"]
    institute_codes = ["IC1", "IC2", "IC3"]
    clusters = ["stack"] * 3

    mock_get_allocation_data.return_value = self._get_mock_allocation_data(
        project_ids,
        pis,
        institute_codes,
        clusters,
        externally_funded_list=["Yes", "No", "yes"],
    )
    input_invoice = self._get_test_invoice(project_ids, cluster_name=clusters)
    expected_invoice = self._get_test_invoice(
        project_ids,
        [f"{project_id}-name" for project_id in project_ids],
        pis,
        institute_codes,
        clusters,
        externally_funded=[True, False, True],
    )

    fetch_proc = test_utils.new_coldfront_fetch_processor(data=input_invoice)
    fetch_proc.process()

    assert fetch_proc.data.equals(expected_invoice)

@mock.patch(
"process_report.processors.coldfront_fetch_processor.ColdfrontFetchProcessor._fetch_coldfront_allocation_api",
)
Expand Down
Loading