Skip to content

Commit ae8ff45

Browse files
committed
Resolved future warnings regarding dtype compatibility
Rebased on top of the latest upstream branch. Updated dtypes in the pipeline and in unit tests to strings instead of objects.
1 parent 030ac24 commit ae8ff45

5 files changed

Lines changed: 64 additions & 14 deletions

File tree

process_report/invoices/pi_specific_invoice.py

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -85,24 +85,28 @@ def _get_pi_dataframe(self, data, pi):
8585
axis=1,
8686
)
8787

88-
# Add a row containing sums for certain columns
89-
column_sums = []
90-
sum_columns_list = []
88+
# Create new row with proper dtypes instead of concatenating with None
89+
# Calculate totals first
90+
column_sums = {}
9191
for column_name in self.TOTAL_COLUMN_LIST:
9292
if column_name in pi_projects.columns:
93-
column_sums.append(pi_projects[column_name].sum())
94-
sum_columns_list.append(column_name)
93+
column_sums[column_name] = pi_projects[column_name].sum()
9594

96-
# Add a row with None values (this will convert int64 columns to float64 and bool to object)
97-
pi_projects.loc[len(pi_projects)] = None
95+
# Use pandas.concat with future-compatible approach
96+
# Create an empty row first, then populate it
97+
pi_projects = pi_projects.copy() # Ensure we have a copy
98+
new_index = len(pi_projects)
99+
100+
# Add empty row by reindexing
101+
pi_projects = pi_projects.reindex(range(len(pi_projects) + 1))
98102

99103
# Set Invoice Month and totals - add Invoice Month column if it doesn't exist
100104
if invoice.INVOICE_DATE_FIELD not in pi_projects.columns:
101-
pi_projects[invoice.INVOICE_DATE_FIELD] = None
105+
pi_projects[invoice.INVOICE_DATE_FIELD] = ""
102106

103-
pi_projects.loc[pi_projects.index[-1], invoice.INVOICE_DATE_FIELD] = "Total"
104-
for col, val in zip(sum_columns_list, column_sums):
105-
pi_projects.loc[pi_projects.index[-1], col] = val
107+
pi_projects.loc[new_index, invoice.INVOICE_DATE_FIELD] = "Total"
108+
for col, val in column_sums.items():
109+
pi_projects.loc[new_index, col] = val
106110

107111
# Add dollar sign to certain columns
108112
for column_name in self.DOLLAR_COLUMN_LIST:
@@ -111,7 +115,14 @@ def _get_pi_dataframe(self, data, pi):
111115
lambda data: data if pandas.isna(data) else f"${data}"
112116
)
113117

114-
pi_projects.fillna("", inplace=True)
118+
# Fill NaN values selectively - only fill non-numeric columns with empty strings
119+
# Keep numeric columns as they are to preserve their dtypes
120+
for col in pi_projects.columns:
121+
if not pandas.api.types.is_numeric_dtype(pi_projects[col]):
122+
pi_projects[col] = pi_projects[col].fillna("")
123+
124+
# Convert any remaining pandas NA values to empty strings for template compatibility
125+
pi_projects = pi_projects.fillna("")
115126

116127
return pi_projects
117128

process_report/process_report.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,8 @@ def merge_csv(files):
440440
dtype={
441441
COST_FIELD: pandas.ArrowDtype(pyarrow.decimal128(12, 2)),
442442
RATE_FIELD: str,
443+
PI_FIELD: "string", # Use pandas string dtype for proper string handling
444+
INSTITUTION_ID_FIELD: "string",
443445
},
444446
)
445447
dataframes.append(dataframe)

process_report/processors/coldfront_fetch_processor.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,14 @@ def _validate_allocation_data(self, allocation_data):
116116
)
117117

118118
def _apply_allocation_data(self, allocation_data):
119+
# Convert columns to string dtype to handle string values properly
120+
if invoice.PI_FIELD in self.data.columns:
121+
self.data[invoice.PI_FIELD] = self.data[invoice.PI_FIELD].astype("string")
122+
if invoice.INSTITUTION_ID_FIELD in self.data.columns:
123+
self.data[invoice.INSTITUTION_ID_FIELD] = self.data[
124+
invoice.INSTITUTION_ID_FIELD
125+
].astype("string")
126+
119127
for project_id, data in allocation_data.items():
120128
mask = self.data[invoice.PROJECT_ID_FIELD] == project_id
121129
self.data.loc[mask, invoice.PROJECT_FIELD] = data[invoice.PROJECT_FIELD]

process_report/processors/discount_processor.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,26 @@ def apply_discount_on_project(remaining_discount_amount, project_i, project):
5151
remaining_project_balance = project[pi_balance_field]
5252
applied_discount = min(remaining_project_balance, remaining_discount_amount)
5353
invoice.at[project_i, discount_field] = applied_discount
54-
invoice.at[project_i, pi_balance_field] -= applied_discount
54+
55+
# Convert applied_discount to the same dtype as the balance columns
56+
pi_balance_dtype = invoice[pi_balance_field].dtype
57+
balance_dtype = invoice[balance_field].dtype
58+
59+
applied_discount_pi = applied_discount
60+
if hasattr(applied_discount, "astype"):
61+
applied_discount_pi = applied_discount.astype(pi_balance_dtype)
62+
elif not isinstance(applied_discount, pi_balance_dtype.type):
63+
applied_discount_pi = pi_balance_dtype.type(applied_discount)
64+
65+
applied_discount_balance = applied_discount
66+
if hasattr(applied_discount, "astype"):
67+
applied_discount_balance = applied_discount.astype(balance_dtype)
68+
elif not isinstance(applied_discount, balance_dtype.type):
69+
applied_discount_balance = balance_dtype.type(applied_discount)
70+
71+
invoice.at[project_i, pi_balance_field] -= applied_discount_pi
5572
if self.IS_DISCOUNT_BY_NERC:
56-
invoice.at[project_i, balance_field] -= applied_discount
73+
invoice.at[project_i, balance_field] -= applied_discount_balance
5774
remaining_discount_amount -= applied_discount
5875
return remaining_discount_amount
5976

process_report/tests/unit/processors/test_coldfront_fetch_processor.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,12 @@ def test_coldfront_fetch(self, mock_get_allocation_data):
6969
["PI1", "PI1", "PI1", "", "PI12"],
7070
["IC1", "IC1", "", "", "IC2"],
7171
)
72+
# Convert columns to string dtype to match processor output
73+
answer_invoice["Manager (PI)"] = answer_invoice["Manager (PI)"].astype("string")
74+
answer_invoice["Institution - Specific Code"] = answer_invoice[
75+
"Institution - Specific Code"
76+
].astype("string")
77+
7278
test_coldfront_fetch_proc = test_utils.new_coldfront_fetch_processor(
7379
data=test_invoice
7480
)
@@ -123,6 +129,12 @@ def test_nonbillable_clusters(self, mock_get_allocation_data):
123129
["IC1", "IC2", "", ""],
124130
["ocp-prod", "stack", "ocp-test", "ocp-test"],
125131
)
132+
# Convert columns to string dtype to match processor output
133+
answer_invoice["Manager (PI)"] = answer_invoice["Manager (PI)"].astype("string")
134+
answer_invoice["Institution - Specific Code"] = answer_invoice[
135+
"Institution - Specific Code"
136+
].astype("string")
137+
126138
test_coldfront_fetch_proc = test_utils.new_coldfront_fetch_processor(
127139
data=test_invoice
128140
)

0 commit comments

Comments
 (0)