Skip to content

Commit 07a93d0

Browse files
committed
Fixes #180
Resolved future warnings regarding dtype compatibility for full pipeline runs. Added a unit test that checks whether invoice processing runs without raising errors.
1 parent 4b17726 commit 07a93d0

4 files changed

Lines changed: 266 additions & 15 deletions

File tree

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
import os
2+
import sys
3+
from dataclasses import dataclass
4+
import subprocess
5+
import tempfile
6+
import logging
7+
8+
import pandas
9+
from jinja2 import Environment, FileSystemLoader
10+
11+
import process_report.invoices.invoice as invoice
12+
import process_report.util as util
13+
14+
15+
# Directory handed to the Jinja2 FileSystemLoader in PIInvoice.export.
# NOTE(review): the path is relative, so it presumably resolves against the
# process's current working directory — confirm how the pipeline is launched.
TEMPLATE_DIR_PATH = "process_report/templates"
16+
17+
18+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
19+
# NOTE(review): this configures the root logger as a side effect of importing
# the module; confirm that is intended for a library module.
logging.basicConfig(level=logging.INFO)
20+
21+
22+
@dataclass
class PIInvoice(invoice.Invoice):
    """
    Per-PI PDF invoice.

    This invoice operates on data processed by these Processors:
    - ValidateBillablePIsProcessor
    - NewPICreditProcessor

    ``export`` renders one HTML page per PI from the ``pi_invoice.html``
    template and prints it to PDF with headless Chrome. ``export_s3`` uploads
    every file found in the invoice folder (``self.name``).
    """

    # Columns that are summed into the trailing "Total" row.
    TOTAL_COLUMN_LIST = [
        invoice.COST_FIELD,
        invoice.CREDIT_FIELD,
        invoice.BALANCE_FIELD,
    ]

    # Columns whose non-NA values are rendered with a leading "$".
    DOLLAR_COLUMN_LIST = [
        invoice.RATE_FIELD,
        invoice.GROUP_BALANCE_FIELD,
        invoice.COST_FIELD,
        invoice.GROUP_BALANCE_USED_FIELD,
        invoice.CREDIT_FIELD,
        invoice.BALANCE_FIELD,
    ]

    export_columns_list = [
        invoice.INVOICE_DATE_FIELD,
        invoice.PROJECT_FIELD,
        invoice.PROJECT_ID_FIELD,
        invoice.PI_FIELD,
        invoice.INVOICE_EMAIL_FIELD,
        invoice.INVOICE_ADDRESS_FIELD,
        invoice.INSTITUTION_FIELD,
        invoice.INSTITUTION_ID_FIELD,
        invoice.SU_HOURS_FIELD,
        invoice.SU_TYPE_FIELD,
        invoice.RATE_FIELD,
        invoice.GROUP_NAME_FIELD,
        invoice.GROUP_INSTITUTION_FIELD,
        invoice.GROUP_BALANCE_FIELD,
        invoice.COST_FIELD,
        invoice.GROUP_BALANCE_USED_FIELD,
        invoice.CREDIT_FIELD,
        invoice.CREDIT_CODE_FIELD,
        invoice.BALANCE_FIELD,
    ]

    def _prepare(self):
        """Keep billable rows that have a PI and record the distinct PIs."""
        self.export_data = self.data[
            self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD]
        ]
        self.pi_list = self.export_data[invoice.PI_FIELD].unique()

    def _get_pi_dataframe(self, data, pi):
        """
        Build the table rendered on a single PI's invoice.

        Selects the rows of ``data`` belonging to ``pi``, drops the prepay
        group columns when the PI has no group, appends a "Total" row holding
        the sums of ``TOTAL_COLUMN_LIST``, prefixes ``DOLLAR_COLUMN_LIST``
        values with "$", and replaces every remaining NA with "".

        Returns a new object-dtype DataFrame; ``data`` is not modified.
        """
        pi_projects = data[data[invoice.PI_FIELD] == pi].copy().reset_index(drop=True)

        # Remove prepay group data if it's empty
        if pandas.isna(pi_projects[invoice.GROUP_NAME_FIELD]).all():
            pi_projects = pi_projects.drop(
                [
                    invoice.GROUP_NAME_FIELD,
                    invoice.GROUP_INSTITUTION_FIELD,
                    invoice.GROUP_BALANCE_FIELD,
                    invoice.GROUP_BALANCE_USED_FIELD,
                ],
                axis=1,
            )

        # Sums for the totals row, limited to the columns actually present
        column_totals = {
            column_name: pi_projects[column_name].sum()
            for column_name in self.TOTAL_COLUMN_LIST
            if column_name in pi_projects.columns
        }

        # Add Invoice Month column if it doesn't exist
        if invoice.INVOICE_DATE_FIELD not in pi_projects.columns:
            pi_projects[invoice.INVOICE_DATE_FIELD] = None

        # Totals row: empty everywhere except the label and the summed columns
        totals_row = {col: None for col in pi_projects.columns}
        totals_row[invoice.INVOICE_DATE_FIELD] = "Total"
        totals_row.update(column_totals)

        # Convert all columns to object type before concatenation to avoid
        # dtype warnings when the mixed-type totals row is appended
        pi_projects = pi_projects.astype("object")
        pi_projects = pandas.concat(
            [pi_projects, pandas.DataFrame([totals_row]).astype("object")],
            ignore_index=True,
        )

        # Add dollar sign to certain columns
        for column_name in self.DOLLAR_COLUMN_LIST:
            if column_name in pi_projects.columns:
                pi_projects[column_name] = pi_projects[column_name].apply(
                    lambda value: value if pandas.isna(value) else f"${float(value)}"
                )

        for col in pi_projects.columns:
            # ``apply`` can infer a numeric dtype again (e.g. for an all-NA
            # column); go back to object so filling with "" does not upcast
            if pi_projects[col].dtype.name.startswith(("float", "int")):
                pi_projects[col] = pi_projects[col].astype("object")

            # Then fill NA values with empty strings
            pi_projects[col] = pi_projects[col].fillna("")

        return pi_projects

    def export(self):
        """
        Write one PDF invoice per PI into the folder ``self.name``.

        Each invoice is rendered to a temporary HTML file and printed to
        ``{self.name}/{institution}_{pi}_{self.invoice_month}.pdf`` by headless
        Chrome. The binary is taken from the ``CHROME_BIN_PATH`` environment
        variable and defaults to ``/usr/bin/chromium``.

        Raises:
            SystemExit: if the Chrome binary does not exist.

        A non-zero Chrome exit status is logged as an error and the remaining
        invoices are still attempted.
        """

        def _create_html_invoice(temp_fd, pi_dataframe):
            environment = Environment(loader=FileSystemLoader(TEMPLATE_DIR_PATH))
            template = environment.get_template("pi_invoice.html")
            content = template.render(
                data=pi_dataframe,
            )
            temp_fd.write(content)
            # Chrome reads the file by name, so the content must be on disk
            temp_fd.flush()

        def _create_pdf_invoice(temp_fd_name, pi, pi_institution):
            chrome_binary_location = os.environ.get(
                "CHROME_BIN_PATH", "/usr/bin/chromium"
            )
            if not os.path.exists(chrome_binary_location):
                sys.exit(
                    f"Chrome binary does not exist at {chrome_binary_location}. Make sure the env var CHROME_BIN_PATH is set correctly and that Google Chrome is installed"
                )

            invoice_pdf_path = (
                f"{self.name}/{pi_institution}_{pi}_{self.invoice_month}.pdf"
            )
            completed = subprocess.run(
                [
                    chrome_binary_location,
                    "--headless",
                    "--no-sandbox",
                    f"--print-to-pdf={invoice_pdf_path}",
                    "--no-pdf-header-footer",
                    f"file://{temp_fd_name}",
                ],
                capture_output=True,
            )
            # Output is captured, so a failure would otherwise be invisible
            if completed.returncode != 0:
                logger.error(
                    "Chrome exited with status %s while writing %s: %s",
                    completed.returncode,
                    invoice_pdf_path,
                    completed.stderr,
                )

        self._filter_columns()

        # self.name is name of folder storing invoices
        os.makedirs(self.name, exist_ok=True)

        for pi in self.pi_list:
            if pandas.isna(pi):
                continue

            pi_dataframe = self._get_pi_dataframe(self.export_data, pi)
            pi_institution = pi_dataframe[invoice.INSTITUTION_FIELD].iat[0]

            with tempfile.NamedTemporaryFile(mode="w", suffix=".html") as temp_fd:
                _create_html_invoice(temp_fd, pi_dataframe)
                _create_pdf_invoice(temp_fd.name, pi, pi_institution)

    def export_s3(self, s3_bucket):
        """
        Upload every file in the invoice folder to ``s3_bucket``.

        Each file is uploaded twice: once to
        ``Invoices/{invoice_month}/{path}.pdf`` and once to a timestamped key
        under ``Invoices/{invoice_month}/Archive/``.
        """

        def _export_s3_pi_invoice(pi_invoice):
            pi_invoice_path = os.path.join(self.name, pi_invoice)
            stripped_invoice_path = os.path.splitext(pi_invoice_path)[0]
            output_s3_path = (
                f"Invoices/{self.invoice_month}/{stripped_invoice_path}.pdf"
            )
            output_s3_archive_path = f"Invoices/{self.invoice_month}/Archive/{stripped_invoice_path} {util.get_iso8601_time()}.pdf"
            s3_bucket.upload_file(pi_invoice_path, output_s3_path)
            s3_bucket.upload_file(pi_invoice_path, output_s3_archive_path)

        for pi_invoice in os.listdir(self.name):
            _export_s3_pi_invoice(pi_invoice)

process_report/invoices/pi_specific_invoice.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,18 @@ def _get_pi_dataframe(self, data, pi):
9393
if column_name in pi_projects.columns:
9494
column_sums.append(pi_projects[column_name].sum())
9595
sum_columns_list.append(column_name)
96-
pi_projects.loc[len(pi_projects)] = (
97-
None # Adds a new row to end of dataframe initialized with None
98-
)
99-
pi_projects.loc[pi_projects.index[-1], invoice.INVOICE_DATE_FIELD] = "Total"
100-
pi_projects.loc[pi_projects.index[-1], sum_columns_list] = column_sums
96+
97+
# Copy the first row and modify values to keep row formatting
98+
totals_row = pi_projects.iloc[[0]].copy()
99+
# Clear all values to empty strings
100+
for col in totals_row.columns:
101+
totals_row[col] = ""
102+
103+
totals_row[invoice.INVOICE_DATE_FIELD] = "Total"
104+
for col, sum_val in zip(sum_columns_list, column_sums):
105+
totals_row[col] = sum_val
106+
107+
pi_projects = pandas.concat([pi_projects, totals_row], ignore_index=True)
101108

102109
# Add dollar sign to certain columns
103110
for column_name in self.DOLLAR_COLUMN_LIST:
@@ -106,7 +113,11 @@ def _get_pi_dataframe(self, data, pi):
106113
lambda data: data if pandas.isna(data) else f"${data}"
107114
)
108115

109-
pi_projects.fillna("", inplace=True)
116+
# Convert to StringDtype for template compatibility before filling NA values
117+
pi_projects = pi_projects.astype(pandas.StringDtype())
118+
119+
# Convert any remaining pandas NA values to empty strings for template compatibility
120+
pi_projects = pi_projects.fillna("")
110121

111122
return pi_projects
112123

@@ -167,4 +178,4 @@ def _export_s3_pi_invoice(pi_invoice):
167178
s3_bucket.upload_file(pi_invoice_path, output_s3_archive_path)
168179

169180
for pi_invoice in os.listdir(self.name):
170-
_export_s3_pi_invoice(pi_invoice)
181+
_export_s3_pi_invoice(pi_invoice)

process_report/process_report.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,8 +411,10 @@ def merge_csv(files):
411411
dataframe = pandas.read_csv(
412412
file,
413413
dtype={
414+
414415
invoice.COST_FIELD: pandas.ArrowDtype(pyarrow.decimal128(12, 2)),
415416
invoice.RATE_FIELD: str,
417+
416418
},
417419
)
418420
dataframes.append(dataframe)

process_report/tests/unit/invoices/test_pi_specific_invoice.py

Lines changed: 50 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -57,15 +57,23 @@ def add_dollar_sign(data):
5757
[100, 200, 300, 400],
5858
group_name=[None, "G1", None, None],
5959
)
60+
6061
answer_invoice_pi1 = (
6162
test_invoice[test_invoice["Manager (PI)"] == "PI1"]
6263
.copy()
6364
.reset_index(drop=True)
6465
)
65-
answer_invoice_pi1.loc[len(answer_invoice_pi1)] = None
66-
answer_invoice_pi1.loc[
67-
answer_invoice_pi1.index[-1], ["Invoice Month", "Balance"]
68-
] = ["Total", 300]
66+
# Create totals row by copying first row and modifying
67+
totals_row = answer_invoice_pi1.iloc[[0]].copy()
68+
for col in totals_row.columns:
69+
totals_row[col] = ""
70+
totals_row["Invoice Month"] = "Total"
71+
totals_row["Balance"] = 300
72+
answer_invoice_pi1 = pandas.concat(
73+
[answer_invoice_pi1, totals_row], ignore_index=True
74+
)
75+
76+
# Apply dollar formatting
6977
for column_name in [
7078
"Prepaid Group Balance",
7179
"Prepaid Group Used",
@@ -74,17 +82,26 @@ def add_dollar_sign(data):
7482
answer_invoice_pi1[column_name] = answer_invoice_pi1[column_name].apply(
7583
add_dollar_sign
7684
)
85+
answer_invoice_pi1 = answer_invoice_pi1.astype(pandas.StringDtype())
7786
answer_invoice_pi1.fillna("", inplace=True)
7887

7988
answer_invoice_pi2 = (
8089
test_invoice[test_invoice["Manager (PI)"] == "PI2"]
8190
.copy()
8291
.reset_index(drop=True)
8392
)
84-
answer_invoice_pi2.loc[len(answer_invoice_pi2)] = None
85-
answer_invoice_pi2.loc[
86-
answer_invoice_pi2.index[-1], ["Invoice Month", "Balance"]
87-
] = ["Total", 700]
93+
94+
# Create totals row by copying first row and modifying to preserve formatting
95+
totals_row = answer_invoice_pi2.iloc[[0]].copy()
96+
for col in totals_row.columns:
97+
totals_row[col] = ""
98+
totals_row["Invoice Month"] = "Total"
99+
totals_row["Balance"] = 700
100+
answer_invoice_pi2 = pandas.concat(
101+
[answer_invoice_pi2, totals_row], ignore_index=True
102+
)
103+
104+
# Drop prepay columns (they're all NA for PI2)
88105
answer_invoice_pi2 = answer_invoice_pi2.drop(
89106
[
90107
"Prepaid Group Name",
@@ -97,6 +114,7 @@ def add_dollar_sign(data):
97114
answer_invoice_pi2["Balance"] = answer_invoice_pi2["Balance"].apply(
98115
add_dollar_sign
99116
)
117+
answer_invoice_pi2 = answer_invoice_pi2.astype(pandas.StringDtype())
100118
answer_invoice_pi2.fillna("", inplace=True)
101119

102120
pi_inv = test_utils.new_pi_specific_invoice(data=test_invoice)
@@ -145,3 +163,27 @@ def test_export_pi(self, mock_subprocess_run, mock_path_exists, mock_filter_cols
145163
"--no-pdf-header-footer",
146164
]
147165
self.assertTrue(answer_arglist == chrome_arglist[0][:-1])
166+
167+
@mock.patch("process_report.invoices.invoice.Invoice._filter_columns")
@mock.patch("os.path.exists")
@mock.patch("subprocess.run")
def test_process_no_warnings(
    self, mock_subprocess_run, mock_path_exists, mock_filter_cols
):
    """Test that no warnings are raised during invoice processing.

    ``assertNoLogs`` only observes records sent through ``logging``. pandas
    reports dtype-compatibility problems with ``warnings.warn``, so
    ``FutureWarning`` is additionally escalated to an exception here;
    otherwise this test would pass even when such a warning is emitted.
    """
    # Local import: the module's import block is not visible from this hunk
    import warnings

    invoice_month = "2024-10"
    test_invoice = self._get_test_invoice(
        ["PI1", "PI1", "PI2", "PI2"],
        ["BU", "BU", "HU", "HU"],
        [100, 200, 300, 400],
        group_name=[None, "G1", None, None],
    )
    with tempfile.TemporaryDirectory() as test_dir:
        pi_inv = test_utils.new_pi_specific_invoice(
            test_dir, invoice_month, data=test_invoice
        )
        with self.assertNoLogs(
            "process_report.invoices.pi_specific_invoice", level="WARNING"
        ):
            with warnings.catch_warnings():
                # Any FutureWarning raised below now fails the test
                warnings.simplefilter("error", FutureWarning)
                pi_inv.process()
                pi_inv.export()

0 commit comments

Comments
 (0)