1+ import os
2+ import sys
3+ from dataclasses import dataclass
4+ import subprocess
5+ import tempfile
6+ import logging
7+
8+ import pandas
9+ from jinja2 import Environment , FileSystemLoader
10+
11+ import process_report .invoices .invoice as invoice
12+ import process_report .util as util
13+
14+
# Directory handed to jinja2's FileSystemLoader when the PI invoice HTML is
# rendered. The path is relative -- presumably resolved against the process
# working directory; verify against how the tool is launched.
TEMPLATE_DIR_PATH = "process_report/templates"


# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# NOTE(review): basicConfig runs at import time and configures the root
# logger at INFO level if it has no handlers yet -- confirm this is intended
# for a module that is imported rather than run as a script.
logging.basicConfig(level=logging.INFO)
20+
21+
@dataclass
class PIInvoice(invoice.Invoice):
    """Per-PI PDF invoices built from the billable rows of the invoice data.

    This invoice operates on data processed by these Processors:
        - ValidateBillablePIsProcessor
        - NewPICreditProcessor

    ``export`` renders the ``pi_invoice.html`` template once per PI and prints
    it to a PDF inside the folder named ``self.name`` using a headless
    Chrome/Chromium binary. ``export_s3`` uploads every file found in that
    folder.
    """

    # Columns that are summed into the trailing "Total" row of a PI's table.
    TOTAL_COLUMN_LIST = [
        invoice.COST_FIELD,
        invoice.CREDIT_FIELD,
        invoice.BALANCE_FIELD,
    ]

    # Columns whose non-missing values are rendered as "$<float>" strings.
    DOLLAR_COLUMN_LIST = [
        invoice.RATE_FIELD,
        invoice.GROUP_BALANCE_FIELD,
        invoice.COST_FIELD,
        invoice.GROUP_BALANCE_USED_FIELD,
        invoice.CREDIT_FIELD,
        invoice.BALANCE_FIELD,
    ]

    # Columns of this invoice, in output order.
    # NOTE(review): presumably consumed by the base class's _filter_columns()
    # -- confirm against invoice.Invoice.
    export_columns_list = [
        invoice.INVOICE_DATE_FIELD,
        invoice.PROJECT_FIELD,
        invoice.PROJECT_ID_FIELD,
        invoice.PI_FIELD,
        invoice.INVOICE_EMAIL_FIELD,
        invoice.INVOICE_ADDRESS_FIELD,
        invoice.INSTITUTION_FIELD,
        invoice.INSTITUTION_ID_FIELD,
        invoice.SU_HOURS_FIELD,
        invoice.SU_TYPE_FIELD,
        invoice.RATE_FIELD,
        invoice.GROUP_NAME_FIELD,
        invoice.GROUP_INSTITUTION_FIELD,
        invoice.GROUP_BALANCE_FIELD,
        invoice.COST_FIELD,
        invoice.GROUP_BALANCE_USED_FIELD,
        invoice.CREDIT_FIELD,
        invoice.CREDIT_CODE_FIELD,
        invoice.BALANCE_FIELD,
    ]

    def _prepare(self):
        """Select the exportable rows and collect the PIs they belong to.

        Keeps the rows of ``self.data`` that are flagged billable and are not
        flagged as missing a PI, storing them in ``self.export_data`` and the
        distinct PI values in ``self.pi_list``.
        """
        is_billable = self.data[invoice.IS_BILLABLE_FIELD]
        missing_pi = self.data[invoice.MISSING_PI_FIELD]
        self.export_data = self.data[is_billable & ~missing_pi]
        self.pi_list = self.export_data[invoice.PI_FIELD].unique()

    def _get_pi_dataframe(self, data, pi):
        """Build the display table for a single PI.

        Args:
            data: DataFrame holding the rows of every PI.
            pi: Value of the PI column whose rows are selected.

        Returns:
            A new DataFrame containing the PI's rows followed by a "Total"
            row, with dollar columns formatted as ``$<float>`` strings and
            every missing value replaced by an empty string.
        """
        pi_projects = data[data[invoice.PI_FIELD] == pi].copy().reset_index(drop=True)

        # Remove prepay group data if it's empty
        if pandas.isna(pi_projects[invoice.GROUP_NAME_FIELD]).all():
            pi_projects = pi_projects.drop(
                [
                    invoice.GROUP_NAME_FIELD,
                    invoice.GROUP_INSTITUTION_FIELD,
                    invoice.GROUP_BALANCE_FIELD,
                    invoice.GROUP_BALANCE_USED_FIELD,
                ],
                axis=1,
            )

        # The "Total" label lives in the invoice date column, so make sure the
        # column exists before the totals row is derived from the columns.
        if invoice.INVOICE_DATE_FIELD not in pi_projects.columns:
            pi_projects[invoice.INVOICE_DATE_FIELD] = None

        # Totals row: blank everywhere except the label and the summed columns.
        total_row = {col: None for col in pi_projects.columns}
        total_row[invoice.INVOICE_DATE_FIELD] = "Total"
        for column_name in self.TOTAL_COLUMN_LIST:
            if column_name in pi_projects.columns:
                total_row[column_name] = pi_projects[column_name].sum()

        # Convert all columns to object type before concatenation to avoid
        # dtype warnings
        pi_projects = pi_projects.astype("object")
        pi_projects = pandas.concat(
            [pi_projects, pandas.DataFrame([total_row]).astype("object")],
            ignore_index=True,
        )

        # Add dollar sign to certain columns
        for column_name in self.DOLLAR_COLUMN_LIST:
            if column_name in pi_projects.columns:
                pi_projects[column_name] = pi_projects[column_name].apply(
                    lambda value: value if pandas.isna(value) else f"${float(value)}"
                )

        # Filling NA with "" on a numeric column triggers dtype
        # incompatibility warnings, so numeric columns are cast to object
        # first (apply() may have re-inferred a numeric dtype).
        for col in pi_projects.columns:
            if pi_projects[col].dtype.name.startswith(("float", "int")):
                pi_projects[col] = pi_projects[col].astype("object")

            pi_projects[col] = pi_projects[col].fillna("")

        return pi_projects

    def export(self):
        """Render one PDF invoice per PI into the folder ``self.name``.

        PIs whose value is NA are skipped. Each invoice is rendered to a
        temporary HTML file and then printed to
        ``<self.name>/<institution>_<pi>_<invoice_month>.pdf`` by a headless
        Chrome/Chromium process. A failing Chrome run is logged and the
        remaining PIs are still processed.

        Raises:
            SystemExit: If the Chrome binary (env var ``CHROME_BIN_PATH``,
                default ``/usr/bin/chromium``) does not exist.
        """

        def _create_html_invoice(temp_fd, pi_dataframe):
            # Render the PI's table into the already-open temporary file and
            # flush so Chrome sees the full content when it reads the path.
            environment = Environment(loader=FileSystemLoader(TEMPLATE_DIR_PATH))
            template = environment.get_template("pi_invoice.html")
            content = template.render(
                data=pi_dataframe,
            )
            temp_fd.write(content)
            temp_fd.flush()

        def _create_pdf_invoice(temp_fd_name, pi, pi_institution):
            # Print the HTML file at temp_fd_name to the PI's PDF path.
            chrome_binary_location = os.environ.get(
                "CHROME_BIN_PATH", "/usr/bin/chromium"
            )
            if not os.path.exists(chrome_binary_location):
                sys.exit(
                    f"Chrome binary does not exist at {chrome_binary_location}. Make sure the env var CHROME_BIN_PATH is set correctly and that Google Chrome is installed"
                )

            invoice_pdf_path = (
                f"{self.name}/{pi_institution}_{pi}_{self.invoice_month}.pdf"
            )
            result = subprocess.run(
                [
                    chrome_binary_location,
                    "--headless",
                    "--no-sandbox",
                    f"--print-to-pdf={invoice_pdf_path}",
                    "--no-pdf-header-footer",
                    f"file://{temp_fd_name}",
                ],
                capture_output=True,
            )
            # Output is captured, so a failed render would otherwise leave no
            # PDF and no trace. Report it without aborting the other PIs.
            if result.returncode != 0:
                logger.error(
                    "Chrome exited with code %s while creating %s: %s",
                    result.returncode,
                    invoice_pdf_path,
                    result.stderr.decode(errors="replace").strip(),
                )

        self._filter_columns()

        # self.name is name of folder storing invoices
        os.makedirs(self.name, exist_ok=True)

        for pi in self.pi_list:
            if pandas.isna(pi):
                continue

            pi_dataframe = self._get_pi_dataframe(self.export_data, pi)
            pi_institution = pi_dataframe[invoice.INSTITUTION_FIELD].iat[0]

            # The temporary file must stay open (and therefore on disk) until
            # Chrome has finished reading it.
            with tempfile.NamedTemporaryFile(mode="w", suffix=".html") as temp_fd:
                _create_html_invoice(temp_fd, pi_dataframe)
                _create_pdf_invoice(temp_fd.name, pi, pi_institution)

    def export_s3(self, s3_bucket):
        """Upload every file in the invoice folder, plus an archive copy.

        Each file in ``self.name`` is uploaded twice: once under
        ``Invoices/<invoice_month>/`` and once under
        ``Invoices/<invoice_month>/Archive/`` with a timestamp from
        ``util.get_iso8601_time()`` appended to the name.

        Args:
            s3_bucket: Object exposing ``upload_file(local_path, key)``
                (presumably a boto3 Bucket -- verify against caller).
        """

        def _export_s3_pi_invoice(pi_invoice):
            pi_invoice_path = os.path.join(self.name, pi_invoice)
            # Drop the extension so the timestamp can be inserted before it.
            stripped_invoice_path = os.path.splitext(pi_invoice_path)[0]
            output_s3_path = (
                f"Invoices/{self.invoice_month}/{stripped_invoice_path}.pdf"
            )
            output_s3_archive_path = f"Invoices/{self.invoice_month}/Archive/{stripped_invoice_path} {util.get_iso8601_time()}.pdf"
            s3_bucket.upload_file(pi_invoice_path, output_s3_path)
            s3_bucket.upload_file(pi_invoice_path, output_s3_archive_path)

        for pi_invoice in os.listdir(self.name):
            _export_s3_pi_invoice(pi_invoice)
0 commit comments