Skip to content

Commit 00835b4

Browse files
Merge branch 'main' of https://github.com/CCI-MOC/invoicing into nonbillable-pydantic-models
2 parents 8806ba5 + 095c2c1 commit 00835b4

38 files changed

Lines changed: 635 additions & 236 deletions

Dockerfile

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ COPY requirements.txt .
88
RUN pip install -r requirements.txt
99

1010
COPY tools/ tools/
11-
COPY process_report/process_report.py process_report/
12-
COPY process_report/institute_list.yaml process_report/
11+
COPY process_report process_report
1312

14-
CMD ["tools/clone_nonbillables_and_process_invoice.sh"]
13+
CMD ["tools/setup_and_process.sh"]

k8s/base/invoice-daily-check.yaml

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
apiVersion: batch/v1
2+
kind: CronJob
3+
metadata:
4+
name: invoice-daily-check
5+
spec:
6+
schedule: "0 7 * * *"
7+
jobTemplate:
8+
spec:
9+
template:
10+
spec:
11+
containers:
12+
- name: invoice-daily-check
13+
image: ghcr.io/cci-moc/process-csv-report:latest
14+
env:
15+
- name: GH_NONBILLABLE_DEPLOYKEY
16+
valueFrom:
17+
secretKeyRef:
18+
name: gh-nonbillables
19+
key: ssh-privatekey
20+
- name: S3_KEY_ID
21+
valueFrom:
22+
secretKeyRef:
23+
name: nerc-invoices-s3-bucket
24+
key: s3-key-id
25+
- name: S3_APP_KEY
26+
valueFrom:
27+
secretKeyRef:
28+
name: nerc-invoices-s3-bucket
29+
key: s3-app-key
30+
- name: KEYCLOAK_CLIENT_ID
31+
valueFrom:
32+
secretKeyRef:
33+
name: coldfront-api
34+
key: keycloak-client-id
35+
- name: KEYCLOAK_CLIENT_SECRET
36+
valueFrom:
37+
secretKeyRef:
38+
name: coldfront-api
39+
key: keycloak-client-secret
40+
- name: UPLOAD_TO_S3
41+
value: "False"
42+
restartPolicy: OnFailure

k8s/base/invoice-processing.yaml

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,29 @@ spec:
1414
env:
1515
- name: GH_NONBILLABLE_DEPLOYKEY
1616
valueFrom:
17-
secretKeyRef:
18-
name: gh-nonbillables
19-
key: ssh-deploykey
17+
secretKeyRef:
18+
name: gh-nonbillables
19+
key: ssh-privatekey
2020
- name: S3_KEY_ID
2121
valueFrom:
22-
secretKeyRef:
23-
name: nerc-invoices-s3-bucket
24-
key: s3-key-id
22+
secretKeyRef:
23+
name: nerc-invoices-s3-bucket
24+
key: s3-key-id
2525
- name: S3_APP_KEY
2626
valueFrom:
27-
secretKeyRef:
28-
name: nerc-invoices-s3-bucket
29-
key: s3-app-key
27+
secretKeyRef:
28+
name: nerc-invoices-s3-bucket
29+
key: s3-app-key
30+
- name: KEYCLOAK_CLIENT_ID
31+
valueFrom:
32+
secretKeyRef:
33+
name: coldfront-api
34+
key: keycloak-client-id
35+
- name: KEYCLOAK_CLIENT_SECRET
36+
valueFrom:
37+
secretKeyRef:
38+
name: coldfront-api
39+
key: keycloak-client-secret
40+
- name: UPLOAD_TO_S3
41+
value: "True"
3042
restartPolicy: OnFailure

k8s/base/kustomization.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
resources:
22
- invoice-processing.yaml
3+
- invoice-daily-check.yaml

k8s/overlay/kustomization.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
resources:
22
- ../base
3-
- secret-b2-old-pi.yaml
3+
- secret-nerc-invoices-s3-bucket.yaml
44
- secret-gh-nonbillables.yaml
5+
- secret-coldfront-api.yaml
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
apiVersion: v1
2+
kind: Secret
3+
metadata:
4+
name: coldfront-api
5+
type: Opaque
6+
data:
7+
keycloak-client-id: test
8+
keycloak-client-secret: test
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ metadata:
44
name: gh-nonbillables
55
type: kubernetes.io/ssh-auth
66
data:
7-
ssh-deploykey: test
7+
ssh-privatekey: test
File renamed without changes.

process_report/invoices/invoice.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,32 @@
11
from dataclasses import dataclass
2+
from typing import Any, Callable
3+
from decimal import Decimal
24
import pandas
5+
import pyarrow
6+
import logging
37

48
import process_report.util as util
59

610

11+
logger = logging.getLogger(__name__)
12+
13+
14+
@dataclass
15+
class InvoiceColumn:
16+
name: str
17+
dtype: Any
18+
default_value: Any | None = None
19+
default_initializer: Callable[[pandas.DataFrame], pandas.Series] | None = None
20+
21+
22+
# Field type definitions
23+
BALANCE_FIELD_TYPE = pandas.ArrowDtype(pyarrow.decimal128(21, 2))
24+
RATE_FIELD_TYPE = pandas.ArrowDtype(pyarrow.decimal128(21, 13))
25+
INTEGER_FIELD_TYPE = pandas.ArrowDtype(pyarrow.int64())
26+
STRING_FIELD_TYPE = pandas.StringDtype()
27+
BOOL_FIELD_TYPE = pandas.BooleanDtype()
28+
29+
730
### PI file field names
831
PI_PI_FIELD = "PI"
932
PI_FIRST_MONTH = "First Invoice Month"
@@ -65,18 +88,77 @@
6588
IS_COURSE_FIELD = "Is Course"
6689
###
6790

91+
### Initialized Column objects
92+
INVOICE_DATE_COLUMN = InvoiceColumn(name=INVOICE_DATE_FIELD, dtype=STRING_FIELD_TYPE)
93+
PROJECT_COLUMN = InvoiceColumn(name=PROJECT_FIELD, dtype=STRING_FIELD_TYPE)
94+
PROJECT_ID_COLUMN = InvoiceColumn(name=PROJECT_ID_FIELD, dtype=STRING_FIELD_TYPE)
95+
PI_COLUMN = InvoiceColumn(name=PI_FIELD, dtype=STRING_FIELD_TYPE)
96+
INVOICE_EMAIL_COLUMN = InvoiceColumn(name=INVOICE_EMAIL_FIELD, dtype=STRING_FIELD_TYPE)
97+
INVOICE_ADDRESS_COLUMN = InvoiceColumn(
98+
name=INVOICE_ADDRESS_FIELD, dtype=STRING_FIELD_TYPE
99+
)
100+
INSTITUTION_COLUMN = InvoiceColumn(name=INSTITUTION_FIELD, dtype=STRING_FIELD_TYPE)
101+
INSTITUTION_ID_COLUMN = InvoiceColumn(
102+
name=INSTITUTION_ID_FIELD, dtype=STRING_FIELD_TYPE
103+
)
104+
GROUP_NAME_COLUMN = InvoiceColumn(name=GROUP_NAME_FIELD, dtype=STRING_FIELD_TYPE)
105+
GROUP_INSTITUTION_COLUMN = InvoiceColumn(
106+
name=GROUP_INSTITUTION_FIELD, dtype=STRING_FIELD_TYPE
107+
)
108+
GROUP_BALANCE_COLUMN = InvoiceColumn(name=GROUP_BALANCE_FIELD, dtype=BALANCE_FIELD_TYPE)
109+
GROUP_BALANCE_USED_COLUMN = InvoiceColumn(
110+
name=GROUP_BALANCE_USED_FIELD, dtype=BALANCE_FIELD_TYPE
111+
)
112+
SU_HOURS_COLUMN = InvoiceColumn(name=SU_HOURS_FIELD, dtype=INTEGER_FIELD_TYPE)
113+
SU_TYPE_COLUMN = InvoiceColumn(name=SU_TYPE_FIELD, dtype=STRING_FIELD_TYPE)
114+
SU_CHARGE_COLUMN = InvoiceColumn(name=SU_CHARGE_FIELD, dtype=BALANCE_FIELD_TYPE)
115+
LENOVO_CHARGE_COLUMN = InvoiceColumn(name=LENOVO_CHARGE_FIELD, dtype=BALANCE_FIELD_TYPE)
116+
RATE_COLUMN = InvoiceColumn(
117+
name=RATE_FIELD, dtype=RATE_FIELD_TYPE
118+
) # Using decimal to suppress scientific notation in export
119+
COST_COLUMN = InvoiceColumn(name=COST_FIELD, dtype=BALANCE_FIELD_TYPE)
120+
CREDIT_COLUMN = InvoiceColumn(name=CREDIT_FIELD, dtype=BALANCE_FIELD_TYPE)
121+
CREDIT_CODE_COLUMN = InvoiceColumn(name=CREDIT_CODE_FIELD, dtype=STRING_FIELD_TYPE)
122+
SUBSIDY_COLUMN = InvoiceColumn(
123+
name=SUBSIDY_FIELD, dtype=BALANCE_FIELD_TYPE, default_value=Decimal(0)
124+
)
125+
BALANCE_COLUMN = InvoiceColumn(
126+
name=BALANCE_FIELD,
127+
dtype=BALANCE_FIELD_TYPE,
128+
default_initializer=lambda df: df[COST_FIELD],
129+
)
130+
PI_BALANCE_COLUMN = InvoiceColumn(
131+
name=PI_BALANCE_FIELD,
132+
dtype=BALANCE_FIELD_TYPE,
133+
default_initializer=lambda df: df[COST_FIELD],
134+
)
135+
136+
# Internally used fields
137+
IS_BILLABLE_COLUMN = InvoiceColumn(name=IS_BILLABLE_FIELD, dtype=BOOL_FIELD_TYPE)
138+
MISSING_PI_COLUMN = InvoiceColumn(name=MISSING_PI_FIELD, dtype=BOOL_FIELD_TYPE)
139+
PROJECT_NAME_COLUMN = InvoiceColumn(name=PROJECT_NAME_FIELD, dtype=STRING_FIELD_TYPE)
140+
GROUP_MANAGED_COLUMN = InvoiceColumn(name=GROUP_MANAGED_FIELD, dtype=BOOL_FIELD_TYPE)
141+
CLUSTER_NAME_COLUMN = InvoiceColumn(name=CLUSTER_NAME_FIELD, dtype=STRING_FIELD_TYPE)
142+
IS_COURSE_COLUMN = InvoiceColumn(
143+
name=IS_COURSE_FIELD, dtype=BOOL_FIELD_TYPE, default_value=False
144+
)
145+
###
146+
68147

69148
@dataclass
70149
class Invoice:
71150
export_columns_list = list()
72151
exported_columns_map = dict()
152+
initializes_columns = tuple()
153+
operates_on_columns = tuple()
73154

74155
invoice_month: str
75156
data: pandas.DataFrame
76157
name: str = ""
77158
export_data = None
78159

79160
def process(self):
161+
self._init_columns()
80162
self._prepare()
81163
self._process()
82164
self._prepare_export()
@@ -93,6 +175,24 @@ def output_s3_key(self) -> str:
93175
def output_s3_archive_key(self):
94176
return f"Invoices/{self.invoice_month}/Archive/{self.name} {self.invoice_month} {util.get_iso8601_time()}.csv"
95177

178+
def _init_columns(self):
    """Create the columns listed in `initializes_columns` and cast them.

    For every entry of `initializes_columns`:

    - If the column is missing from the data, it is created. Its value comes
      from `default_initializer(data)` when one is set, otherwise from
      `default_value` (None unless the column defines one).
    - If the column already exists with a different dtype, a warning is
      logged.
    - In both cases the column is then cast to its declared dtype.

    New columns are added to a shallow copy of the input frame, so the
    DataFrame object originally stored in `self.data` is never modified in
    place; `self.data` is rebound to the result after all columns are done.
    """
    if not self.initializes_columns:
        # Nothing to initialize: leave `self.data` exactly as it is.
        return

    # Shallow copy: adding a column below must not leak into the frame the
    # caller passed in.
    data = self.data.copy(deep=False)
    for field in self.initializes_columns:
        if field.name not in data.columns:
            if field.default_initializer is not None:
                field_default = field.default_initializer(data)
            else:
                field_default = field.default_value
            data[field.name] = field_default
        elif data.dtypes[field.name] != field.dtype:
            logger.warning(
                "Column %s has dtype %s instead of expected %s.",
                field.name,
                data.dtypes[field.name],
                field.dtype,
            )
        # Cast whether the column was just created or already existed.
        data = data.astype({field.name: field.dtype})
    self.data = data
195+
96196
def _prepare(self):
97197
"""Prepares the data for processing.
98198

process_report/process_report.py

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import os
44

55
import pandas
6-
import pyarrow
76

87
from process_report.settings import invoice_settings
98
from process_report.loader import loader
@@ -22,6 +21,7 @@
2221
)
2322
from process_report.processors import (
2423
coldfront_fetch_processor,
24+
validate_input_column_processor,
2525
validate_pi_alias_processor,
2626
add_institution_processor,
2727
lenovo_processor,
@@ -33,6 +33,20 @@
3333
validate_cluster_name_processor,
3434
)
3535

36+
PROCESSING_ORDER = [
37+
validate_input_column_processor.ValidateInputColumnsProcessor,
38+
validate_cluster_name_processor.ValidateClusterNameProcessor,
39+
coldfront_fetch_processor.ColdfrontFetchProcessor,
40+
validate_pi_alias_processor.ValidatePIAliasProcessor,
41+
add_institution_processor.AddInstitutionProcessor,
42+
lenovo_processor.LenovoProcessor,
43+
validate_billable_pi_processor.ValidateBillablePIsProcessor,
44+
pi_su_credit_processor.PISUCreditProcessor,
45+
new_pi_credit_processor.NewPICreditProcessor,
46+
bu_subsidy_processor.BUSubsidyProcessor,
47+
prepayment_processor.PrepaymentProcessor,
48+
]
49+
3650

3751
PI_S3_FILEPATH = "PIs/PI.csv"
3852
ALIAS_S3_FILEPATH = "PIs/alias.csv"
@@ -66,20 +80,7 @@ def main():
6680

6781
### Preliminary processing
6882
processed_data = process_merged_dataframe(
69-
invoice_month,
70-
merged_dataframe,
71-
[
72-
validate_cluster_name_processor.ValidateClusterNameProcessor,
73-
coldfront_fetch_processor.ColdfrontFetchProcessor,
74-
validate_pi_alias_processor.ValidatePIAliasProcessor,
75-
add_institution_processor.AddInstitutionProcessor,
76-
lenovo_processor.LenovoProcessor,
77-
validate_billable_pi_processor.ValidateBillablePIsProcessor,
78-
pi_su_credit_processor.PISUCreditProcessor,
79-
new_pi_credit_processor.NewPICreditProcessor,
80-
bu_subsidy_processor.BUSubsidyProcessor,
81-
prepayment_processor.PrepaymentProcessor,
82-
],
83+
invoice_month, merged_dataframe, PROCESSING_ORDER
8384
)
8485

8586
### Export invoices
@@ -109,8 +110,19 @@ def merge_csv(files):
109110
file,
110111
engine="pyarrow",
111112
dtype={
112-
invoice.COST_FIELD: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
113-
invoice.RATE_FIELD: str,
113+
invoice.INVOICE_DATE_COLUMN.name: invoice.INVOICE_DATE_COLUMN.dtype,
114+
invoice.PROJECT_COLUMN.name: invoice.PROJECT_COLUMN.dtype,
115+
invoice.PROJECT_ID_COLUMN.name: invoice.PROJECT_ID_COLUMN.dtype,
116+
invoice.PI_COLUMN.name: invoice.PI_COLUMN.dtype,
117+
invoice.CLUSTER_NAME_COLUMN.name: invoice.CLUSTER_NAME_COLUMN.dtype,
118+
invoice.INVOICE_EMAIL_COLUMN.name: invoice.INVOICE_EMAIL_COLUMN.dtype,
119+
invoice.INVOICE_ADDRESS_COLUMN.name: invoice.INVOICE_ADDRESS_COLUMN.dtype,
120+
invoice.INSTITUTION_COLUMN.name: invoice.INSTITUTION_COLUMN.dtype,
121+
invoice.INSTITUTION_ID_COLUMN.name: invoice.INSTITUTION_ID_COLUMN.dtype,
122+
invoice.SU_HOURS_COLUMN.name: invoice.SU_HOURS_COLUMN.dtype,
123+
invoice.SU_TYPE_COLUMN.name: invoice.SU_TYPE_COLUMN.dtype,
124+
invoice.RATE_COLUMN.name: invoice.RATE_COLUMN.dtype,
125+
invoice.COST_COLUMN.name: invoice.COST_COLUMN.dtype,
114126
},
115127
quotechar="|",
116128
)

0 commit comments

Comments
 (0)