Skip to content

Commit 009c1eb

Browse files
authored
Merge pull request #203 from MITLibraries/IN-1194-merge-libhr-tables
IN-1194: merge employee appointment tables
2 parents 159084c + 5498f0a commit 009c1eb

11 files changed

+924
-1314
lines changed

Pipfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ name = "pypi"
77
click = "*"
88
sentry-sdk = "*"
99
oracledb = "*"
10-
luigi = "*"
10+
luigi = "3.5.1"
1111
pandas = "*"
1212
pandas-stubs = "*"
1313
attrs = "*"

Pipfile.lock

+913-829
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

hrqb/base/__init__.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@
99

1010
__all__ = [
1111
"HRQBLocalTarget",
12-
"PandasPickleTarget",
13-
"QuickbaseTableTarget",
1412
"HRQBPipelineTask",
1513
"HRQBTask",
14+
"PandasPickleTarget",
1615
"PandasPickleTask",
17-
"SQLQueryExtractTask",
16+
"QuickbaseTableTarget",
1817
"QuickbaseUpsertTask",
18+
"SQLQueryExtractTask",
1919
]

hrqb/tasks/employee_appointments.py

+3-70
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
from hrqb.base.task import PandasPickleTask, QuickbaseUpsertTask, SQLQueryExtractTask
77
from hrqb.utils import md5_hash_from_values, normalize_dataframe_dates
8-
from hrqb.utils.quickbase import QBClient
98

109

1110
class ExtractDWEmployeeAppointments(SQLQueryExtractTask):
@@ -18,77 +17,16 @@ def sql_file(self) -> str:
1817
return "hrqb/tasks/sql/employee_appointments.sql"
1918

2019

21-
class ExtractQBLibHREmployeeAppointments(PandasPickleTask):
22-
"""Query Quickbase for data provided by Library HR about employee appointments."""
23-
24-
stage = luigi.Parameter("Extract")
25-
26-
def get_dataframe(self) -> pd.DataFrame: # pragma: nocover
27-
qbclient = QBClient()
28-
return qbclient.get_table_as_df(
29-
qbclient.get_table_id("LibHR Employee Appointments")
30-
)
31-
32-
33-
class ExtractQBDepartments(PandasPickleTask):
34-
"""Query Quickbase for Department data to merge with Library HR data."""
35-
36-
stage = luigi.Parameter("Extract")
37-
38-
def get_dataframe(self) -> pd.DataFrame: # pragma: nocover
39-
qbclient = QBClient()
40-
return qbclient.get_table_as_df(qbclient.get_table_id("Departments"))
41-
42-
4320
class TransformEmployeeAppointments(PandasPickleTask):
44-
"""Combine Data Warehouse and Library HR data for Employee Appointments QB table."""
21+
"""Transform Data Warehouse data for Employee Appointments QB table."""
4522

4623
stage = luigi.Parameter("Transform")
4724

4825
def requires(self) -> list[luigi.Task]: # pragma: nocover
49-
return [
50-
ExtractDWEmployeeAppointments(pipeline=self.pipeline),
51-
ExtractQBLibHREmployeeAppointments(pipeline=self.pipeline),
52-
ExtractQBDepartments(pipeline=self.pipeline),
53-
]
26+
return [ExtractDWEmployeeAppointments(pipeline=self.pipeline)]
5427

5528
def get_dataframe(self) -> pd.DataFrame:
56-
dw_emp_appts_df = self.named_inputs["ExtractDWEmployeeAppointments"].read()
57-
libhr_df = self.named_inputs["ExtractQBLibHREmployeeAppointments"].read()
58-
depts_df = self.named_inputs["ExtractQBDepartments"].read()
59-
60-
# filter libhr data to active appointments, with position IDs
61-
libhr_df = libhr_df[(libhr_df["Active"]) & ~(libhr_df["Position ID"].isna())]
62-
63-
# normalize position id to string and pad zeros
64-
libhr_df["Position ID"] = libhr_df["Position ID"].apply(
65-
lambda x: str(int(x)).zfill(8)
66-
)
67-
68-
# merge data warehouse data with libhr data to create new employee appointments df
69-
emp_appts_df = dw_emp_appts_df.merge(
70-
libhr_df[
71-
[
72-
"Related Employee MIT ID",
73-
"Position ID",
74-
"Related Supervisor MIT ID",
75-
"HC ID",
76-
"Related Department ID",
77-
"Cost Object",
78-
]
79-
],
80-
how="left",
81-
left_on=["position_id", "mit_id"],
82-
right_on=["Position ID", "Related Employee MIT ID"],
83-
)
84-
85-
# merge on departments to get directorates
86-
emp_appts_df = emp_appts_df.merge(
87-
depts_df[["Record ID#", "Directorate"]],
88-
how="left",
89-
left_on="Related Department ID",
90-
right_on="Record ID#",
91-
)
29+
emp_appts_df = self.single_input_dataframe
9230

9331
emp_appts_df = normalize_dataframe_dates(
9432
emp_appts_df,
@@ -110,20 +48,15 @@ def get_dataframe(self) -> pd.DataFrame:
11048

11149
fields = {
11250
"mit_id": "MIT ID",
113-
"HC ID": "HC ID",
11451
"employee_type": "Related Employee Type",
11552
"appt_begin_date": "Begin Date",
11653
"appt_end_date": "End Date",
117-
"Directorate": "Related Directorate",
118-
"Related Department ID": "Related Department ID",
119-
"Related Supervisor MIT ID": "Supervisor",
12054
"job_title_long": "Related Job Title",
12155
"position_title_long": "Related Position Title",
12256
"job_family": "Job Family",
12357
"job_subfamily": "Job Subfamily",
12458
"job_track": "Job Track",
12559
"position_id": "Position ID",
126-
"Cost Object": "Cost Object",
12760
"exempt": "Exempt / NE",
12861
"union_name": "Union Name",
12962
"term_or_perm": "Term or Permanent",

hrqb/tasks/libhr_employee_appointments.py

-177
This file was deleted.

hrqb/tasks/pipelines.py

-28
Original file line numberDiff line numberDiff line change
@@ -24,31 +24,3 @@ def requires(self) -> Iterator[luigi.Task]: # pragma: no cover
2424
yield LoadEmployeeLeave(pipeline=self.pipeline_name)
2525
yield LoadPerformanceReviews(pipeline=self.pipeline_name)
2626
yield LoadEmployeeLeaveBalances(pipeline=self.pipeline_name)
27-
28-
29-
class UpdateLibHRData(HRQBPipelineTask):
30-
"""Pipeline to load Library HR employee appointment data from static CSV file.
31-
32-
This pipeline loads the table 'LibHR Employee Appointments', which contains
33-
information known only by Library HR, that we cannot get from the data warehouse.
34-
35-
This Quickbase table is used by the 'Employee Appointments' table to fill in gaps from
36-
warehouse data alone. This pipeline is useful for initial loading and bulk changes,
37-
but this table is primarily managed directly in Quickbase by HR staff.
38-
39-
This pipeline requires that a 'csv_filepath' parameter be defined when running, e.g.:
40-
pipenv run hrqb --verbose \
41-
pipeline -p UpdateLibHRData \
42-
--pipeline-parameters=csv_filepath=<PATH/TO/CSV> \
43-
run
44-
"""
45-
46-
csv_filepath = luigi.Parameter()
47-
48-
def requires(self) -> Iterator[luigi.Task]: # pragma: no cover
49-
from hrqb.tasks.libhr_employee_appointments import LoadLibHREmployeeAppointments
50-
51-
yield LoadLibHREmployeeAppointments(
52-
pipeline=self.pipeline_name,
53-
csv_filepath=self.csv_filepath,
54-
)

pyproject.toml

+1-3
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,7 @@ show-fixes = true
3030
select = ["ALL", "PT"]
3131
ignore = [
3232
# default
33-
"ANN101",
34-
"ANN102",
35-
"COM812",
33+
"COM812",
3634
"D107",
3735
"N812",
3836
"PTH",

0 commit comments

Comments
 (0)