Skip to content

Commit 5498f0a

Browse files
committed
Remove LibHR table management
Why these changes are being introduced:

For reasons beyond the scope of this commit, it has been determined that having a table like "LibHR Employee Appointments" (LIBHR) distinct from the more widely used "Employee Appointments" (EA) is more problematic than it is helpful. As such, it has been decided that data formerly found in the LIBHR table will now be directly added and managed in the EA table.

How this addresses that need:
* Completely removes any tasks associated with the LIBHR table
* Removes any copying of data to the EA table from the LIBHR table

Side effects of this change:
* LIBHR table is no longer a concern for this middleware

Relevant ticket(s):
* https://mitlibraries.atlassian.net/browse/IN-1194
1 parent b59c20d commit 5498f0a

6 files changed

+3
-475
lines changed

hrqb/tasks/employee_appointments.py

+3-70
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
from hrqb.base.task import PandasPickleTask, QuickbaseUpsertTask, SQLQueryExtractTask
77
from hrqb.utils import md5_hash_from_values, normalize_dataframe_dates
8-
from hrqb.utils.quickbase import QBClient
98

109

1110
class ExtractDWEmployeeAppointments(SQLQueryExtractTask):
@@ -18,77 +17,16 @@ def sql_file(self) -> str:
1817
return "hrqb/tasks/sql/employee_appointments.sql"
1918

2019

21-
class ExtractQBLibHREmployeeAppointments(PandasPickleTask):
22-
"""Query Quickbase for data provided by Library HR about employee appointments."""
23-
24-
stage = luigi.Parameter("Extract")
25-
26-
def get_dataframe(self) -> pd.DataFrame: # pragma: nocover
27-
qbclient = QBClient()
28-
return qbclient.get_table_as_df(
29-
qbclient.get_table_id("LibHR Employee Appointments")
30-
)
31-
32-
33-
class ExtractQBDepartments(PandasPickleTask):
34-
"""Query Quickbase for Department data to merge with Library HR data."""
35-
36-
stage = luigi.Parameter("Extract")
37-
38-
def get_dataframe(self) -> pd.DataFrame: # pragma: nocover
39-
qbclient = QBClient()
40-
return qbclient.get_table_as_df(qbclient.get_table_id("Departments"))
41-
42-
4320
class TransformEmployeeAppointments(PandasPickleTask):
44-
"""Combine Data Warehouse and Library HR data for Employee Appointments QB table."""
21+
"""Transform Data Warehouse data for Employee Appointments QB table."""
4522

4623
stage = luigi.Parameter("Transform")
4724

4825
def requires(self) -> list[luigi.Task]: # pragma: nocover
49-
return [
50-
ExtractDWEmployeeAppointments(pipeline=self.pipeline),
51-
ExtractQBLibHREmployeeAppointments(pipeline=self.pipeline),
52-
ExtractQBDepartments(pipeline=self.pipeline),
53-
]
26+
return [ExtractDWEmployeeAppointments(pipeline=self.pipeline)]
5427

5528
def get_dataframe(self) -> pd.DataFrame:
56-
dw_emp_appts_df = self.named_inputs["ExtractDWEmployeeAppointments"].read()
57-
libhr_df = self.named_inputs["ExtractQBLibHREmployeeAppointments"].read()
58-
depts_df = self.named_inputs["ExtractQBDepartments"].read()
59-
60-
# filter libhr data to active appointments, with position IDs
61-
libhr_df = libhr_df[(libhr_df["Active"]) & ~(libhr_df["Position ID"].isna())]
62-
63-
# normalize position id to string and pad zeros
64-
libhr_df["Position ID"] = libhr_df["Position ID"].apply(
65-
lambda x: str(int(x)).zfill(8)
66-
)
67-
68-
# merge data warehouse data with libhr data to create new employee appointments df
69-
emp_appts_df = dw_emp_appts_df.merge(
70-
libhr_df[
71-
[
72-
"Related Employee MIT ID",
73-
"Position ID",
74-
"Related Supervisor MIT ID",
75-
"HC ID",
76-
"Related Department ID",
77-
"Cost Object",
78-
]
79-
],
80-
how="left",
81-
left_on=["position_id", "mit_id"],
82-
right_on=["Position ID", "Related Employee MIT ID"],
83-
)
84-
85-
# merge on departments to get directorates
86-
emp_appts_df = emp_appts_df.merge(
87-
depts_df[["Record ID#", "Directorate"]],
88-
how="left",
89-
left_on="Related Department ID",
90-
right_on="Record ID#",
91-
)
29+
emp_appts_df = self.single_input_dataframe
9230

9331
emp_appts_df = normalize_dataframe_dates(
9432
emp_appts_df,
@@ -110,20 +48,15 @@ def get_dataframe(self) -> pd.DataFrame:
11048

11149
fields = {
11250
"mit_id": "MIT ID",
113-
"HC ID": "HC ID",
11451
"employee_type": "Related Employee Type",
11552
"appt_begin_date": "Begin Date",
11653
"appt_end_date": "End Date",
117-
"Directorate": "Related Directorate",
118-
"Related Department ID": "Related Department ID",
119-
"Related Supervisor MIT ID": "Supervisor",
12054
"job_title_long": "Related Job Title",
12155
"position_title_long": "Related Position Title",
12256
"job_family": "Job Family",
12357
"job_subfamily": "Job Subfamily",
12458
"job_track": "Job Track",
12559
"position_id": "Position ID",
126-
"Cost Object": "Cost Object",
12760
"exempt": "Exempt / NE",
12861
"union_name": "Union Name",
12962
"term_or_perm": "Term or Permanent",

hrqb/tasks/libhr_employee_appointments.py

-177
This file was deleted.

hrqb/tasks/pipelines.py

-28
Original file line numberDiff line numberDiff line change
@@ -24,31 +24,3 @@ def requires(self) -> Iterator[luigi.Task]: # pragma: no cover
2424
yield LoadEmployeeLeave(pipeline=self.pipeline_name)
2525
yield LoadPerformanceReviews(pipeline=self.pipeline_name)
2626
yield LoadEmployeeLeaveBalances(pipeline=self.pipeline_name)
27-
28-
29-
class UpdateLibHRData(HRQBPipelineTask):
30-
"""Pipeline to load Library HR employee appointment data from static CSV file.
31-
32-
This pipeline loads the table 'LibHR Employee Appointments', which contains
33-
information known only by Library HR, that we cannot get from the data warehouse.
34-
35-
This Quickbase table is used by the 'Employee Appointments' table to fill in gaps from
36-
warehouse data alone. This pipeline is useful for initial loading and bulk changes,
37-
but this table is primarily managed directly in Quickbase by HR staff.
38-
39-
This pipeline requires a 'csv_filepath' parameter is defined when running, e.g.:
40-
pipenv run hrqb --verbose \
41-
pipeline -p UpdateLibHRData \
42-
--pipeline-parameters=csv_filepath=<PATH/TO/CSV> \
43-
run
44-
"""
45-
46-
csv_filepath = luigi.Parameter()
47-
48-
def requires(self) -> Iterator[luigi.Task]: # pragma: no cover
49-
from hrqb.tasks.libhr_employee_appointments import LoadLibHREmployeeAppointments
50-
51-
yield LoadLibHREmployeeAppointments(
52-
pipeline=self.pipeline_name,
53-
csv_filepath=self.csv_filepath,
54-
)

0 commit comments

Comments
 (0)