IN 1194- merge employee appointment tables #203

Merged 2 commits on Feb 26, 2025

2 changes: 1 addition & 1 deletion Pipfile
@@ -7,7 +7,7 @@ name = "pypi"
 click = "*"
 sentry-sdk = "*"
 oracledb = "*"
-luigi = "*"
+luigi = "3.5.1"
 pandas = "*"
 pandas-stubs = "*"
 attrs = "*"
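
The pin above replaces a floating luigi requirement with an exact release. To confirm the locked environment actually resolved to that version after `pipenv install`, a quick check like the following works (illustrative snippet, not part of this PR):

from importlib.metadata import version

# should print "3.5.1" once the updated Pipfile.lock is installed
print(version("luigi"))
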
1,742 changes: 913 additions & 829 deletions Pipfile.lock

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions hrqb/base/__init__.py
@@ -9,11 +9,11 @@

 __all__ = [
     "HRQBLocalTarget",
-    "PandasPickleTarget",
-    "QuickbaseTableTarget",
     "HRQBPipelineTask",
     "HRQBTask",
+    "PandasPickleTarget",
     "PandasPickleTask",
-    "SQLQueryExtractTask",
+    "QuickbaseTableTarget",
     "QuickbaseUpsertTask",
+    "SQLQueryExtractTask",
 ]
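
The shuffle above only alphabetizes `__all__`; nothing is added to or removed from the package's public exports. If the team wants to keep that ordering from drifting again, a one-line guard test is enough (hypothetical test, not part of this PR; if memory serves, ruff's RUF022 rule enforces the same check when enabled):

from hrqb import base


def test_base_dunder_all_is_sorted():
    # guards the alphabetical ordering introduced in this PR
    assert list(base.__all__) == sorted(base.__all__)
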
73 changes: 3 additions & 70 deletions hrqb/tasks/employee_appointments.py
@@ -5,7 +5,6 @@

 from hrqb.base.task import PandasPickleTask, QuickbaseUpsertTask, SQLQueryExtractTask
 from hrqb.utils import md5_hash_from_values, normalize_dataframe_dates
-from hrqb.utils.quickbase import QBClient


 class ExtractDWEmployeeAppointments(SQLQueryExtractTask):
@@ -18,77 +17,16 @@ def sql_file(self) -> str:
         return "hrqb/tasks/sql/employee_appointments.sql"


-class ExtractQBLibHREmployeeAppointments(PandasPickleTask):
-    """Query Quickbase for data provided by Library HR about employee appointments."""
-
-    stage = luigi.Parameter("Extract")
-
-    def get_dataframe(self) -> pd.DataFrame:  # pragma: nocover
-        qbclient = QBClient()
-        return qbclient.get_table_as_df(
-            qbclient.get_table_id("LibHR Employee Appointments")
-        )
-
-
-class ExtractQBDepartments(PandasPickleTask):
-    """Query Quickbase for Department data to merge with Library HR data."""
-
-    stage = luigi.Parameter("Extract")
-
-    def get_dataframe(self) -> pd.DataFrame:  # pragma: nocover
-        qbclient = QBClient()
-        return qbclient.get_table_as_df(qbclient.get_table_id("Departments"))
-
-
 class TransformEmployeeAppointments(PandasPickleTask):
-    """Combine Data Warehouse and Library HR data for Employee Appointments QB table."""
+    """Transform Data Warehouse data for Employee Appointments QB table."""

     stage = luigi.Parameter("Transform")

     def requires(self) -> list[luigi.Task]:  # pragma: nocover
-        return [
-            ExtractDWEmployeeAppointments(pipeline=self.pipeline),
-            ExtractQBLibHREmployeeAppointments(pipeline=self.pipeline),
-            ExtractQBDepartments(pipeline=self.pipeline),
-        ]
+        return [ExtractDWEmployeeAppointments(pipeline=self.pipeline)]

     def get_dataframe(self) -> pd.DataFrame:
-        dw_emp_appts_df = self.named_inputs["ExtractDWEmployeeAppointments"].read()
-        libhr_df = self.named_inputs["ExtractQBLibHREmployeeAppointments"].read()
-        depts_df = self.named_inputs["ExtractQBDepartments"].read()
-
-        # filter libhr data to active appointments, with position IDs
-        libhr_df = libhr_df[(libhr_df["Active"]) & ~(libhr_df["Position ID"].isna())]
-
-        # normalize position id to string and pad zeros
-        libhr_df["Position ID"] = libhr_df["Position ID"].apply(
-            lambda x: str(int(x)).zfill(8)
-        )
-
-        # merge data warehouse data with libhr data to create new employee appointments df
-        emp_appts_df = dw_emp_appts_df.merge(
-            libhr_df[
-                [
-                    "Related Employee MIT ID",
-                    "Position ID",
-                    "Related Supervisor MIT ID",
-                    "HC ID",
-                    "Related Department ID",
-                    "Cost Object",
-                ]
-            ],
-            how="left",
-            left_on=["position_id", "mit_id"],
-            right_on=["Position ID", "Related Employee MIT ID"],
-        )
-
-        # merge on departments to get directorates
-        emp_appts_df = emp_appts_df.merge(
-            depts_df[["Record ID#", "Directorate"]],
-            how="left",
-            left_on="Related Department ID",
-            right_on="Record ID#",
-        )
+        emp_appts_df = self.single_input_dataframe

         emp_appts_df = normalize_dataframe_dates(
             emp_appts_df,
@@ -110,20 +48,15 @@ def get_dataframe(self) -> pd.DataFrame:

         fields = {
             "mit_id": "MIT ID",
-            "HC ID": "HC ID",
             "employee_type": "Related Employee Type",
             "appt_begin_date": "Begin Date",
             "appt_end_date": "End Date",
-            "Directorate": "Related Directorate",
-            "Related Department ID": "Related Department ID",
-            "Related Supervisor MIT ID": "Supervisor",
             "job_title_long": "Related Job Title",
             "position_title_long": "Related Position Title",
             "job_family": "Job Family",
             "job_subfamily": "Job Subfamily",
             "job_track": "Job Track",
             "position_id": "Position ID",
-            "Cost Object": "Cost Object",
             "exempt": "Exempt / NE",
             "union_name": "Union Name",
             "term_or_perm": "Term or Permanent",
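
Net effect of this file's changes: TransformEmployeeAppointments now has a single upstream task, reads it through `self.single_input_dataframe`, and the `fields` mapping no longer carries the columns that used to arrive via the LibHR and Departments merges (HC ID, Directorate, Related Department ID, Supervisor, Cost Object). A mapping like `fields` is typically applied by subsetting and renaming the dataframe into Quickbase field names; here is a standalone sketch of that pattern, not the actual downstream hrqb code:

import pandas as pd

# illustrative subset of the mapping shown in the diff above
fields = {
    "mit_id": "MIT ID",
    "employee_type": "Related Employee Type",
    "position_id": "Position ID",
}

# toy dataframe standing in for the transformed warehouse data
emp_appts_df = pd.DataFrame(
    [{"mit_id": "900000001", "employee_type": "Admin Staff", "position_id": "00012345", "extra_col": 1}]
)

# keep only the mapped source columns, then rename them to Quickbase field names
qb_df = emp_appts_df[list(fields)].rename(columns=fields)
print(list(qb_df.columns))  # ['MIT ID', 'Related Employee Type', 'Position ID']
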
177 changes: 0 additions & 177 deletions hrqb/tasks/libhr_employee_appointments.py

This file was deleted.

28 changes: 0 additions & 28 deletions hrqb/tasks/pipelines.py
@@ -24,31 +24,3 @@ def requires(self) -> Iterator[luigi.Task]:  # pragma: no cover
         yield LoadEmployeeLeave(pipeline=self.pipeline_name)
         yield LoadPerformanceReviews(pipeline=self.pipeline_name)
         yield LoadEmployeeLeaveBalances(pipeline=self.pipeline_name)
-
-
-class UpdateLibHRData(HRQBPipelineTask):
-    """Pipeline to load Library HR employee appointment data from static CSV file.
-
-    This pipeline loads the table 'LibHR Employee Appointments', which contains
-    information known only by Library HR, that we cannot get from the data warehouse.
-
-    This Quickbase table is used by the 'Employee Appointments' table to fill in gaps from
-    warehouse data alone. This pipeline is useful for initial loading and bulk changes,
-    but this table is primarily managed directly in Quickbase by HR staff.
-
-    This pipeline requires a 'csv_filepath' parameter is defined when running, e.g.:
-        pipenv run hrqb --verbose \
-            pipeline -p UpdateLibHRData \
-            --pipeline-parameters=csv_filepath=<PATH/TO/CSV> \
-            run
-    """
-
-    csv_filepath = luigi.Parameter()
-
-    def requires(self) -> Iterator[luigi.Task]:  # pragma: no cover
-        from hrqb.tasks.libhr_employee_appointments import LoadLibHREmployeeAppointments
-
-        yield LoadLibHREmployeeAppointments(
-            pipeline=self.pipeline_name,
-            csv_filepath=self.csv_filepath,
-        )
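
With UpdateLibHRData deleted, pipelines.py is left with the pattern visible in the surviving context lines: a pipeline task whose requires() simply yields the terminal load tasks. A stripped-down sketch of that shape, using stand-in names rather than the real hrqb classes:

from collections.abc import Iterator

import luigi


class LoadSomething(luigi.Task):
    """Stand-in for a terminal load task such as LoadEmployeeLeave."""

    pipeline = luigi.Parameter()

    def run(self) -> None:
        ...  # the real load tasks upsert data to Quickbase here


class FullUpdateSketch(luigi.WrapperTask):
    """Stand-in for an HRQBPipelineTask-style pipeline."""

    pipeline_name = luigi.Parameter(default="FullUpdateSketch")

    def requires(self) -> Iterator[luigi.Task]:
        # every terminal load task is yielded here; this PR removes the
        # separate LibHR pipeline rather than adding anything new
        yield LoadSomething(pipeline=self.pipeline_name)
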
4 changes: 1 addition & 3 deletions pyproject.toml
@@ -30,9 +30,7 @@ show-fixes = true
 select = ["ALL", "PT"]
 ignore = [
     # default
-    "ANN101",
-    "ANN102",
-    "COM812",
+    "COM812",
     "D107",
     "N812",
     "PTH",