Commit eb166cf

Add instance pool id to make_job to speed up testing
1 parent: 6ec8c0d

File tree

3 files changed: +18 -5 lines changed


src/databricks/labs/pytester/fixtures/compute.py

Lines changed: 7 additions & 1 deletion
@@ -180,6 +180,7 @@ def make_job(
     * [DEPRECATED: Use `path` instead] `notebook_path` (str, optional): The path to the notebook. If not provided, a random notebook will be created.
     * `content` (str | bytes, optional): The content of the notebook or file used in the job. If not provided, default content of `make_notebook` will be used.
     * `task_type` (type[NotebookTask] | type[SparkPythonTask], optional): The type of task. If not provides, `type[NotebookTask]` will be used.
+    * `instance_pool_id` (str, optional): The instance pool id to add to the job cluster. If not provided, no instance pool will be used.
     * `spark_conf` (dict, optional): The Spark configuration of the job. If not provided, Spark configuration is not explicitly set.
     * `libraries` (list, optional): The list of libraries to install on the job.
     * `tags` (list[str], optional): A list of job tags. If not provided, no additional tags will be set on the job.
@@ -202,6 +203,7 @@ def create(
         content: str | bytes | None = None,
         task_type: type[NotebookTask] | type[SparkPythonTask] = NotebookTask,
         spark_conf: dict[str, str] | None = None,
+        instance_pool_id: str | None = None,
         libraries: list[Library] | None = None,
         tags: dict[str, str] | None = None,
         tasks: list[Task] | None = None,
@@ -223,13 +225,17 @@ def create(
         tags = tags or {}
         tags["RemoveAfter"] = tags.get("RemoveAfter", watchdog_remove_after)
         if not tasks:
+            node_type_id = None
+            if instance_pool_id is None:
+                node_type_id = ws.clusters.select_node_type(local_disk=True, min_memory_gb=16)
             task = Task(
                 task_key=make_random(4),
                 description=make_random(4),
                 new_cluster=ClusterSpec(
                     num_workers=1,
-                    node_type_id=ws.clusters.select_node_type(local_disk=True, min_memory_gb=16),
+                    node_type_id=node_type_id,
                     spark_version=ws.clusters.select_spark_version(latest=True),
+                    instance_pool_id=instance_pool_id,
                     spark_conf=spark_conf,
                 ),
                 libraries=libraries,
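The new parameter lets the job cluster attach to an existing instance pool, so runs start on pre-warmed instances instead of provisioning fresh VMs. A minimal sketch of wiring the fixtures together, assuming `make_instance_pool` returns the SDK's create response (which exposes `instance_pool_id`); the integration tests below instead read a pre-existing pool id from the environment:

def test_job_reuses_pool(make_instance_pool, make_job) -> None:
    # Create a throwaway pool, then point the job cluster at it via the new argument.
    pool = make_instance_pool()
    job = make_job(instance_pool_id=pool.instance_pool_id)
    # The generated task's cluster spec should carry the pool id instead of a node type.
    assert job.settings.tasks[0].new_cluster.instance_pool_id == pool.instance_pool_id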

tests/integration/fixtures/test_compute.py

Lines changed: 4 additions & 4 deletions
@@ -22,16 +22,16 @@ def test_instance_pool(make_instance_pool):
     logger.info(f"created {make_instance_pool()}")
 
 
-def test_job(ws: WorkspaceClient, make_job) -> None:
-    job = make_job()
+def test_job(ws: WorkspaceClient, make_job, env_or_skip) -> None:
+    job = make_job(instance_pool_id=env_or_skip("TEST_INSTANCE_POOL_ID"))
     run = ws.jobs.run_now(job.job_id)
     ws.jobs.wait_get_run_job_terminated_or_skipped(run_id=run.run_id)
     run_state = ws.jobs.get_run(run_id=run.run_id).state
     assert run_state is not None and run_state.result_state == RunResultState.SUCCESS
 
 
-def test_job_with_spark_python_task(ws: WorkspaceClient, make_job) -> None:
-    job = make_job(task_type=SparkPythonTask)
+def test_job_with_spark_python_task(ws: WorkspaceClient, make_job, env_or_skip) -> None:
+    job = make_job(task_type=SparkPythonTask, instance_pool_id=env_or_skip("TEST_INSTANCE_POOL_ID"))
     run = ws.jobs.run_now(job.job_id)
     ws.jobs.wait_get_run_job_terminated_or_skipped(run_id=run.run_id)
     run_state = ws.jobs.get_run(run_id=run.run_id).state
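Both tests now take the pool id from the `TEST_INSTANCE_POOL_ID` environment variable through the `env_or_skip` fixture, so they only run where a pool is available. A rough, self-contained sketch of the assumed fixture semantics (return the value, or skip the test when it is unset), not the actual pytester implementation:

import os

import pytest

def env_or_skip_sketch(name: str) -> str:
    # Look up the environment variable; skip the calling test instead of failing when it is missing.
    value = os.environ.get(name)
    if not value:
        pytest.skip(f"Environment variable {name} is not set")
    return value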

tests/unit/fixtures/test_compute.py

Lines changed: 7 additions & 0 deletions
@@ -77,6 +77,13 @@ def test_make_job_with_spark_python_task() -> None:
     assert workspace_path.read_text() == "print(3)"
 
 
+def test_make_job_with_instance_pool_id() -> None:
+    _, job = call_stateful(make_job, instance_pool_id="test")
+    tasks = job.settings.tasks
+    assert len(tasks) == 1
+    assert tasks[0].new_cluster.instance_pool_id == "test"
+
+
 def test_make_job_with_spark_conf() -> None:
     _, job = call_stateful(make_job, spark_conf={"value": "test"})
     tasks = job.settings.tasks
