Commit 3080a07: moved fixture to respective module
1 parent: 2049351

4 files changed: 42 additions & 13 deletions

README.md

Lines changed: 4 additions & 1 deletion

````diff
@@ -376,7 +376,10 @@ See also [`log_account_link`](#log_account_link-fixture), [`make_acc_group`](#ma
 Get Databricks Connect Spark session. Requires `databricks-connect` package to be installed.
 
 To enable serverless set the local environment variable `DATABRICKS_SERVERLESS_COMPUTE_ID` to `"auto"`.
-If this environment variable is set, Databricks Connect ignores the cluster id.
+If this environment variable is set, Databricks Connect ignores the cluster_id.
+If `DATABRICKS_SERVERLESS_COMPUTE_ID` is set to a specific serverless cluster ID, that cluster will be used instead.
+However, this is not recommended, as serverless clusters are ephemeral by design.
+See more details here: https://docs.databricks.com/en/dev-tools/databricks-connect/cluster-config.html#configure-a-connection-to-serverless-compute
 
 Usage:
 ```python
````
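For orientation, a minimal sketch of the serverless path described above; the env-var name and the `serverless(True)` builder call come from this diff, while the standalone-script framing is an assumption:

```python
import os

# "auto" lets Databricks Connect provision serverless compute on its own;
# pinning a specific serverless cluster id is discouraged, since serverless
# clusters are ephemeral by design.
os.environ["DATABRICKS_SERVERLESS_COMPUTE_ID"] = "auto"

# Import after setting the variable so the session builder can see it.
from databricks.connect import DatabricksSession

spark = DatabricksSession.builder.serverless(True).getOrCreate()
assert spark.sql("SELECT 1").collect()[0][0] == 1
```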

src/databricks/labs/pytester/fixtures/connect.py

Lines changed: 19 additions & 5 deletions

````diff
@@ -13,6 +13,9 @@ def spark(ws: WorkspaceClient):
 
     To enable serverless set the local environment variable `DATABRICKS_SERVERLESS_COMPUTE_ID` to `"auto"`.
     If this environment variable is set, Databricks Connect ignores the cluster_id.
+    If `DATABRICKS_SERVERLESS_COMPUTE_ID` is set to a specific serverless cluster ID, that cluster will be used instead.
+    However, this is not recommended, as serverless clusters are ephemeral by design.
+    See more details here: https://docs.databricks.com/en/dev-tools/databricks-connect/cluster-config.html#configure-a-connection-to-serverless-compute
 
     Usage:
     ```python
@@ -21,21 +24,32 @@ def test_databricks_connect(spark):
     assert rows[0][0] == 1
     ```
     """
-    if not ws.config.serverless_compute_id:
-        if not ws.config.cluster_id:
-            skip("No cluster_id found in the environment")
-        ws.clusters.ensure_cluster_is_running(ws.config.cluster_id)
+    cluster_id = ws.config.cluster_id
+    serverless_cluster_id = ws.config.serverless_compute_id
+    print(f"cluster id is: {serverless_cluster_id}")
+
+    if not serverless_cluster_id:
+        ensure_cluster_is_running(cluster_id, ws)
+
+    if serverless_cluster_id and serverless_cluster_id != "auto":
+        ensure_cluster_is_running(serverless_cluster_id, ws)
 
     try:
         # pylint: disable-next=import-outside-toplevel
         from databricks.connect import (  # type: ignore[import-untyped]
             DatabricksSession,
         )
 
-        if ws.config.serverless_compute_id:
+        if serverless_cluster_id:
             logging.debug("Using serverless compute")
             return DatabricksSession.builder.serverless(True).getOrCreate()
         return DatabricksSession.builder.sdkConfig(ws.config).getOrCreate()
     except ImportError:
         skip("Please run `pip install databricks-connect`")
     return None
+
+
+def ensure_cluster_is_running(cluster_id, ws):
+    if not cluster_id:
+        skip("No cluster_id found in the environment")
+    ws.clusters.ensure_cluster_is_running(cluster_id)
````
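To spell out the three compute paths the refactored fixture now handles, here is a standalone sketch of the same dispatch; the name `pick_compute` and its return strings are illustrative, not part of the module:

```python
def pick_compute(cluster_id: str | None, serverless_compute_id: str | None) -> str:
    """Mirror of the fixture's branching, for illustration only."""
    if not serverless_compute_id:
        # Classic path: fall back to the configured all-purpose cluster,
        # which must exist and be running.
        return f"classic:{cluster_id}"
    if serverless_compute_id != "auto":
        # A specific serverless cluster id is pinned; the fixture first
        # ensures it is running, then still builds a serverless session.
        return f"serverless:{serverless_compute_id}"
    # "auto": Databricks Connect picks serverless compute by itself.
    return "serverless:auto"


assert pick_compute("0123-456789-abcdefgh", None) == "classic:0123-456789-abcdefgh"
assert pick_compute(None, "auto") == "serverless:auto"
```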

tests/integration/conftest.py

Lines changed: 0 additions & 3 deletions

````diff
@@ -18,6 +18,3 @@ def debug_env_name():
 @fixture
 def product_info():
     return 'pytester', __version__
-
-
-
````

tests/integration/fixtures/test_connect.py

Lines changed: 19 additions & 4 deletions

````diff
@@ -2,10 +2,21 @@
 from pytest import fixture
 
 
-def test_databricks_connect(spark):
+def test_databricks_connect(ws, spark):
     rows = spark.sql("SELECT 1").collect()
     assert rows[0][0] == 1
 
+    creator = get_cluster_creator(spark, ws)
+    assert creator  # non-serverless clusters must have assigned creator
+
+
+def test_databricks_connect_serverless(serverless_env, ws, spark):
+    rows = spark.sql("SELECT 1").collect()
+    assert rows[0][0] == 1
+
+    creator = get_cluster_creator(spark, ws)
+    assert not creator  # serverless clusters don't have assigned creator
+
 
 @fixture
 def serverless_env():
@@ -14,6 +25,10 @@ def serverless_env():
     os.environ.pop('DATABRICKS_SERVERLESS_COMPUTE_ID')
 
 
-def test_databricks_connect_serverless(serverless_env, spark):
-    rows = spark.sql("SELECT 1").collect()
-    assert rows[0][0] == 1
+def get_cluster_creator(spark, ws):
+    """
+    Get the creator of the cluster that the Spark session is connected to.
+    """
+    cluster_id = spark.conf.get("spark.databricks.clusterUsageTags.clusterId")
+    creator = ws.clusters.get(cluster_id).creator_user_name
+    return creator
````
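Only the teardown of the `serverless_env` fixture is visible in this hunk; a plausible shape for the whole fixture, with the setup line assumed, is:

```python
import os

from pytest import fixture


@fixture
def serverless_env():
    # Assumed setup: the diff above only shows the teardown below.
    os.environ['DATABRICKS_SERVERLESS_COMPUTE_ID'] = 'auto'
    yield
    os.environ.pop('DATABRICKS_SERVERLESS_COMPUTE_ID')
```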
