Skip to content

Commit 1102462

Browse files
jhamet93 and claude authored
Remove default spark_version from DatabricksResource (#57)
This is a breaking change that requires users to explicitly specify the Databricks runtime version when creating a DatabricksResource. Previously, the spark_version defaulted to "11.3.x-scala2.12". This change: - Removes the default value for spark_version field in DatabricksResource - Updates pyproject.toml version from 2.10.2 to 3.0.0 (major version bump) - Updates all tests to explicitly provide spark_version parameter BREAKING CHANGE: Users must now explicitly provide spark_version when instantiating DatabricksResource. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude <[email protected]>
1 parent f190d05 commit 1102462

File tree

5 files changed

+11
-5
lines changed

5 files changed

+11
-5
lines changed

block_cascade/executors/databricks/resource.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,6 @@ class DatabricksResource(BaseModel):
107107
spark_version: str
108108
Databricks runtime version. Tested on 11.3.x-scala2.12.
109109
https://docs.databricks.com/release-notes/runtime/releases.html
110-
Default is 11.3.x-scala2.12
111110
data_security_mode: Optional[str]
112111
See `data_security_mode` at
113112
https://docs.databricks.com/administration-guide/clusters/policies.html#cluster-policy-attribute-paths
@@ -159,7 +158,7 @@ class DatabricksResource(BaseModel):
159158
storage_location: str
160159
worker_count: Union[int, DatabricksAutoscaleConfig] = 1
161160
machine: str = "i3.xlarge"
162-
spark_version: str = "11.3.x-scala2.12"
161+
spark_version: str
163162
data_security_mode: Optional[str] = "SINGLE_USER"
164163
cluster_spec_overrides: Optional[dict] = None
165164
cluster_policy: Optional[str] = None

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name = "block-cascade"
33
packages = [
44
{include = "block_cascade"}
55
]
6-
version = "2.10.2"
6+
version = "3.0.0"
77
description = "Library for model training in multi-cloud environment."
88
readme = "README.md"
99
authors = ["Block"]

tests/executors/databricks/resource/test_python_library.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ def test_databricks_resource_string_library_conversion():
3232

3333
resource = DatabricksResource(
3434
storage_location="s3://test-bucket/cascade",
35+
spark_version="11.3.x-scala2.12",
3536
python_libraries=["test-package"]
3637
)
3738

@@ -41,8 +42,9 @@ def test_databricks_resource_string_library_conversion():
4142

4243
resource = DatabricksResource(
4344
storage_location="s3://test-bucket/cascade",
45+
spark_version="11.3.x-scala2.12",
4446
python_libraries=[
45-
"package1",
47+
"package1",
4648
DatabricksPythonLibrary(name="package2", version="1.0.0")
4749
]
4850
)
@@ -60,6 +62,7 @@ def test_databricks_resource_string_with_version_conversion():
6062

6163
resource = DatabricksResource(
6264
storage_location="s3://test-bucket/cascade",
65+
spark_version="11.3.x-scala2.12",
6366
python_libraries=["cloudpickle==0.10.0"]
6467
)
6568

@@ -70,6 +73,7 @@ def test_databricks_resource_string_with_version_conversion():
7073

7174
resource = DatabricksResource(
7275
storage_location="s3://test-bucket/cascade",
76+
spark_version="11.3.x-scala2.12",
7377
python_libraries=[
7478
"numpy==1.22.4",
7579
"pandas==2.0.0",

tests/test_config.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ def databricks_resource():
6767
storage_location="s3://test-bucket/cascade",
6868
worker_count=DatabricksAutoscaleConfig(min_workers=5, max_workers=10),
6969
cloud_pickle_by_value=["a", "b"],
70+
spark_version="11.3.x-scala2.12",
7071
)
7172

7273

@@ -120,13 +121,14 @@ def test_databricks_resource(
120121
configuration = f"""
121122
{test_job_name}:
122123
type: DatabricksResource
123-
storage_location: {databricks_resource.storage_location}
124+
storage_location: {databricks_resource.storage_location}
124125
worker_count:
125126
min_workers: {databricks_resource.worker_count.min_workers}
126127
max_workers: {databricks_resource.worker_count.max_workers}
127128
cloud_pickle_by_value:
128129
- a
129130
- b
131+
spark_version: {databricks_resource.spark_version}
130132
"""
131133
fs.create_file(configuration_filename, contents=configuration)
132134
assert databricks_resource == find_default_configuration()[test_job_name]

tests/test_databricks_executor.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
databricks_resource = DatabricksResource(
2020
storage_location="s3://test-bucket/cascade",
2121
group_name=DATABRICKS_GROUP,
22+
spark_version="11.3.x-scala2.12",
2223
)
2324

2425

0 commit comments

Comments
 (0)