Skip to content

Commit 1102462

Browse files
jhamet93 and claude authored
Remove default spark_version from DatabricksResource (#57)
This is a breaking change that requires users to explicitly specify the Databricks runtime version when creating a DatabricksResource. Previously, the spark_version defaulted to "11.3.x-scala2.12". This change: - Removes the default value for spark_version field in DatabricksResource - Updates pyproject.toml version from 2.10.2 to 3.0.0 (major version bump) - Updates all tests to explicitly provide spark_version parameter BREAKING CHANGE: Users must now explicitly provide spark_version when instantiating DatabricksResource. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude <[email protected]>
1 parent f190d05 commit 1102462

File tree

5 files changed

+11
-5
lines changed

5 files changed

+11
-5
lines changed

block_cascade/executors/databricks/resource.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,6 @@ class DatabricksResource(BaseModel):
107107
spark_version: str
108108
Databricks runtime version. Tested on 11.3.x-scala2.12.
109109
https://docs.databricks.com/release-notes/runtime/releases.html
110-
Default is 11.3.x-scala2.12
111110
data_security_mode: Optional[str]
112111
See `data_security_mode` at
113112
https://docs.databricks.com/administration-guide/clusters/policies.html#cluster-policy-attribute-paths
@@ -159,7 +158,7 @@ class DatabricksResource(BaseModel):
159158
storage_location: str
160159
worker_count: Union[int, DatabricksAutoscaleConfig] = 1
161160
machine: str = "i3.xlarge"
162-
spark_version: str = "11.3.x-scala2.12"
161+
spark_version: str
163162
data_security_mode: Optional[str] = "SINGLE_USER"
164163
cluster_spec_overrides: Optional[dict] = None
165164
cluster_policy: Optional[str] = None

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name = "block-cascade"
33
packages = [
44
{include = "block_cascade"}
55
]
6-
version = "2.10.2"
6+
version = "3.0.0"
77
description = "Library for model training in multi-cloud environment."
88
readme = "README.md"
99
authors = ["Block"]

tests/executors/databricks/resource/test_python_library.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ def test_databricks_resource_string_library_conversion():
3232

3333
resource = DatabricksResource(
3434
storage_location="s3://test-bucket/cascade",
35+
spark_version="11.3.x-scala2.12",
3536
python_libraries=["test-package"]
3637
)
3738

@@ -41,8 +42,9 @@ def test_databricks_resource_string_library_conversion():
4142

4243
resource = DatabricksResource(
4344
storage_location="s3://test-bucket/cascade",
45+
spark_version="11.3.x-scala2.12",
4446
python_libraries=[
45-
"package1",
47+
"package1",
4648
DatabricksPythonLibrary(name="package2", version="1.0.0")
4749
]
4850
)
@@ -60,6 +62,7 @@ def test_databricks_resource_string_with_version_conversion():
6062

6163
resource = DatabricksResource(
6264
storage_location="s3://test-bucket/cascade",
65+
spark_version="11.3.x-scala2.12",
6366
python_libraries=["cloudpickle==0.10.0"]
6467
)
6568

@@ -70,6 +73,7 @@ def test_databricks_resource_string_with_version_conversion():
7073

7174
resource = DatabricksResource(
7275
storage_location="s3://test-bucket/cascade",
76+
spark_version="11.3.x-scala2.12",
7377
python_libraries=[
7478
"numpy==1.22.4",
7579
"pandas==2.0.0",

tests/test_config.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ def databricks_resource():
6767
storage_location="s3://test-bucket/cascade",
6868
worker_count=DatabricksAutoscaleConfig(min_workers=5, max_workers=10),
6969
cloud_pickle_by_value=["a", "b"],
70+
spark_version="11.3.x-scala2.12",
7071
)
7172

7273

@@ -120,13 +121,14 @@ def test_databricks_resource(
120121
configuration = f"""
121122
{test_job_name}:
122123
type: DatabricksResource
123-
storage_location: {databricks_resource.storage_location}
124+
storage_location: {databricks_resource.storage_location}
124125
worker_count:
125126
min_workers: {databricks_resource.worker_count.min_workers}
126127
max_workers: {databricks_resource.worker_count.max_workers}
127128
cloud_pickle_by_value:
128129
- a
129130
- b
131+
spark_version: {databricks_resource.spark_version}
130132
"""
131133
fs.create_file(configuration_filename, contents=configuration)
132134
assert databricks_resource == find_default_configuration()[test_job_name]

tests/test_databricks_executor.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
databricks_resource = DatabricksResource(
2020
storage_location="s3://test-bucket/cascade",
2121
group_name=DATABRICKS_GROUP,
22+
spark_version="11.3.x-scala2.12",
2223
)
2324

2425

0 commit comments

Comments
 (0)