Skip to content

Commit 6ca267f

Browse files
andrewnester authored and denik committed
Added support for lifecycle.started for clusters (#5150)
## Changes

Adds `lifecycle.started` support for clusters in the direct deployment engine, mirroring the same feature for apps (#4672).

## Why

Without this field, clusters defined in a bundle are always left in whatever state the API puts them in after creation. Users have no way to declare "ensure this cluster is running after every deploy."

`lifecycle.started: true` guarantees the cluster is RUNNING after bundle deploy.

`lifecycle.started: false` creates the cluster but immediately terminates it, and subsequent deploys that detect drift (e.g., someone started the cluster manually) will stop it again.

Note: `WaitAfterCreate` always waits for RUNNING first — real clusters start in PENDING state and must be polled. For `started=false`, we wait for RUNNING then terminate; this avoids races with the API that would reject a terminate on a still-pending cluster.

## Tests

Added acceptance tests.

<!-- If your PR needs to be included in the release notes for next release, add a separate entry in NEXT_CHANGELOG.md as part of your PR. -->
1 parent b1359dc commit 6ca267f

34 files changed

Lines changed: 759 additions & 131 deletions

File tree

acceptance/bundle/refschema/out.fields.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ resources.apps.*.git_source.resolved_commit string ALL
122122
resources.apps.*.git_source.source_code_path string ALL
123123
resources.apps.*.git_source.tag string ALL
124124
resources.apps.*.id string ALL
125-
resources.apps.*.lifecycle *dresources.AppStateLifecycle REMOTE STATE
125+
resources.apps.*.lifecycle *dresources.StateLifecycle REMOTE STATE
126126
resources.apps.*.lifecycle *resources.LifecycleWithStarted INPUT
127127
resources.apps.*.lifecycle resources.Lifecycle INPUT
128128
resources.apps.*.lifecycle.prevent_destroy bool INPUT
@@ -389,8 +389,11 @@ resources.clusters.*.jdbc_port int REMOTE
389389
resources.clusters.*.kind compute.Kind ALL
390390
resources.clusters.*.last_restarted_time int64 REMOTE
391391
resources.clusters.*.last_state_loss_time int64 REMOTE
392+
resources.clusters.*.lifecycle *dresources.StateLifecycle REMOTE STATE
393+
resources.clusters.*.lifecycle *resources.LifecycleWithStarted INPUT
392394
resources.clusters.*.lifecycle resources.Lifecycle INPUT
393395
resources.clusters.*.lifecycle.prevent_destroy bool INPUT
396+
resources.clusters.*.lifecycle.started *bool ALL
394397
resources.clusters.*.modified_status string INPUT
395398
resources.clusters.*.node_type_id string ALL
396399
resources.clusters.*.num_workers int ALL

acceptance/bundle/resources/clusters/deploy/update-and-resize-autoscale/out.plan_.direct.json

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,13 @@
4646
"cluster_name": "test-cluster-[UNIQUE_NAME]",
4747
"driver_node_type_id": "[NODE_TYPE_ID]",
4848
"enable_elastic_disk": false,
49+
"lifecycle": {
50+
"started": false
51+
},
4952
"node_type_id": "[NODE_TYPE_ID]",
5053
"num_workers": 2,
51-
"spark_version": "13.3.x-snapshot-scala2.12"
54+
"spark_version": "13.3.x-snapshot-scala2.12",
55+
"state": "TERMINATED"
5256
},
5357
"changes": {
5458
"autoscale": {
@@ -76,6 +80,13 @@
7680
"reason": "empty",
7781
"remote": false
7882
},
83+
"lifecycle": {
84+
"action": "skip",
85+
"reason": "backend_default",
86+
"remote": {
87+
"started": false
88+
}
89+
},
7990
"num_workers": {
8091
"action": "update",
8192
"old": 2,
@@ -152,6 +163,13 @@
152163
"action": "skip",
153164
"reason": "empty",
154165
"remote": false
166+
},
167+
"lifecycle": {
168+
"action": "skip",
169+
"reason": "backend_default",
170+
"remote": {
171+
"started": false
172+
}
155173
}
156174
}
157175
}
@@ -191,6 +209,9 @@
191209
"cluster_name": "test-cluster-[UNIQUE_NAME]",
192210
"driver_node_type_id": "[NODE_TYPE_ID]",
193211
"enable_elastic_disk": false,
212+
"lifecycle": {
213+
"started": true
214+
},
194215
"node_type_id": "[NODE_TYPE_ID]",
195216
"spark_version": "13.3.x-snapshot-scala2.12",
196217
"state": "RUNNING"
@@ -227,6 +248,13 @@
227248
"action": "skip",
228249
"reason": "empty",
229250
"remote": false
251+
},
252+
"lifecycle": {
253+
"action": "skip",
254+
"reason": "backend_default",
255+
"remote": {
256+
"started": true
257+
}
230258
}
231259
}
232260
}
@@ -263,6 +291,9 @@
263291
"cluster_name": "test-cluster-[UNIQUE_NAME]",
264292
"driver_node_type_id": "[NODE_TYPE_ID]",
265293
"enable_elastic_disk": false,
294+
"lifecycle": {
295+
"started": true
296+
},
266297
"node_type_id": "[NODE_TYPE_ID]",
267298
"spark_version": "13.3.x-snapshot-scala2.12",
268299
"state": "RUNNING"
@@ -298,6 +329,13 @@
298329
"reason": "empty",
299330
"remote": false
300331
},
332+
"lifecycle": {
333+
"action": "skip",
334+
"reason": "backend_default",
335+
"remote": {
336+
"started": true
337+
}
338+
},
301339
"num_workers": {
302340
"action": "resize",
303341
"reason": "custom",

acceptance/bundle/resources/clusters/deploy/update-and-resize-autoscale/output.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@ Deployment complete!
1212
"autoscale": null
1313
}
1414

15+
=== Terminating the cluster
16+
{
17+
"cluster_id": "[CLUSTER_ID]"
18+
}
19+
1520
=== Adding autoscale section should call update API on stopped cluster
1621

1722
>>> [CLI] bundle deploy

acceptance/bundle/resources/clusters/deploy/update-and-resize-autoscale/script

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ CLUSTER_ID=$($CLI bundle summary -o json | jq -r '.resources.clusters.test_clust
1515
echo "$CLUSTER_ID:CLUSTER_ID" >> ACC_REPLS
1616
$CLI clusters get "${CLUSTER_ID}" | jq '{cluster_name,num_workers,autoscale}'
1717

18+
title "Terminating the cluster\n"
19+
$CLI clusters delete "${CLUSTER_ID}" | jq '{cluster_id}'
20+
1821
title "Adding autoscale section should call update API on stopped cluster\n"
1922
update_file.py databricks.yml " num_workers: 2" " autoscale:
2023
min_workers: 2

acceptance/bundle/resources/clusters/deploy/update-and-resize/out.plan_.direct.json

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
"serial": 1,
2727
"plan": {
2828
"resources.clusters.test_cluster": {
29-
"action": "update",
29+
"action": "resize",
3030
"new_state": {
3131
"value": {
3232
"autotermination_minutes": 60,
@@ -49,12 +49,16 @@
4949
"cluster_name": "test-cluster-[UNIQUE_NAME]",
5050
"driver_node_type_id": "[NODE_TYPE_ID]",
5151
"enable_elastic_disk": false,
52+
"lifecycle": {
53+
"started": true
54+
},
5255
"node_type_id": "[NODE_TYPE_ID]",
5356
"num_workers": 2,
5457
"spark_conf": {
5558
"spark.executor.memory": "2g"
5659
},
57-
"spark_version": "13.3.x-snapshot-scala2.12"
60+
"spark_version": "13.3.x-snapshot-scala2.12",
61+
"state": "RUNNING"
5862
},
5963
"changes": {
6064
"aws_attributes": {
@@ -75,8 +79,16 @@
7579
"reason": "empty",
7680
"remote": false
7781
},
82+
"lifecycle": {
83+
"action": "skip",
84+
"reason": "backend_default",
85+
"remote": {
86+
"started": true
87+
}
88+
},
7889
"num_workers": {
79-
"action": "update",
90+
"action": "resize",
91+
"reason": "custom",
8092
"old": 2,
8193
"new": 3,
8294
"remote": 2
@@ -115,6 +127,9 @@
115127
"cluster_name": "test-cluster-[UNIQUE_NAME]",
116128
"driver_node_type_id": "[NODE_TYPE_ID]",
117129
"enable_elastic_disk": false,
130+
"lifecycle": {
131+
"started": true
132+
},
118133
"node_type_id": "[NODE_TYPE_ID]",
119134
"num_workers": 3,
120135
"spark_conf": {
@@ -142,6 +157,13 @@
142157
"reason": "empty",
143158
"remote": false
144159
},
160+
"lifecycle": {
161+
"action": "skip",
162+
"reason": "backend_default",
163+
"remote": {
164+
"started": true
165+
}
166+
},
145167
"num_workers": {
146168
"action": "resize",
147169
"reason": "custom",
@@ -183,6 +205,9 @@
183205
"cluster_name": "test-cluster-[UNIQUE_NAME]",
184206
"driver_node_type_id": "[NODE_TYPE_ID]",
185207
"enable_elastic_disk": false,
208+
"lifecycle": {
209+
"started": true
210+
},
186211
"node_type_id": "[NODE_TYPE_ID]",
187212
"num_workers": 4,
188213
"spark_conf": {
@@ -210,6 +235,13 @@
210235
"reason": "empty",
211236
"remote": false
212237
},
238+
"lifecycle": {
239+
"action": "skip",
240+
"reason": "backend_default",
241+
"remote": {
242+
"started": true
243+
}
244+
},
213245
"num_workers": {
214246
"action": "resize",
215247
"reason": "custom",

acceptance/bundle/resources/clusters/deploy/update-and-resize/out.plan_.direct.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
create clusters.test_cluster
22

33
Plan: 1 to add, 0 to change, 0 to delete, 0 unchanged
4-
update clusters.test_cluster
4+
resize clusters.test_cluster
55

66
Plan: 0 to add, 1 to change, 0 to delete, 0 unchanged
77
resize clusters.test_cluster

acceptance/bundle/resources/clusters/deploy/update-and-resize/output.txt

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -20,21 +20,6 @@ Updating deployment state...
2020
Deployment complete!
2121

2222
>>> jq select(.method == "POST" and (.path | contains("/clusters/edit"))) | del(.body.aws_attributes, .body.driver_node_type_id, .body.data_security_mode, .body.enable_elastic_disk) out.requests.txt
23-
{
24-
"method": "POST",
25-
"path": "/api/2.1/clusters/edit",
26-
"body": {
27-
"autotermination_minutes": 60,
28-
"cluster_id": "[CLUSTER_ID]",
29-
"cluster_name": "test-cluster-[UNIQUE_NAME]",
30-
"node_type_id": "[NODE_TYPE_ID]",
31-
"num_workers": 3,
32-
"spark_conf": {
33-
"spark.executor.memory": "2g"
34-
},
35-
"spark_version": "13.3.x-snapshot-scala2.12"
36-
}
37-
}
3823

3924
=== Cluster should have new num_workers
4025
{
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
bundle:
2+
name: lifecycle-started-$UNIQUE_NAME
3+
4+
workspace:
5+
root_path: ~/.bundle/$UNIQUE_NAME
6+
7+
resources:
8+
clusters:
9+
mycluster:
10+
cluster_name: $UNIQUE_NAME
11+
spark_version: "15.4.x-scala2.12"
12+
node_type_id: "i3.xlarge"
13+
num_workers: 1
14+
lifecycle:
15+
started: true

acceptance/bundle/resources/clusters/lifecycle-started-terraform-error/out.test.toml

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
2+
=== bundle plan fails with lifecycle.started on terraform engine
3+
>>> errcode [CLI] bundle plan
4+
Error: lifecycle.started is only supported in direct deployment mode
5+
in databricks.yml:15:18
6+
7+
8+
Exit code: 1
9+
10+
=== bundle deploy fails with lifecycle.started on terraform engine
11+
>>> errcode [CLI] bundle deploy
12+
Error: lifecycle.started is only supported in direct deployment mode
13+
in databricks.yml:15:18
14+
15+
16+
Exit code: 1

0 commit comments

Comments
 (0)