Skip to content

Commit 96f057e

Browse files
committed
Merge branch 'dev' into 'master'
6.3.1 See merge request SchedMD/slurm-gcp!82
2 parents a308f7d + 464a613 commit 96f057e

File tree

7 files changed

+30
-3
lines changed

7 files changed

+30
-3
lines changed

CHANGELOG.md

+5
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22

33
All notable changes to this project will be documented in this file.
44

5+
## \[6.3.1\]
6+
7+
- Add reserved property for nodeset_tpu
8+
- Update Lustre repository URL
9+
510
## \[6.3.0\]
611

712
- Upgrade installed Slurm to 23.02.7

ansible/roles/lustre/vars/redhat-8.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16-
lustre_repo_url: https://downloads.whamcloud.com/public/lustre/latest-release/el8.8/client
16+
lustre_repo_url: https://downloads.whamcloud.com/public/lustre/latest-release/el8.9/client
1717

1818
lustre_packages:
1919
- lustre-client

scripts/util.py

+5
Original file line numberDiff line numberDiff line change
@@ -1148,6 +1148,10 @@ def enable_public_ip(self):
11481148
def preemptible(self):
11491149
return self._nodeset.preemptible
11501150

1151+
@property
1152+
def reserved(self):
1153+
return self._nodeset.reserved
1154+
11511155
@property
11521156
def service_account(self):
11531157
return self._nodeset.service_account
@@ -1277,6 +1281,7 @@ def create_node(self, nodename):
12771281
node.service_account.email = self.nodeset.service_account.email
12781282
node.service_account.scope = self.nodeset.service_account.scopes
12791283
node.scheduling_config.preemptible = self.preemptible
1284+
node.scheduling_config.reserved = self.reserved
12801285
if self.nodeset.network:
12811286
node.network_config.network = self.nodeset.network
12821287
if self.nodeset.subnetwork:

terraform/slurm_cluster/modules/slurm_nodeset_tpu/README_TF.md

+1
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ No modules.
5959
| <a name="input_preemptible"></a> [preemptible](#input\_preemptible) | Specify whether TPU-vms in this nodeset are preemptible, see https://cloud.google.com/tpu/docs/preemptible for details. | `bool` | `false` | no |
6060
| <a name="input_preserve_tpu"></a> [preserve\_tpu](#input\_preserve\_tpu) | Specify whether TPU-vms are preserved on suspend: if set to true, the VM is stopped on suspend; if set to false, it is deleted. | `bool` | `true` | no |
6161
| <a name="input_project_id"></a> [project\_id](#input\_project\_id) | Project ID to create resources in. | `string` | n/a | yes |
62+
| <a name="input_reserved"></a> [reserved](#input\_reserved) | Specify whether TPU-vms in this nodeset are created under a reservation. | `bool` | `false` | no |
6263
| <a name="input_service_account"></a> [service\_account](#input\_service\_account) | Service account to attach to the TPU-vm.<br>If none is given, the default service account and scopes will be used. | <pre>object({<br> email = string<br> scopes = set(string)<br> })</pre> | `null` | no |
6364
| <a name="input_subnetwork"></a> [subnetwork](#input\_subnetwork) | The name of the subnetwork to attach the TPU-vm of this nodeset to. | `string` | `null` | no |
6465
| <a name="input_tf_version"></a> [tf\_version](#input\_tf\_version) | Nodeset Tensorflow version, see https://cloud.google.com/tpu/docs/supported-tpu-configurations#tpu_vm for details. | `string` | n/a | yes |

terraform/slurm_cluster/modules/slurm_nodeset_tpu/main.tf

+4
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,10 @@ resource "null_resource" "nodeset_tpu" {
111111
condition = sum([var.node_count_dynamic_max, var.node_count_static]) > 0
112112
error_message = "Sum of node_count_dynamic_max and node_count_static must be > 0."
113113
}
114+
precondition {
115+
condition = !(var.preemptible && var.reserved)
116+
error_message = "Nodeset cannot be preemptible and reserved at the same time."
117+
}
114118
precondition {
115119
condition = !(var.subnetwork == null && !var.enable_public_ip)
116120
error_message = "Using the default subnetwork for the TPU nodeset requires enable_public_ip set to true."

terraform/slurm_cluster/modules/slurm_nodeset_tpu/variables.tf

+6
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,12 @@ variable "preemptible" {
7777
default = false
7878
}
7979

80+
variable "reserved" {
81+
description = "Specify whether TPU-vms in this nodeset are created under a reservation."
82+
type = bool
83+
default = false
84+
}
85+
8086
variable "preserve_tpu" {
8187
description = "Specify whether TPU-vms will get preserve on suspend, if set to true, on suspend vm is stopped, on false it gets deleted"
8288
type = bool

terraform/slurm_cluster/modules/slurm_nodeset_tpu/versions.tf

+8-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,13 @@ terraform {
1818
required_version = "~> 1.2"
1919

2020
required_providers {
21-
google = ">= 3.53, < 5.0"
22-
null = "~> 3.0"
21+
google = {
22+
source = "hashicorp/google"
23+
version = ">= 3.53, < 5.0"
24+
}
25+
null = {
26+
source = "hashicorp/null"
27+
version = "~> 3.0"
28+
}
2329
}
2430
}

0 commit comments

Comments
 (0)