Skip to content

Commit 09b00b1

Browse files
authored
Merge pull request #4503 from GoogleCloudPlatform/release-candidate
Release candidate V1.62.0
2 parents 26f95d8 + 4fe513c commit 09b00b1

File tree

69 files changed

+2169
-127
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

69 files changed

+2169
-127
lines changed

cluster-toolkit-writers.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,5 +379,8 @@
379379
},
380380
{
381381
"login": "bytetwin"
382+
},
383+
{
384+
"login": "LAVEEN"
382385
}
383386
]

cmd/root.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ HPC deployments on the Google Cloud Platform.`,
5353
logging.Fatal("cmd.Help function failed: %s", err)
5454
}
5555
},
56-
Version: "v1.61.0",
56+
Version: "v1.62.0",
5757
Annotations: annotation,
5858
}
5959
)
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
apiVersion: monitoring.googleapis.com/v1
16+
kind: PodMonitoring
17+
metadata:
18+
name: slurm-exporter
19+
namespace: slurm
20+
spec:
21+
selector:
22+
matchLabels:
23+
app.kubernetes.io/instance: slurm-exporter
24+
app.kubernetes.io/name: slurm-exporter
25+
endpoints:
26+
- port: metrics
27+
interval: 30s
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
---
15+
16+
blueprint_name: hpc-slinky
17+
18+
vars:
19+
project_id: ## Set GCP Project ID Here ##
20+
deployment_name: slinky-01
21+
region: us-central1
22+
zones:
23+
- us-central1-a
24+
authorized_cidr: # <your-ip-address>/32
25+
gcp_public_cidrs_access_enabled: false
26+
exporter_pod_monitoring_path: $(ghpc_stage("./exporter-pod-monitoring.yaml"))
27+
debug_nodeset_replicas: 2
28+
h3_nodeset_replicas: 2
29+
30+
deployment_groups:
31+
- group: primary
32+
modules:
33+
- id: network
34+
source: modules/network/vpc
35+
settings:
36+
subnetwork_name: $(vars.deployment_name)-subnet
37+
secondary_ranges_list:
38+
- subnetwork_name: $(vars.deployment_name)-subnet
39+
ranges:
40+
- range_name: pods
41+
ip_cidr_range: 10.4.0.0/14
42+
- range_name: services
43+
ip_cidr_range: 10.0.32.0/20
44+
45+
- id: node_pool_service_account
46+
source: community/modules/project/service-account
47+
settings:
48+
name: gke-np-sa
49+
project_roles:
50+
- logging.logWriter
51+
- monitoring.metricWriter
52+
- monitoring.viewer
53+
- stackdriver.resourceMetadata.writer
54+
- storage.objectAdmin
55+
- artifactregistry.reader
56+
57+
- id: workload_service_account
58+
source: community/modules/project/service-account
59+
settings:
60+
name: gke-wl-sa
61+
project_roles:
62+
- logging.logWriter
63+
- monitoring.metricWriter
64+
- monitoring.viewer
65+
- stackdriver.resourceMetadata.writer
66+
- storage.objectAdmin
67+
- artifactregistry.reader
68+
69+
- id: gke_cluster
70+
source: modules/scheduler/gke-cluster
71+
use: [network, workload_service_account]
72+
settings:
73+
enable_private_endpoint: false
74+
gcp_public_cidrs_access_enabled: $(vars.gcp_public_cidrs_access_enabled)
75+
master_authorized_networks:
76+
- display_name: deployment-machine
77+
cidr_block: $(vars.authorized_cidr)
78+
system_node_pool_enabled: false
79+
configure_workload_identity_sa: true
80+
outputs: [instructions]
81+
82+
- id: base_pool
83+
source: modules/compute/gke-node-pool
84+
use: [gke_cluster, node_pool_service_account]
85+
settings:
86+
initial_node_count: 1
87+
disk_type: pd-balanced
88+
machine_type: e2-standard-4
89+
zones: $(vars.zones)
90+
91+
- id: h3_pool
92+
source: modules/compute/gke-node-pool
93+
use: [gke_cluster, node_pool_service_account]
94+
settings:
95+
initial_node_count: 2
96+
disk_type: pd-balanced
97+
machine_type: h3-standard-88
98+
zones: $(vars.zones)
99+
100+
- id: slinky
101+
source: community/modules/scheduler/slinky
102+
use:
103+
- gke_cluster
104+
- base_pool # Optionally specify nodepool(s) to avoid operator components running on HPC hardware
105+
settings:
106+
slurm_values:
107+
compute:
108+
nodesets:
109+
- name: debug
110+
enabled: true
111+
replicas: $(vars.debug_nodeset_replicas)
112+
image:
113+
# Use the default nodeset image
114+
repository: ""
115+
tag: ""
116+
resources:
117+
requests:
118+
cpu: 500m
119+
memory: 4Gi
120+
limits:
121+
cpu: 500m
122+
memory: 4Gi
123+
affinity:
124+
nodeAffinity:
125+
requiredDuringSchedulingIgnoredDuringExecution:
126+
nodeSelectorTerms:
127+
- matchExpressions:
128+
- key: "node.kubernetes.io/instance-type"
129+
operator: In
130+
values:
131+
- e2-standard-4
132+
partition:
133+
enabled: true
134+
- name: h3
135+
enabled: true
136+
replicas: $(vars.h3_nodeset_replicas)
137+
image:
138+
# Use the default nodeset image
139+
repository: ""
140+
tag: ""
141+
resources:
142+
requests:
143+
cpu: 86
144+
memory: 324Gi
145+
limits:
146+
cpu: 86
147+
memory: 324Gi
148+
affinity:
149+
nodeAffinity:
150+
requiredDuringSchedulingIgnoredDuringExecution:
151+
nodeSelectorTerms:
152+
- matchExpressions:
153+
- key: "node.kubernetes.io/instance-type"
154+
operator: In
155+
values:
156+
- h3-standard-88
157+
partition:
158+
enabled: true
159+
outputs: [instructions]
160+
161+
- id: slurm_exporter_monitoring
162+
source: modules/management/kubectl-apply
163+
use: [gke_cluster]
164+
settings:
165+
apply_manifests:
166+
- source: $(vars.exporter_pod_monitoring_path)

community/modules/compute/htcondor-execute-point/versions.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,6 @@ terraform {
2929
}
3030

3131
provider_meta "google" {
32-
module_name = "blueprints/terraform/hpc-toolkit:htcondor-execute-point/v1.61.0"
32+
module_name = "blueprints/terraform/hpc-toolkit:htcondor-execute-point/v1.62.0"
3333
}
3434
}

community/modules/compute/mig/versions.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,6 @@ terraform {
2222
}
2323
}
2424
provider_meta "google" {
25-
module_name = "blueprints/terraform/hpc-toolkit:mig/v1.61.0"
25+
module_name = "blueprints/terraform/hpc-toolkit:mig/v1.62.0"
2626
}
2727
}

community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/versions.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,6 @@
1717
terraform {
1818
required_version = ">= 1.3"
1919
provider_meta "google" {
20-
module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-nodeset-dynamic/v1.61.0"
20+
module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-nodeset-dynamic/v1.62.0"
2121
}
2222
}

community/modules/compute/schedmd-slurm-gcp-v6-nodeset-tpu/versions.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,6 @@ terraform {
1818
required_version = ">= 1.3"
1919

2020
provider_meta "google" {
21-
module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-nodeset-tpu/v1.61.0"
21+
module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-nodeset-tpu/v1.62.0"
2222
}
2323
}

community/modules/compute/schedmd-slurm-gcp-v6-nodeset/versions.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,6 @@ terraform {
2424
}
2525
}
2626
provider_meta "google" {
27-
module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-nodeset/v1.61.0"
27+
module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-nodeset/v1.62.0"
2828
}
2929
}

community/modules/compute/schedmd-slurm-gcp-v6-partition/versions.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,6 @@ terraform {
1818
required_version = ">= 1.3"
1919

2020
provider_meta "google" {
21-
module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-partition/v1.61.0"
21+
module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-partition/v1.62.0"
2222
}
2323
}

0 commit comments

Comments
 (0)