Skip to content

Commit e727f26

Browse files
authored
Merge pull request #3641 from GoogleCloudPlatform/release-candidate
Release v1.46.0
2 parents a9dd634 + bb1ddad commit e727f26

File tree

213 files changed

+5233
-1764
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

213 files changed

+5233
-1764
lines changed

.github/dependabot.yml

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -64,25 +64,3 @@ updates:
6464
update-types:
6565
- minor
6666
- patch
67-
- package-ecosystem: pip
68-
directory: /community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/
69-
labels:
70-
- dependencies
71-
- python
72-
- release-chore
73-
schedule:
74-
interval: monthly
75-
day: monday
76-
time: "03:00"
77-
timezone: America/Los_Angeles
78-
target-branch: develop
79-
groups:
80-
# group all Slurm minor/patch updates together and individual PRs for major updates
81-
# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/optimizing-pr-creation-version-updates#example-3-individual-pull-requests-for-major-updates-and-grouped-for-minorpatch-updates
82-
slurm-python-minor-and-patch-updates:
83-
applies-to: version-updates
84-
patterns:
85-
- "*"
86-
update-types:
87-
- minor
88-
- patch

.github/workflows/pr-precommit.yml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ jobs:
3434
- uses: actions/checkout@v4
3535
- uses: actions/setup-python@v5
3636
with:
37+
# internal Python tests require Python 3.10
3738
python-version: '3.10'
3839
check-latest: true
3940
cache: 'pip'
@@ -56,3 +57,33 @@ jobs:
5657
- uses: pre-commit/action@v3.0.1
5758
with:
5859
extra_args: --show-diff-on-failure --all-files
60+
pre-commit-highest-dependencies:
61+
runs-on: ubuntu-latest
62+
steps:
63+
- uses: actions/checkout@v4
64+
- uses: actions/setup-python@v5
65+
with:
66+
# the slurm-files Python requirements.txt requires updating
67+
# to enable 3.12+ compatibility
68+
python-version: '3.11'
69+
check-latest: true
70+
cache: 'pip'
71+
- uses: actions/setup-go@v5
72+
with:
73+
go-version: '1.23'
74+
check-latest: true
75+
- uses: hashicorp/setup-terraform@v3
76+
with:
77+
terraform_version: latest
78+
terraform_wrapper: false
79+
- run: make install-dev-deps
80+
- uses: terraform-linters/setup-tflint@v4
81+
with:
82+
tflint_version: v0.49.0
83+
- run: tflint --init
84+
env:
85+
# https://github.com/terraform-linters/tflint/blob/master/docs/user-guide/plugins.md#avoiding-rate-limiting
86+
GITHUB_TOKEN: ${{ github.token }}
87+
- uses: pre-commit/action@v3.0.1
88+
with:
89+
extra_args: --show-diff-on-failure --all-files

Makefile

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# PREAMBLE
22
MIN_PACKER_VERSION=1.7.9 # for building images
3-
MIN_TERRAFORM_VERSION=1.2 # for deploying modules
4-
MIN_GOLANG_VERSION=1.18 # for building gcluster
3+
MIN_TERRAFORM_VERSION=1.5.7 # for deploying modules
4+
MIN_GOLANG_VERSION=1.22 # for building gcluster
55

66
.PHONY: install install-user tests format install-dev-deps \
77
warn-go-missing warn-terraform-missing warn-packer-missing \
@@ -14,6 +14,8 @@ SHELL=/bin/bash -o pipefail
1414
ENG = ./cmd/... ./pkg/...
1515
TERRAFORM_FOLDERS=$(shell find ./modules ./community/modules ./tools -type f -name "*.tf" -not -path '*/\.*' -exec dirname "{}" \; | sort -u)
1616
PACKER_FOLDERS=$(shell find ./modules ./community/modules ./tools -type f -name "*.pkr.hcl" -not -path '*/\.*' -exec dirname "{}" \; | sort -u)
17+
BINARY_TARGETS := ghpc gcluster
18+
INSTALL_DIRS := . ~/bin /usr/local/bin
1719

1820
ifneq (, $(shell which git))
1921
## GIT IS PRESENT
@@ -72,6 +74,23 @@ install-dev-deps: warn-terraform-version warn-packer-version check-pre-commit ch
7274
pip install -r community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/requirements.txt
7375
pip install -r community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/requirements.txt
7476

77+
78+
clean:
79+
@for dir in $(INSTALL_DIRS); do \
80+
for file in $(BINARY_TARGETS); do \
81+
if [ -f "$$dir/$$file" ] || [ -L "$$dir/$$file" ]; then \
82+
if [ -w "$$dir/$$file" ]; then \
83+
echo "Removing $$dir/$$file"; \
84+
rm "$$dir/$$file"; \
85+
else \
86+
echo "Do not have permissions to delete $$dir/$$file"; \
87+
fi; \
88+
else \
89+
echo "$$dir/$$file does not exist"; \
90+
fi; \
91+
done; \
92+
done
93+
7594
# RULES SUPPORTING THE ABOVE
7695

7796
test-engine: warn-go-missing

cmd/root.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ HPC deployments on the Google Cloud Platform.`,
5353
logging.Fatal("cmd.Help function failed: %s", err)
5454
}
5555
},
56-
Version: "v1.45.0",
56+
Version: "v1.46.0",
5757
Annotations: annotation,
5858
}
5959
)

community/examples/AMD/hpc-amd-slurm.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,6 @@ deployment_groups:
178178
machine_type: c2d-standard-4
179179
node_count_dynamic_max: 10
180180
bandwidth_tier: gvnic_enabled
181-
enable_placement: false
182181
allow_automatic_updates: false
183182

184183
- id: low_cost_partition
@@ -194,7 +193,6 @@ deployment_groups:
194193
machine_type: c2d-standard-112
195194
node_count_dynamic_max: 50
196195
bandwidth_tier: gvnic_enabled
197-
enable_placement: true
198196
allow_automatic_updates: false
199197

200198
# Because is_default is set to true, jobs will run on this partition unless an

community/examples/hpc-slurm-gromacs.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@ deployment_groups:
3131
- id: network
3232
source: modules/network/vpc
3333

34+
# Private Service Access (PSA) requires the compute.networkAdmin role which is
35+
# included in the Owner role, but not in the Editor role.
36+
# PSA is a best practice for Filestore instances, but can be optionally
37+
# removed by deleting the private_service_access module and any references to
38+
# the module by Filestore modules.
39+
# https://cloud.google.com/vpc/docs/configure-private-services-access#permissions
3440
- id: private_service_access
3541
source: community/modules/network/private-service-access
3642
use: [network]
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
---
15+
blueprint_name: slurm-h4d
16+
vars:
17+
project_id: ## Set GCP Project ID Here ##
18+
deployment_name: slurm-h4d
19+
region: us-central1
20+
zone: us-central1-a
21+
rdma_net_range: 192.168.128.0/18
22+
23+
# Documentation for each of the modules used below can be found at
24+
# https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md
25+
deployment_groups:
26+
- group: primary
27+
modules:
28+
29+
# Source is an embedded module, denoted by "modules/*" without ./, ../, /
30+
# as a prefix. To refer to a local module, prefix with ./, ../ or /
31+
32+
- id: h4d-slurm-net-0
33+
source: modules/network/vpc
34+
35+
- id: h4d-rdma-net
36+
source: modules/network/vpc
37+
settings:
38+
network_name: $(vars.deployment_name)-rdma-net-0
39+
mtu: 8896
40+
network_profile: https://www.googleapis.com/compute/beta/projects/$(vars.project_id)/global/networkProfiles/$(vars.zone)-vpc-falcon
41+
network_routing_mode: REGIONAL
42+
enable_cloud_router: false
43+
enable_cloud_nat: false
44+
subnetworks:
45+
- subnet_name: $(vars.deployment_name)-rdma-sub-0
46+
subnet_region: $(vars.region)
47+
subnet_ip: $(vars.rdma_net_range)
48+
region: $(vars.region)
49+
firewall_rules:
50+
- name: $(vars.deployment_name)-rdma-0
51+
ranges: [$(vars.rdma_net_range)]
52+
allow:
53+
- protocol: tcp
54+
- protocol: udp
55+
- protocol: icmp
56+
57+
- id: homefs
58+
source: modules/file-system/filestore
59+
use: [h4d-slurm-net-0]
60+
settings:
61+
filestore_tier: BASIC_SSD
62+
size_gb: 2560
63+
filestore_share_name: homeshare
64+
local_mount: /home
65+
66+
- id: appsfs
67+
source: modules/file-system/filestore
68+
use: [h4d-slurm-net-0]
69+
settings:
70+
filestore_tier: BASIC_SSD
71+
size_gb: 2560
72+
filestore_share_name: appsshare
73+
local_mount: /apps
74+
75+
- id: h4d_startup
76+
source: modules/scripts/startup-script
77+
settings:
78+
install_cloud_rdma_drivers: true
79+
set_ofi_cloud_rdma_tunables: true
80+
local_ssd_filesystem:
81+
fs_type: ext4
82+
mountpoint: /mnt/lssd
83+
permissions: "1777"
84+
85+
- id: h4d_nodeset
86+
source: community/modules/compute/schedmd-slurm-gcp-v6-nodeset
87+
use: [h4d_startup, h4d-slurm-net-0]
88+
settings:
89+
bandwidth_tier: gvnic_enabled
90+
machine_type: h4d-highmem-192-lssd
91+
node_count_static: 2
92+
node_count_dynamic_max: 0
93+
enable_placement: false
94+
disk_type: hyperdisk-balanced
95+
on_host_maintenance: TERMINATE
96+
additional_networks:
97+
$(concat(
98+
[{
99+
network=null,
100+
subnetwork=h4d-rdma-net.subnetwork_self_link,
101+
subnetwork_project=vars.project_id,
102+
nic_type="IRDMA",
103+
queue_count=null,
104+
network_ip=null,
105+
stack_type=null,
106+
access_config=null,
107+
ipv6_access_config=[],
108+
alias_ip_range=[]
109+
}]
110+
))
111+
112+
- id: h4d_partition
113+
source: community/modules/compute/schedmd-slurm-gcp-v6-partition
114+
use:
115+
- h4d_nodeset
116+
settings:
117+
exclusive: false
118+
partition_name: h4d
119+
is_default: true
120+
partition_conf:
121+
ResumeTimeout: 900
122+
SuspendTimeout: 600
123+
124+
- id: slurm_login
125+
source: community/modules/scheduler/schedmd-slurm-gcp-v6-login
126+
use: [h4d-slurm-net-0]
127+
settings:
128+
machine_type: n2-standard-4
129+
enable_login_public_ips: true
130+
131+
- id: slurm_controller_startup
132+
source: modules/scripts/startup-script
133+
settings:
134+
set_ofi_cloud_rdma_tunables: true
135+
136+
- id: slurm_controller
137+
source: community/modules/scheduler/schedmd-slurm-gcp-v6-controller
138+
use: [h4d-slurm-net-0, h4d_partition, slurm_login, homefs, appsfs]
139+
settings:
140+
enable_controller_public_ips: true
141+
controller_startup_script: $(slurm_controller_startup.startup_script)

community/examples/hpc-slurm-local-ssd.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@ deployment_groups:
3131
- id: network
3232
source: modules/network/vpc
3333

34+
# Private Service Access (PSA) requires the compute.networkAdmin role which is
35+
# included in the Owner role, but not in the Editor role.
36+
# PSA is a best practice for Filestore instances, but can be optionally
37+
# removed by deleting the private_service_access module and any references to
38+
# the module by Filestore modules.
39+
# https://cloud.google.com/vpc/docs/configure-private-services-access#permissions
3440
- id: private_service_access
3541
source: community/modules/network/private-service-access
3642
use: [network]

community/examples/hpc-slurm-sharedvpc.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@ deployment_groups:
6161
settings:
6262
node_count_dynamic_max: 4
6363
machine_type: n2-standard-2
64-
enable_placement: false # the default is: true
6564
allow_automatic_updates: false
6665

6766
- id: debug_partition

community/examples/hpc-slurm-ubuntu2004.yaml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,12 @@ deployment_groups:
3636
- id: network
3737
source: modules/network/vpc
3838

39+
# Private Service Access (PSA) requires the compute.networkAdmin role which is
40+
# included in the Owner role, but not in the Editor role.
41+
# PSA is a best practice for Filestore instances, but can be optionally
42+
# removed by deleting the private_service_access module and any references to
43+
# the module by Filestore modules.
44+
# https://cloud.google.com/vpc/docs/configure-private-services-access#permissions
3945
- id: private_service_access
4046
source: community/modules/network/private-service-access
4147
use: [network]
@@ -51,7 +57,6 @@ deployment_groups:
5157
use: [network]
5258
settings:
5359
instance_image: $(vars.slurm_image)
54-
enable_placement: false # the default is: true
5560
node_count_dynamic_max: 4
5661
machine_type: n2-standard-2
5762

0 commit comments

Comments
 (0)