Skip to content

Commit eaeacfb

Browse files
authored
Merge pull request #2702 from GoogleCloudPlatform/release-candidate
Release v1.35.0
2 parents 627b43a + 1b55f2d commit eaeacfb

File tree

131 files changed

+1872
-521
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

131 files changed

+1872
-521
lines changed

.github/workflows/pr-precommit.yml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,9 @@ on:
2424
- labeled
2525
- synchronize
2626
branches:
27+
- main
2728
- develop
29+
- release-candidate
2830

2931
jobs:
3032
pre-commit:

cmd/deploy.go

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,9 +28,10 @@ import (
2828
)
2929

3030
func addDeployFlags(c *cobra.Command) *cobra.Command {
31-
return addAutoApproveFlag(
32-
addArtifactsDirFlag(
33-
addCreateFlags(c)))
31+
return addGroupSelectionFlags(
32+
addAutoApproveFlag(
33+
addArtifactsDirFlag(
34+
addCreateFlags(c))))
3435
}
3536

3637
func init() {
@@ -71,10 +72,16 @@ func doDeploy(deplRoot string) {
7172
checkErr(shell.CheckWritableDir(artDir), nil)
7273
bp, ctx := artifactBlueprintOrDie(artDir)
7374
groups := bp.Groups
75+
checkErr(validateGroupSelectionFlags(bp), ctx)
7476
checkErr(validateRuntimeDependencies(deplRoot, groups), ctx)
7577
checkErr(shell.ValidateDeploymentDirectory(groups, deplRoot), ctx)
7678

7779
for ig, group := range groups {
80+
if !isGroupSelected(group.Name) {
81+
logging.Info("skipping group %q", group.Name)
82+
continue
83+
}
84+
7885
groupDir := filepath.Join(deplRoot, string(group.Name))
7986
checkErr(shell.ImportInputs(groupDir, artDir, bp), ctx)
8087

cmd/destroy.go

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,8 +31,9 @@ import (
3131

3232
func init() {
3333
rootCmd.AddCommand(
34-
addAutoApproveFlag(
35-
addArtifactsDirFlag(destroyCmd)))
34+
addGroupSelectionFlags(
35+
addAutoApproveFlag(
36+
addArtifactsDirFlag(destroyCmd))))
3637
}
3738

3839
var (
@@ -56,13 +57,17 @@ func runDestroyCmd(cmd *cobra.Command, args []string) {
5657
}
5758

5859
bp, ctx := artifactBlueprintOrDie(artifactsDir)
59-
60+
checkErr(validateGroupSelectionFlags(bp), ctx)
6061
checkErr(shell.ValidateDeploymentDirectory(bp.Groups, deplRoot), ctx)
6162

6263
// destroy in reverse order of creation!
6364
packerManifests := []string{}
6465
for i := len(bp.Groups) - 1; i >= 0; i-- {
6566
group := bp.Groups[i]
67+
if !isGroupSelected(group.Name) {
68+
logging.Info("skipping group %q", group.Name)
69+
continue
70+
}
6671
groupDir := filepath.Join(deplRoot, string(group.Name))
6772

6873
if err := shell.ImportInputs(groupDir, artifactsDir, bp); err != nil {

cmd/root.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ HPC deployments on the Google Cloud Platform.`,
5252
logging.Fatal("cmd.Help function failed: %s", err)
5353
}
5454
},
55-
Version: "v1.34.1",
55+
Version: "v1.35.0",
5656
Annotations: annotation,
5757
}
5858
)

cmd/utils.go

Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,10 +15,13 @@
1515
package cmd
1616

1717
import (
18+
"errors"
1819
"fmt"
20+
"hpc-toolkit/pkg/config"
1921
"hpc-toolkit/pkg/modulewriter"
2022
"hpc-toolkit/pkg/shell"
2123
"os"
24+
"slices"
2225

2326
"github.com/spf13/cobra"
2427
)
@@ -78,3 +81,41 @@ func filterYaml(cmd *cobra.Command, args []string, toComplete string) ([]string,
7881
}
7982
return []string{"yaml", "yml"}, cobra.ShellCompDirectiveFilterFileExt
8083
}
84+
85+
// flagSkipGroups and flagOnlyGroups are the package-level destinations for the
// --skip and --only flags registered by addGroupSelectionFlags (both with a nil
// default). validateGroupSelectionFlags and isGroupSelected read them.
var flagSkipGroups []string
86+
var flagOnlyGroups []string
87+
88+
// addGroupSelectionFlags registers the string-slice flags --skip and --only on
// c, storing their values in flagSkipGroups and flagOnlyGroups respectively.
// Both flags are registered with a nil default. It returns c so that the call
// can be nested inside other flag-adding helpers.
func addGroupSelectionFlags(c *cobra.Command) *cobra.Command {
89+
c.Flags().StringSliceVar(&flagSkipGroups, "skip", nil, "Skip groups with the given names")
90+
c.Flags().StringSliceVar(&flagOnlyGroups, "only", nil, "Only apply to groups with the given names")
91+
return c
92+
}
93+
94+
// validateGroupSelectionFlags checks the current values of flagOnlyGroups and
// flagSkipGroups against the groups of bp.
//
// It returns an error when both flag slices are non-nil, or when any name in
// either slice does not match the name of a group in bp.Groups. In the second
// case the "group %q not found" error is passed through config.HintSpelling
// together with the list of known group names (presumably to attach a
// spelling suggestion; confirm against pkg/config). It returns nil otherwise.
func validateGroupSelectionFlags(bp config.Blueprint) error {
95+
// The two flags are mutually exclusive; the test is on nil-ness, not length.
if flagOnlyGroups != nil && flagSkipGroups != nil {
96+
return errors.New("cannot specify both --only and --skip")
97+
}
98+
99+
// dict collects the names of all groups defined in the blueprint.
dict := []string{}
100+
for _, group := range bp.Groups {
101+
dict = append(dict, string(group.Name))
102+
}
103+
104+
// At most one of the two slices is non-nil here (see the guard above), so
// this append never writes into the backing array of either flag slice.
for _, g := range append(flagOnlyGroups, flagSkipGroups...) {
105+
if !slices.Contains(dict, g) {
106+
return config.HintSpelling(g, dict, fmt.Errorf("group %q not found", g))
107+
}
108+
}
109+
110+
return nil
111+
}
112+
113+
// isGroupSelected reports whether group g should be processed given the
// current values of flagOnlyGroups and flagSkipGroups.
//
//   - If flagOnlyGroups is non-nil, g is selected only when it is listed there.
//   - Otherwise, if flagSkipGroups is non-nil, g is selected unless listed there.
//   - If both are nil, every group is selected.
//
// The checks are on nil-ness rather than length, so a non-nil but empty
// flagOnlyGroups selects no group at all.
func isGroupSelected(g config.GroupName) bool {
114+
// --only takes precedence; flagSkipGroups is not consulted in this branch.
if flagOnlyGroups != nil {
115+
return slices.Contains(flagOnlyGroups, string(g))
116+
}
117+
if flagSkipGroups != nil {
118+
return !slices.Contains(flagSkipGroups, string(g))
119+
}
120+
return true
121+
}

cmd/utils_test.go

Lines changed: 83 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,83 @@
1+
// Copyright 2023 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package cmd
16+
17+
import (
18+
"fmt"
19+
"hpc-toolkit/pkg/config"
20+
"testing"
21+
)
22+
23+
// TestIsGroupSelected is a table-driven test of isGroupSelected. Each case
// supplies values for the --only and --skip selections, a group name, and the
// expected result; the cases cover no selection, --only matching and not
// matching, and --skip matching and not matching.
//
// Each subtest overwrites the package-level flagOnlyGroups and flagSkipGroups
// and does not restore them afterwards.
func TestIsGroupSelected(t *testing.T) {
24+
type test struct {
25+
only []string
26+
skip []string
27+
group config.GroupName
28+
want bool
29+
}
30+
tests := []test{
31+
{nil, nil, "green", true},
32+
{[]string{"green"}, nil, "green", true},
33+
{[]string{"green"}, nil, "blue", false},
34+
{nil, []string{"green"}, "green", false},
35+
{nil, []string{"green"}, "blue", true},
36+
}
37+
38+
for _, tc := range tests {
39+
t.Run(fmt.Sprintf("%v;%v;%q", tc.only, tc.skip, tc.group), func(t *testing.T) {
40+
flagOnlyGroups, flagSkipGroups = tc.only, tc.skip
41+
got := isGroupSelected(tc.group)
42+
if got != tc.want {
43+
t.Errorf("isGroupSelected(%v) = %v; want %v", tc.group, got, tc.want)
44+
}
45+
})
46+
}
47+
}
48+
49+
// TestValidateGroupSelectionFlags is a table-driven test of
// validateGroupSelectionFlags. Each case supplies values for the --only and
// --skip selections, the group names to place in a config.Blueprint, and
// whether an error is expected. The cases cover: no selection, both flags set
// at once, and each flag naming an existing or a missing group.
//
// Only the presence or absence of an error is checked, not its content. Each
// subtest overwrites the package-level flagOnlyGroups and flagSkipGroups and
// does not restore them afterwards.
func TestValidateGroupSelectionFlags(t *testing.T) {
50+
type test struct {
51+
only []string
52+
skip []string
53+
groups []string
54+
err bool
55+
}
56+
tests := []test{
57+
{nil, nil, []string{"green"}, false},
58+
{[]string{"green"}, []string{"blue"}, []string{"green", "blue"}, true},
59+
{[]string{"green"}, nil, []string{"green"}, false},
60+
{[]string{"green"}, nil, []string{"blue"}, true},
61+
{nil, []string{"green"}, []string{"green"}, false},
62+
{nil, []string{"green"}, []string{"blue"}, true},
63+
}
64+
65+
for _, tc := range tests {
66+
t.Run(fmt.Sprintf("%v;%v;%v", tc.only, tc.skip, tc.groups), func(t *testing.T) {
67+
flagOnlyGroups, flagSkipGroups = tc.only, tc.skip
68+
bp := config.Blueprint{}
69+
for _, g := range tc.groups {
70+
bp.Groups = append(bp.Groups, config.Group{Name: config.GroupName(g)})
71+
}
72+
73+
err := validateGroupSelectionFlags(bp)
74+
if tc.err && err == nil {
75+
t.Errorf("validateGroupSelectionFlags(%v) = nil; want error", tc.groups)
76+
}
77+
if !tc.err && err != nil {
78+
t.Errorf("validateGroupSelectionFlags(%v) = %v; want nil", tc.groups, err)
79+
}
80+
})
81+
}
82+
83+
}

community/examples/hpc-build-slurm-image.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ vars:
2323
image_build_machine_type: n2d-standard-16
2424
build_from_image_family: hpc-rocky-linux-8
2525
build_from_image_project: cloud-hpc-image-public
26-
build_from_git_ref: 6.5.6
26+
build_from_git_ref: 6.5.8
2727
built_image_family: my-custom-slurm
2828
built_instance_image:
2929
family: $(vars.built_image_family)

community/examples/ml-gke.yaml

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,9 @@ blueprint_name: ml-gke
1818
vars:
1919
project_id: ## Set GCP Project ID Here ##
2020
deployment_name: ml-01
21-
region: us-central1
21+
region: asia-southeast1
22+
zones:
23+
- asia-southeast1-b # g2 machine has better availability in this zone
2224

2325
# Cidr block containing the IP of the machine calling terraform.
2426
# The following line must be updated for this example to work.
@@ -48,22 +50,23 @@ deployment_groups:
4850
cidr_block: $(vars.authorized_cidr)
4951
outputs: [instructions]
5052

51-
# Docs at https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/community/modules/scripts/kubernetes-operations
52-
- id: install-nvidia-drivers
53-
source: github.com/GoogleCloudPlatform/ai-infra-cluster-provisioning//aiinfra-cluster/modules/kubernetes-operations?ref=v0.6.0
54-
use: [gke_cluster]
55-
settings:
56-
install_nvidia_driver: true
57-
58-
- id: a2-pool
53+
- id: g2-pool
5954
source: community/modules/compute/gke-node-pool
6055
use: [gke_cluster]
6156
settings:
62-
machine_type: a2-highgpu-8g
57+
disk_type: pd-balanced
58+
machine_type: g2-standard-4
59+
guest_accelerator:
60+
- type: nvidia-l4
61+
count: 1
62+
gpu_partition_size: null
63+
gpu_sharing_config: null
64+
gpu_driver_installation_config:
65+
- gpu_driver_version: "DEFAULT"
6366

6467
- id: job-template
6568
source: community/modules/compute/gke-job-template
66-
use: [a2-pool]
69+
use: [g2-pool]
6770
settings:
6871
image: nvidia/cuda:11.0.3-runtime-ubuntu20.04
6972
command:

community/front-end/ofe/deploy.sh

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,8 @@ PRJ_API['bigqueryconnection.googleapis.com']='BigQuery Connection API'
5757
PRJ_API['sqladmin.googleapis.com']='Cloud SQL Admin API'
5858
PRJ_API['servicenetworking.googleapis.com']='Service Networking API'
5959
PRJ_API['secretmanager.googleapis.com']='Secret Manager API'
60+
PRJ_API['serviceusage.googleapis.com']='Service Usage API'
61+
PRJ_API['storage.googleapis.com']='Cloud Storage API'
6062

6163
# Location for output credential file = pwd/credential.json
6264
#

community/front-end/ofe/infrastructure_files/gcs_bucket/webserver/startup.sh

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -249,6 +249,7 @@ autostart=true
249249
autorestart=true
250250
user=gcluster
251251
redirect_stderr=true
252+
environment=HOME=/opt/gcluster
252253
stdout_logfile=/opt/gcluster/run/supvisor.log" >/etc/supervisord.d/gcluster.ini
253254

254255
printf "Creating systemd service..."

0 commit comments

Comments
 (0)