Skip to content

Commit 3b5212a

Browse files
authored
Merge pull request #289 from etiennedub/sharding_doc
Sharding GPU support
2 parents aa62d4b + c56851f commit 3b5212a

File tree

5 files changed

+21
-16
lines changed

5 files changed

+21
-16
lines changed

aws/infrastructure.tf

+5-4
Original file line numberDiff line numberDiff line change
@@ -193,10 +193,11 @@ locals {
193193
prefix = values.prefix
194194
tags = values.tags
195195
specs = {
196-
cpus = data.aws_ec2_instance_type.instance_type[values.prefix].default_vcpus
197-
ram = data.aws_ec2_instance_type.instance_type[values.prefix].memory_size
198-
gpus = try(one(data.aws_ec2_instance_type.instance_type[values.prefix].gpus).count, 0)
199-
mig = lookup(values, "mig", null)
196+
cpus = data.aws_ec2_instance_type.instance_type[values.prefix].default_vcpus
197+
ram = data.aws_ec2_instance_type.instance_type[values.prefix].memory_size
198+
gpus = try(one(data.aws_ec2_instance_type.instance_type[values.prefix].gpus).count, 0)
199+
mig = lookup(values, "mig", null)
200+
shard = lookup(values, "shard", null)
200201
}
201202
volumes = contains(keys(module.design.volume_per_instance), x) ? {
202203
for pv_key, pv_values in var.volumes:

azure/infrastructure.tf

+5-4
Original file line numberDiff line numberDiff line change
@@ -156,10 +156,11 @@ locals {
156156
prefix = values.prefix
157157
tags = values.tags
158158
specs = {
159-
cpus = local.vmsizes[values.type].vcpus
160-
ram = local.vmsizes[values.type].ram
161-
gpus = local.vmsizes[values.type].gpus
162-
mig = lookup(values, "mig", null)
159+
cpus = local.vmsizes[values.type].vcpus
160+
ram = local.vmsizes[values.type].ram
161+
gpus = local.vmsizes[values.type].gpus
162+
mig = lookup(values, "mig", null)
163+
shard = lookup(values, "shard", null)
163164
}
164165
volumes = contains(keys(module.design.volume_per_instance), x) ? {
165166
for pv_key, pv_values in var.volumes:

docs/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,7 @@ the operating system and service software
540540
```
541541
This is only functional with [MIG supported GPUs](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#supported-gpus),
542542
and with x86-64 processors (see [NVIDIA/mig-parted issue #30](https://github.com/NVIDIA/mig-parted/issues/30)).
543+
6. `shard`: total number of [shards](https://slurm.schedmd.com/gres.html#Sharding) on the node. Sharding allows multiple jobs to share the same GPU. The total number of shards is evenly distributed across all GPUs on the node.
543544
544545
For some cloud providers, it is possible to define additional attributes.
545546
The following sections present the available attributes per provider.

gcp/infrastructure.tf

+5-4
Original file line numberDiff line numberDiff line change
@@ -166,10 +166,11 @@ locals {
166166
prefix = values.prefix
167167
tags = values.tags
168168
specs = {
169-
cpus = data.external.machine_type[values["prefix"]].result["vcpus"]
170-
ram = data.external.machine_type[values["prefix"]].result["ram"]
171-
gpus = try(data.external.machine_type[values["prefix"]].result["gpus"], lookup(values, "gpu_count", 0))
172-
mig = lookup(values, "mig", null)
169+
cpus = data.external.machine_type[values["prefix"]].result["vcpus"]
170+
ram = data.external.machine_type[values["prefix"]].result["ram"]
171+
gpus = try(data.external.machine_type[values["prefix"]].result["gpus"], lookup(values, "gpu_count", 0))
172+
mig = lookup(values, "mig", null)
173+
shard = lookup(values, "shard", null)
173174
}
174175
volumes = contains(keys(module.design.volume_per_instance), x) ? {
175176
for pv_key, pv_values in var.volumes:

openstack/infrastructure.tf

+5-4
Original file line numberDiff line numberDiff line change
@@ -121,13 +121,14 @@ locals {
121121
prefix = values.prefix
122122
tags = values.tags
123123
specs = {
124-
cpus = data.openstack_compute_flavor_v2.flavors[values.prefix].vcpus
125-
ram = data.openstack_compute_flavor_v2.flavors[values.prefix].ram
126-
gpus = sum([
124+
cpus = data.openstack_compute_flavor_v2.flavors[values.prefix].vcpus
125+
ram = data.openstack_compute_flavor_v2.flavors[values.prefix].ram
126+
gpus = sum([
127127
parseint(lookup(data.openstack_compute_flavor_v2.flavors[values.prefix].extra_specs, "resources:VGPU", "0"), 10),
128128
parseint(split(":", lookup(data.openstack_compute_flavor_v2.flavors[values.prefix].extra_specs, "pci_passthrough:alias", "gpu:0"))[1], 10)
129129
])
130-
mig = lookup(values, "mig", null)
130+
mig = lookup(values, "mig", null)
131+
shard = lookup(values, "shard", null)
131132
}
132133
volumes = contains(keys(module.design.volume_per_instance), x) ? {
133134
for pv_key, pv_values in var.volumes:

0 commit comments

Comments
 (0)