Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ spec:
operator: In
values:
- {{._NAME_}}
topologyKey: network.topology.kubernetes.io/accelerator
topologyKey: network.topology.nvidia.com/accelerator
containers:
- name: test
image: ubuntu
Expand Down
4 changes: 2 additions & 2 deletions resources/benchmarks/nwtopo/templates/jobset/jobset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ spec:
operator: In
values:
- {{._NAME_}}
topologyKey: network.topology.kubernetes.io/spine
topologyKey: network.topology.nvidia.com/spine
- weight: 90
podAffinityTerm:
labelSelector:
Expand All @@ -62,7 +62,7 @@ spec:
operator: In
values:
- {{._NAME_}}
topologyKey: network.topology.kubernetes.io/block
topologyKey: network.topology.nvidia.com/block
containers:
- name: test
image: ubuntu
Expand Down
4 changes: 2 additions & 2 deletions resources/benchmarks/nwtopo/templates/runai/mpijob.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ spec:
operator: In
values:
- {{._NAME_}}
topologyKey: network.topology.kubernetes.io/spine
topologyKey: network.topology.nvidia.com/spine
- weight: 90
podAffinityTerm:
labelSelector:
Expand All @@ -60,7 +60,7 @@ spec:
operator: In
values:
- {{._NAME_}}
topologyKey: network.topology.kubernetes.io/block
topologyKey: network.topology.nvidia.com/block
schedulerName: runai-scheduler
containers:
- image: runai/mpi-worker:latest
Expand Down
96 changes: 48 additions & 48 deletions resources/benchmarks/nwtopo/workflows/config-nodes-acc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,108 +38,108 @@ tasks:
count: 1
labels:
node-id: n1
network.topology.kubernetes.io/accelerator: nvl1
network.topology.kubernetes.io/block: sw11
network.topology.kubernetes.io/spine: sw21
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/accelerator: nvl1
network.topology.nvidia.com/block: sw11
network.topology.nvidia.com/spine: sw21
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n2
network.topology.kubernetes.io/accelerator: nvl1
network.topology.kubernetes.io/block: sw11
network.topology.kubernetes.io/spine: sw21
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/accelerator: nvl1
network.topology.nvidia.com/block: sw11
network.topology.nvidia.com/spine: sw21
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n3
network.topology.kubernetes.io/accelerator: nvl1
network.topology.kubernetes.io/block: sw11
network.topology.kubernetes.io/spine: sw21
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/accelerator: nvl1
network.topology.nvidia.com/block: sw11
network.topology.nvidia.com/spine: sw21
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n4
network.topology.kubernetes.io/accelerator: nvl1
network.topology.kubernetes.io/block: sw12
network.topology.kubernetes.io/spine: sw21
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/accelerator: nvl1
network.topology.nvidia.com/block: sw12
network.topology.nvidia.com/spine: sw21
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n5
network.topology.kubernetes.io/accelerator: nvl1
network.topology.kubernetes.io/block: sw12
network.topology.kubernetes.io/spine: sw21
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/accelerator: nvl1
network.topology.nvidia.com/block: sw12
network.topology.nvidia.com/spine: sw21
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n6
network.topology.kubernetes.io/accelerator: nvl1
network.topology.kubernetes.io/block: sw12
network.topology.kubernetes.io/spine: sw21
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/accelerator: nvl1
network.topology.nvidia.com/block: sw12
network.topology.nvidia.com/spine: sw21
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n7
network.topology.kubernetes.io/accelerator: nvl2
network.topology.kubernetes.io/block: sw13
network.topology.kubernetes.io/spine: sw22
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/accelerator: nvl2
network.topology.nvidia.com/block: sw13
network.topology.nvidia.com/spine: sw22
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n8
network.topology.kubernetes.io/accelerator: nvl2
network.topology.kubernetes.io/block: sw13
network.topology.kubernetes.io/spine: sw22
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/accelerator: nvl2
network.topology.nvidia.com/block: sw13
network.topology.nvidia.com/spine: sw22
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n9
network.topology.kubernetes.io/accelerator: nvl2
network.topology.kubernetes.io/block: sw13
network.topology.kubernetes.io/spine: sw22
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/accelerator: nvl2
network.topology.nvidia.com/block: sw13
network.topology.nvidia.com/spine: sw22
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n10
network.topology.kubernetes.io/accelerator: nvl2
network.topology.kubernetes.io/block: sw14
network.topology.kubernetes.io/spine: sw22
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/accelerator: nvl2
network.topology.nvidia.com/block: sw14
network.topology.nvidia.com/spine: sw22
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n11
network.topology.kubernetes.io/accelerator: nvl2
network.topology.kubernetes.io/block: sw14
network.topology.kubernetes.io/spine: sw22
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/accelerator: nvl2
network.topology.nvidia.com/block: sw14
network.topology.nvidia.com/spine: sw22
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n12
network.topology.kubernetes.io/accelerator: nvl2
network.topology.kubernetes.io/block: sw14
network.topology.kubernetes.io/spine: sw22
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/accelerator: nvl2
network.topology.nvidia.com/block: sw14
network.topology.nvidia.com/spine: sw22
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
timeout: 5m
72 changes: 36 additions & 36 deletions resources/benchmarks/nwtopo/workflows/config-nodes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,100 +35,100 @@ tasks:
count: 1
labels:
node-id: n1
network.topology.kubernetes.io/block: sw11
network.topology.kubernetes.io/spine: sw21
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/block: sw11
network.topology.nvidia.com/spine: sw21
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n2
network.topology.kubernetes.io/block: sw11
network.topology.kubernetes.io/spine: sw21
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/block: sw11
network.topology.nvidia.com/spine: sw21
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n3
network.topology.kubernetes.io/block: sw12
network.topology.kubernetes.io/spine: sw21
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/block: sw12
network.topology.nvidia.com/spine: sw21
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n4
network.topology.kubernetes.io/block: sw12
network.topology.kubernetes.io/spine: sw21
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/block: sw12
network.topology.nvidia.com/spine: sw21
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n5
network.topology.kubernetes.io/block: sw13
network.topology.kubernetes.io/spine: sw22
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/block: sw13
network.topology.nvidia.com/spine: sw22
network.topology.nvidia.com/datacenter: sw31
net-optimal: true
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n6
network.topology.kubernetes.io/block: sw13
network.topology.kubernetes.io/spine: sw22
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/block: sw13
network.topology.nvidia.com/spine: sw22
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n7
network.topology.kubernetes.io/block: sw14
network.topology.kubernetes.io/spine: sw22
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/block: sw14
network.topology.nvidia.com/spine: sw22
network.topology.nvidia.com/datacenter: sw31
net-optimal: true
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n8
network.topology.kubernetes.io/block: sw14
network.topology.kubernetes.io/spine: sw22
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/block: sw14
network.topology.nvidia.com/spine: sw22
network.topology.nvidia.com/datacenter: sw31
net-optimal: true
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n9
network.topology.kubernetes.io/block: sw15
network.topology.kubernetes.io/spine: sw23
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/block: sw15
network.topology.nvidia.com/spine: sw23
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n10
network.topology.kubernetes.io/block: sw15
network.topology.kubernetes.io/spine: sw23
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/block: sw15
network.topology.nvidia.com/spine: sw23
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n11
network.topology.kubernetes.io/block: sw16
network.topology.kubernetes.io/spine: sw23
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/block: sw16
network.topology.nvidia.com/spine: sw23
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
- type: dgxa100.80g
count: 1
labels:
node-id: n12
network.topology.kubernetes.io/block: sw16
network.topology.kubernetes.io/spine: sw23
network.topology.kubernetes.io/datacenter: sw31
network.topology.nvidia.com/block: sw16
network.topology.nvidia.com/spine: sw23
network.topology.nvidia.com/datacenter: sw31
nvidia.com/gpu.count: "8"
timeout: 5m
- id: update
Expand Down