diff --git a/resources/benchmarks/nwtopo/templates/jobset/jobset-acc.yaml b/resources/benchmarks/nwtopo/templates/jobset/jobset-acc.yaml index b6ec7cdc..6fa36fb0 100644 --- a/resources/benchmarks/nwtopo/templates/jobset/jobset-acc.yaml +++ b/resources/benchmarks/nwtopo/templates/jobset/jobset-acc.yaml @@ -51,7 +51,7 @@ spec: operator: In values: - {{._NAME_}} - topologyKey: network.topology.kubernetes.io/accelerator + topologyKey: network.topology.nvidia.com/accelerator containers: - name: test image: ubuntu diff --git a/resources/benchmarks/nwtopo/templates/jobset/jobset.yaml b/resources/benchmarks/nwtopo/templates/jobset/jobset.yaml index 413e4ca7..4cfcbd0d 100644 --- a/resources/benchmarks/nwtopo/templates/jobset/jobset.yaml +++ b/resources/benchmarks/nwtopo/templates/jobset/jobset.yaml @@ -53,7 +53,7 @@ spec: operator: In values: - {{._NAME_}} - topologyKey: network.topology.kubernetes.io/spine + topologyKey: network.topology.nvidia.com/spine - weight: 90 podAffinityTerm: labelSelector: @@ -62,7 +62,7 @@ spec: operator: In values: - {{._NAME_}} - topologyKey: network.topology.kubernetes.io/block + topologyKey: network.topology.nvidia.com/block containers: - name: test image: ubuntu diff --git a/resources/benchmarks/nwtopo/templates/runai/mpijob.yaml b/resources/benchmarks/nwtopo/templates/runai/mpijob.yaml index 0ac015a2..1285513e 100644 --- a/resources/benchmarks/nwtopo/templates/runai/mpijob.yaml +++ b/resources/benchmarks/nwtopo/templates/runai/mpijob.yaml @@ -51,7 +51,7 @@ spec: operator: In values: - {{._NAME_}} - topologyKey: network.topology.kubernetes.io/spine + topologyKey: network.topology.nvidia.com/spine - weight: 90 podAffinityTerm: labelSelector: @@ -60,7 +60,7 @@ spec: operator: In values: - {{._NAME_}} - topologyKey: network.topology.kubernetes.io/block + topologyKey: network.topology.nvidia.com/block schedulerName: runai-scheduler containers: - image: runai/mpi-worker:latest diff --git a/resources/benchmarks/nwtopo/workflows/config-nodes-acc.yaml b/resources/benchmarks/nwtopo/workflows/config-nodes-acc.yaml index 632d501c..f6313ce6 100644 --- a/resources/benchmarks/nwtopo/workflows/config-nodes-acc.yaml +++ b/resources/benchmarks/nwtopo/workflows/config-nodes-acc.yaml @@ -38,108 +38,108 @@ tasks: count: 1 labels: node-id: n1 - network.topology.kubernetes.io/accelerator: nvl1 - network.topology.kubernetes.io/block: sw11 - network.topology.kubernetes.io/spine: sw21 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/accelerator: nvl1 + network.topology.nvidia.com/block: sw11 + network.topology.nvidia.com/spine: sw21 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n2 - network.topology.kubernetes.io/accelerator: nvl1 - network.topology.kubernetes.io/block: sw11 - network.topology.kubernetes.io/spine: sw21 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/accelerator: nvl1 + network.topology.nvidia.com/block: sw11 + network.topology.nvidia.com/spine: sw21 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n3 - network.topology.kubernetes.io/accelerator: nvl1 - network.topology.kubernetes.io/block: sw11 - network.topology.kubernetes.io/spine: sw21 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/accelerator: nvl1 + network.topology.nvidia.com/block: sw11 + network.topology.nvidia.com/spine: sw21 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n4 - network.topology.kubernetes.io/accelerator: nvl1 - network.topology.kubernetes.io/block: sw12 - network.topology.kubernetes.io/spine: sw21 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/accelerator: nvl1 + network.topology.nvidia.com/block: sw12 + network.topology.nvidia.com/spine: sw21 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n5 - network.topology.kubernetes.io/accelerator: nvl1 - network.topology.kubernetes.io/block: sw12 - network.topology.kubernetes.io/spine: sw21 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/accelerator: nvl1 + network.topology.nvidia.com/block: sw12 + network.topology.nvidia.com/spine: sw21 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n6 - network.topology.kubernetes.io/accelerator: nvl1 - network.topology.kubernetes.io/block: sw12 - network.topology.kubernetes.io/spine: sw21 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/accelerator: nvl1 + network.topology.nvidia.com/block: sw12 + network.topology.nvidia.com/spine: sw21 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n7 - network.topology.kubernetes.io/accelerator: nvl2 - network.topology.kubernetes.io/block: sw13 - network.topology.kubernetes.io/spine: sw22 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/accelerator: nvl2 + network.topology.nvidia.com/block: sw13 + network.topology.nvidia.com/spine: sw22 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n8 - network.topology.kubernetes.io/accelerator: nvl2 - network.topology.kubernetes.io/block: sw13 - network.topology.kubernetes.io/spine: sw22 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/accelerator: nvl2 + network.topology.nvidia.com/block: sw13 + network.topology.nvidia.com/spine: sw22 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n9 - network.topology.kubernetes.io/accelerator: nvl2 - network.topology.kubernetes.io/block: sw13 - network.topology.kubernetes.io/spine: sw22 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/accelerator: nvl2 + network.topology.nvidia.com/block: sw13 + network.topology.nvidia.com/spine: sw22 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n10 - network.topology.kubernetes.io/accelerator: nvl2 - network.topology.kubernetes.io/block: sw14 - network.topology.kubernetes.io/spine: sw22 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/accelerator: nvl2 + network.topology.nvidia.com/block: sw14 + network.topology.nvidia.com/spine: sw22 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n11 - network.topology.kubernetes.io/accelerator: nvl2 - network.topology.kubernetes.io/block: sw14 - network.topology.kubernetes.io/spine: sw22 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/accelerator: nvl2 + network.topology.nvidia.com/block: sw14 + network.topology.nvidia.com/spine: sw22 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n12 - network.topology.kubernetes.io/accelerator: nvl2 - network.topology.kubernetes.io/block: sw14 - network.topology.kubernetes.io/spine: sw22 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/accelerator: nvl2 + network.topology.nvidia.com/block: sw14 + network.topology.nvidia.com/spine: sw22 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" timeout: 5m diff --git a/resources/benchmarks/nwtopo/workflows/config-nodes.yaml b/resources/benchmarks/nwtopo/workflows/config-nodes.yaml index 25735fc3..c715cb29 100644 --- a/resources/benchmarks/nwtopo/workflows/config-nodes.yaml +++ b/resources/benchmarks/nwtopo/workflows/config-nodes.yaml @@ -35,100 +35,100 @@ tasks: count: 1 labels: node-id: n1 - network.topology.kubernetes.io/block: sw11 - network.topology.kubernetes.io/spine: sw21 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/block: sw11 + network.topology.nvidia.com/spine: sw21 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n2 - network.topology.kubernetes.io/block: sw11 - network.topology.kubernetes.io/spine: sw21 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/block: sw11 + network.topology.nvidia.com/spine: sw21 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n3 - network.topology.kubernetes.io/block: sw12 - network.topology.kubernetes.io/spine: sw21 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/block: sw12 + network.topology.nvidia.com/spine: sw21 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n4 - network.topology.kubernetes.io/block: sw12 - network.topology.kubernetes.io/spine: sw21 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/block: sw12 + network.topology.nvidia.com/spine: sw21 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n5 - network.topology.kubernetes.io/block: sw13 - network.topology.kubernetes.io/spine: sw22 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/block: sw13 + network.topology.nvidia.com/spine: sw22 + network.topology.nvidia.com/datacenter: sw31 net-optimal: true nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n6 - network.topology.kubernetes.io/block: sw13 - network.topology.kubernetes.io/spine: sw22 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/block: sw13 + network.topology.nvidia.com/spine: sw22 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n7 - network.topology.kubernetes.io/block: sw14 - network.topology.kubernetes.io/spine: sw22 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/block: sw14 + network.topology.nvidia.com/spine: sw22 + network.topology.nvidia.com/datacenter: sw31 net-optimal: true nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n8 - network.topology.kubernetes.io/block: sw14 - network.topology.kubernetes.io/spine: sw22 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/block: sw14 + network.topology.nvidia.com/spine: sw22 + network.topology.nvidia.com/datacenter: sw31 net-optimal: true nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n9 - network.topology.kubernetes.io/block: sw15 - network.topology.kubernetes.io/spine: sw23 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/block: sw15 + network.topology.nvidia.com/spine: sw23 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n10 - network.topology.kubernetes.io/block: sw15 - network.topology.kubernetes.io/spine: sw23 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/block: sw15 + network.topology.nvidia.com/spine: sw23 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n11 - network.topology.kubernetes.io/block: sw16 - network.topology.kubernetes.io/spine: sw23 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/block: sw16 + network.topology.nvidia.com/spine: sw23 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" - type: dgxa100.80g count: 1 labels: node-id: n12 - network.topology.kubernetes.io/block: sw16 - network.topology.kubernetes.io/spine: sw23 - network.topology.kubernetes.io/datacenter: sw31 + network.topology.nvidia.com/block: sw16 + network.topology.nvidia.com/spine: sw23 + network.topology.nvidia.com/datacenter: sw31 nvidia.com/gpu.count: "8" timeout: 5m - id: update