Skip to content

Commit 1196bf5

Browse files
committed
keep AWS ROCE config commented out but enable mlnxnics
Signed-off-by: Ajay Mishra <ajmishra@nvidia.com>
1 parent 780804a commit 1196bf5

2 files changed

Lines changed: 52 additions & 4 deletions

File tree

distros/kubernetes/nvsentinel/charts/labeler/values.yaml

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,17 @@ expectedDeviceCounts:
6060
expectedCountOverrides: []
6161
currentExpression: |
6262
int(node.metadata.labels['nvidia.com/gpu.count'])
63-
# Example NVIDIA Network Operator / Mellanox NIC class using node allocatable.
63+
- name: nic
64+
enabled: true
65+
labels:
66+
current: nvsentinel.dgxc.nvidia.com/nic.count.current
67+
expected: nvsentinel.dgxc.nvidia.com/nic.count.expected
68+
groupingLabels:
69+
- node.kubernetes.io/instance-type
70+
expectedCountOverrides: []
71+
currentExpression: |
72+
int(node.status.allocatable['nvidia.com/mlnxnics'])
73+
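# Example AWS RoCE DRA NIC class. To use it, uncomment it, set enabled: true, and adapt per environment; it reuses the name `nic`, so use it instead of (not alongside) the mlnxnics entry above.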
6474
# - name: nic
6575
# enabled: false
6676
# labels:
@@ -70,7 +80,21 @@ expectedDeviceCounts:
7080
# - node.kubernetes.io/instance-type
7181
# expectedCountOverrides: []
7282
# currentExpression: |
73-
# int(node.status.allocatable['nvidia.com/mlnxnics'])
83+
# sum(resourceSlices
84+
# .filter(rs,
85+
# has(rs.spec.driver) &&
86+
# rs.spec.driver == 'dra.networking.k8s.aws' &&
87+
# has(rs.spec.devices)
88+
# )
89+
# .map(rs, rs.spec.devices
90+
# .filter(d,
91+
# has(d.attributes) &&
92+
# 'dra.vpc.amazonaws.com/deviceType' in d.attributes &&
93+
# has(d.attributes['dra.vpc.amazonaws.com/deviceType'].string) &&
94+
# d.attributes['dra.vpc.amazonaws.com/deviceType'].string == 'roce'
95+
# )
96+
# .size()
97+
# ))
7498

7599
resources:
76100
requests:

distros/kubernetes/nvsentinel/values-full.yaml

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -975,7 +975,17 @@ labeler:
975975
expectedCountOverrides: []
976976
currentExpression: |
977977
int(node.metadata.labels['nvidia.com/gpu.count'])
978-
# Example NVIDIA Network Operator / Mellanox NIC class using node allocatable.
978+
- name: nic
979+
enabled: true
980+
labels:
981+
current: nvsentinel.dgxc.nvidia.com/nic.count.current
982+
expected: nvsentinel.dgxc.nvidia.com/nic.count.expected
983+
groupingLabels:
984+
- node.kubernetes.io/instance-type
985+
expectedCountOverrides: []
986+
currentExpression: |
987+
int(node.status.allocatable['nvidia.com/mlnxnics'])
988+
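# Example AWS RoCE DRA NIC class. To use it, uncomment it, set enabled: true, and adapt per environment; it reuses the name `nic`, so use it instead of (not alongside) the mlnxnics entry above.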
979989
# - name: nic
980990
# enabled: false
981991
# labels:
@@ -985,7 +995,21 @@ labeler:
985995
# - node.kubernetes.io/instance-type
986996
# expectedCountOverrides: []
987997
# currentExpression: |
988-
# int(node.status.allocatable['nvidia.com/mlnxnics'])
998+
# sum(resourceSlices
999+
# .filter(rs,
1000+
# has(rs.spec.driver) &&
1001+
# rs.spec.driver == 'dra.networking.k8s.aws' &&
1002+
# has(rs.spec.devices)
1003+
# )
1004+
# .map(rs, rs.spec.devices
1005+
# .filter(d,
1006+
# has(d.attributes) &&
1007+
# 'dra.vpc.amazonaws.com/deviceType' in d.attributes &&
1008+
# has(d.attributes['dra.vpc.amazonaws.com/deviceType'].string) &&
1009+
# d.attributes['dra.vpc.amazonaws.com/deviceType'].string == 'roce'
1010+
# )
1011+
# .size()
1012+
# ))
9891013

9901014
# Pod resource limits and requests
9911015
resources:

0 commit comments

Comments
 (0)