File tree Expand file tree Collapse file tree
distros/kubernetes/nvsentinel Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -60,7 +60,17 @@ expectedDeviceCounts:
6060 expectedCountOverrides : []
6161 currentExpression : |
6262 int(node.metadata.labels['nvidia.com/gpu.count'])
63- # Example NVIDIA Network Operator / Mellanox NIC class using node allocatable.
63+ - name : nic
64+ enabled : true
65+ labels :
66+ current : nvsentinel.dgxc.nvidia.com/nic.count.current
67+ expected : nvsentinel.dgxc.nvidia.com/nic.count.expected
68+ groupingLabels :
69+ - node.kubernetes.io/instance-type
70+ expectedCountOverrides : []
71+ currentExpression : |
72+ int(node.status.allocatable['nvidia.com/mlnxnics'])
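73+ # Example AWS RoCE DRA NIC class. It reuses the name 'nic', so uncomment it in place of the enabled 'nic' entry above (or rename it) rather than enabling both, and adapt per environment.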
6474 # - name: nic
6575 # enabled: false
6676 # labels:
@@ -70,7 +80,21 @@ expectedDeviceCounts:
7080 # - node.kubernetes.io/instance-type
7181 # expectedCountOverrides: []
7282 # currentExpression: |
73- # int(node.status.allocatable['nvidia.com/mlnxnics'])
83+ # sum(resourceSlices
84+ # .filter(rs,
85+ # has(rs.spec.driver) &&
86+ # rs.spec.driver == 'dra.networking.k8s.aws' &&
87+ # has(rs.spec.devices)
88+ # )
89+ # .map(rs, rs.spec.devices
90+ # .filter(d,
91+ # has(d.attributes) &&
92+ # 'dra.vpc.amazonaws.com/deviceType' in d.attributes &&
93+ # has(d.attributes['dra.vpc.amazonaws.com/deviceType'].string) &&
94+ # d.attributes['dra.vpc.amazonaws.com/deviceType'].string == 'roce'
95+ # )
96+ # .size()
97+ # ))
7498
7599resources :
76100 requests :
Original file line number Diff line number Diff line change @@ -975,7 +975,17 @@ labeler:
975975 expectedCountOverrides : []
976976 currentExpression : |
977977 int(node.metadata.labels['nvidia.com/gpu.count'])
978- # Example NVIDIA Network Operator / Mellanox NIC class using node allocatable.
978+ - name : nic
979+ enabled : true
980+ labels :
981+ current : nvsentinel.dgxc.nvidia.com/nic.count.current
982+ expected : nvsentinel.dgxc.nvidia.com/nic.count.expected
983+ groupingLabels :
984+ - node.kubernetes.io/instance-type
985+ expectedCountOverrides : []
986+ currentExpression : |
987+ int(node.status.allocatable['nvidia.com/mlnxnics'])
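988+ # Example AWS RoCE DRA NIC class. It reuses the name 'nic', so uncomment it in place of the enabled 'nic' entry above (or rename it) rather than enabling both, and adapt per environment.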
979989 # - name: nic
980990 # enabled: false
981991 # labels:
@@ -985,7 +995,21 @@ labeler:
985995 # - node.kubernetes.io/instance-type
986996 # expectedCountOverrides: []
987997 # currentExpression: |
988- # int(node.status.allocatable['nvidia.com/mlnxnics'])
998+ # sum(resourceSlices
999+ # .filter(rs,
1000+ # has(rs.spec.driver) &&
1001+ # rs.spec.driver == 'dra.networking.k8s.aws' &&
1002+ # has(rs.spec.devices)
1003+ # )
1004+ # .map(rs, rs.spec.devices
1005+ # .filter(d,
1006+ # has(d.attributes) &&
1007+ # 'dra.vpc.amazonaws.com/deviceType' in d.attributes &&
1008+ # has(d.attributes['dra.vpc.amazonaws.com/deviceType'].string) &&
1009+ # d.attributes['dra.vpc.amazonaws.com/deviceType'].string == 'roce'
1010+ # )
1011+ # .size()
1012+ # ))
9891013
9901014 # Pod resource limits and requests
9911015 resources :
You can’t perform that action at this time.
0 commit comments