Skip to content

Commit 6d499c6

Browse files
glowkey and nvvfedorov authored
DCGM-Exporter release version 3.3.7-3.5.0 (#366)
Co-authored-by: Vadym Fedorov <[email protected]>
1 parent b4552f0 commit 6d499c6

17 files changed

+228
-98
lines changed

.gitignore

+2-2
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ tests.cov
1111
test_results.json
1212
.scannerwork
1313
dist/
14-
14+
.run/
1515
###############################################################################
1616
# JetBrains
1717
# https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore
@@ -236,4 +236,4 @@ $RECYCLE.BIN/
236236
*.msp
237237

238238
# Windows shortcuts
239-
*.lnk
239+
*.lnk

Makefile

+3-2
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ MKDIR ?= mkdir
2020
GOLANGCILINT_TIMEOUT ?= 10m
2121

2222
DCGM_VERSION := $(NEW_DCGM_VERSION)
23-
GOLANG_VERSION := 1.21.5
23+
GOLANG_VERSION := 1.22.5
2424
VERSION := $(NEW_EXPORTER_VERSION)
2525
FULL_VERSION := $(DCGM_VERSION)-$(VERSION)
2626
OUTPUT := type=oci,dest=/dev/null
@@ -78,7 +78,8 @@ test-integration:
7878
go test -race -count=1 -timeout 5m -v $(TEST_ARGS) ./tests/integration/
7979

8080
test-coverage:
81-
gocov test ./... | gocov report
81+
sh scripts/test_coverage.sh
82+
gocov convert tests.cov | gocov report
8283

8384
.PHONY: lint
8485
lint:

README.md

+1-1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ Official documentation for DCGM-Exporter can be found on [docs.nvidia.com](https
1111
To gather metrics on a GPU node, simply start the `dcgm-exporter` container:
1212

1313
```shell
14-
docker run -d --gpus all --rm -p 9400:9400 nvcr.io/nvidia/k8s/dcgm-exporter:3.3.6-3.4.2-ubuntu22.04
14+
docker run -d --gpus all --rm -p 9400:9400 nvcr.io/nvidia/k8s/dcgm-exporter:3.3.7-3.5.0-ubuntu22.04
1515
curl localhost:9400/metrics
1616
# HELP DCGM_FI_DEV_SM_CLOCK SM clock frequency (in MHz).
1717
# TYPE DCGM_FI_DEV_SM_CLOCK gauge

dcgm-exporter.yaml

+6-6
Original file line number | Diff line number | Diff line change
@@ -18,23 +18,23 @@ metadata:
1818
name: "dcgm-exporter"
1919
labels:
2020
app.kubernetes.io/name: "dcgm-exporter"
21-
app.kubernetes.io/version: "3.4.2"
21+
app.kubernetes.io/version: "3.5.0"
2222
spec:
2323
updateStrategy:
2424
type: RollingUpdate
2525
selector:
2626
matchLabels:
2727
app.kubernetes.io/name: "dcgm-exporter"
28-
app.kubernetes.io/version: "3.4.2"
28+
app.kubernetes.io/version: "3.5.0"
2929
template:
3030
metadata:
3131
labels:
3232
app.kubernetes.io/name: "dcgm-exporter"
33-
app.kubernetes.io/version: "3.4.2"
33+
app.kubernetes.io/version: "3.5.0"
3434
name: "dcgm-exporter"
3535
spec:
3636
containers:
37-
- image: "nvcr.io/nvidia/k8s/dcgm-exporter:3.3.6-3.4.2-ubuntu22.04"
37+
- image: "nvcr.io/nvidia/k8s/dcgm-exporter:3.3.7-3.5.0-ubuntu22.04"
3838
env:
3939
- name: "DCGM_EXPORTER_LISTEN"
4040
value: ":9400"
@@ -64,11 +64,11 @@ metadata:
6464
name: "dcgm-exporter"
6565
labels:
6666
app.kubernetes.io/name: "dcgm-exporter"
67-
app.kubernetes.io/version: "3.4.2"
67+
app.kubernetes.io/version: "3.5.0"
6868
spec:
6969
selector:
7070
app.kubernetes.io/name: "dcgm-exporter"
71-
app.kubernetes.io/version: "3.4.2"
71+
app.kubernetes.io/version: "3.5.0"
7272
ports:
7373
- name: "metrics"
7474
port: 9400

deployment/Chart.yaml

+2-2
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
apiVersion: v2
22
name: dcgm-exporter
33
description: A Helm chart for DCGM exporter
4-
version: "3.4.2"
4+
version: "3.5.0"
55
kubeVersion: ">= 1.19.0-0"
6-
appVersion: "3.4.2"
6+
appVersion: "3.5.0"
77
sources:
88
- https://github.com/nvidia/dcgm-exporter
99
home: https://github.com/nvidia/dcgm-exporter/

deployment/values.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ image:
1717
pullPolicy: IfNotPresent
1818
# Image tag defaults to AppVersion, but you can use the tag key
1919
# for the image tag, e.g:
20-
tag: 3.3.6-3.4.2-ubuntu22.04
20+
tag: 3.3.7-3.5.0-ubuntu22.04
2121

2222
# Change the following reference to "/etc/dcgm-exporter/default-counters.csv"
2323
# to stop profiling metrics from DCGM

docker/Dockerfile.ubi9

+14-6
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
1-
FROM nvcr.io/nvidia/cuda:12.4.1-base-ubi9 AS builder
2-
ARG GOLANG_VERSION
1+
FROM nvcr.io/nvidia/cuda:12.5.1-base-ubi9 AS builder
2+
ARG GOLANG_VERSION=1.22.4
33
WORKDIR /go/src/github.com/NVIDIA/dcgm-exporter
44
RUN set -eux; \
55
dnf clean expire-cache; \
@@ -40,7 +40,7 @@ COPY . .
4040

4141
RUN make binary check-format
4242

43-
FROM nvcr.io/nvidia/cuda:12.4.1-base-ubi9
43+
FROM nvcr.io/nvidia/cuda:12.5.1-base-ubi9
4444
ARG DCGM_VERSION
4545
ARG VERSION
4646
ARG DIST_DIR
@@ -53,9 +53,17 @@ LABEL release="N/A"
5353
LABEL summary="Exports GPU Metrics to Prometheus"
5454
LABEL description="See summary"
5555

56-
RUN dnf update --disablerepo=* --enablerepo=ubi-9-appstream-rpms --enablerepo=ubi-9-baseos-rpms -y && rm -rf /var/cache/yum \
57-
&& dnf clean expire-cache && dnf install -y datacenter-gpu-manager-${DCGM_VERSION} libcap \
58-
&& rm -rfd /usr/local/dcgm/bindings /usr/local/dcgm/sdk_samples /usr/share/nvidia-validation-suite
56+
RUN dnf update --disablerepo=* --enablerepo=ubi-9-appstream-rpms --enablerepo=ubi-9-baseos-rpms -y \
57+
&& dnf install --nodocs --setopt=install_weak_deps=False -y datacenter-gpu-manager-${DCGM_VERSION} libcap \
58+
&& dnf -y clean all\
59+
&& rm -rf /var/cache/yum\
60+
&& rm -rfd /usr/local/dcgm/bindings /usr/local/dcgm/sdk_samples /usr/share/nvidia-validation-suite \
61+
# DCGM exporter doesn't use libdcgm_cublas_proxy*.so.
62+
&& rm -rf /usr/lib64/libdcgm_cublas_proxy*.so \
63+
&& rm -rf /usr/local/dcgm/scripts \
64+
&& rm -f /usr/include/*.h /usr/bin/DcgmProfTesterKernels.ptx /usr/bin/dcgmproftester* \
65+
&& rm -rf /var/lib/rpm/rpmdb.sqlite /var/cache/* /var/lib/dnf/history.* /var/log/* /tmp/* /var/tmp/* \
66+
&& rm -rf /usr/share/doc && rm -rf /usr/share/man
5967

6068
COPY ./LICENSE ./licenses/LICENSE
6169
COPY --from=builder /go/src/github.com/NVIDIA/dcgm-exporter/cmd/dcgm-exporter/dcgm-exporter /usr/bin/

docker/Dockerfile.ubuntu22.04

+12-5
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
1-
FROM nvcr.io/nvidia/cuda:12.4.1-base-ubuntu22.04 AS builder
2-
ARG GOLANG_VERSION=1.21.5
1+
FROM nvcr.io/nvidia/cuda:12.5.1-base-ubuntu22.04 AS builder
2+
ARG GOLANG_VERSION=1.22.4
33
WORKDIR /go/src/github.com/NVIDIA/dcgm-exporter
44
RUN set -eux; \
55
apt-get update; \
@@ -45,7 +45,7 @@ COPY . .
4545

4646
RUN make binary check-format
4747

48-
FROM nvcr.io/nvidia/cuda:12.4.1-base-ubuntu22.04
48+
FROM nvcr.io/nvidia/cuda:12.5.1-base-ubuntu22.04
4949

5050
ARG VERSION
5151
ARG DCGM_VERSION
@@ -65,9 +65,16 @@ COPY etc /etc/dcgm-exporter
6565

6666
RUN apt-get update && apt-get install -y --no-install-recommends \
6767
datacenter-gpu-manager=1:${DCGM_VERSION} libcap2-bin && apt-get purge --autoremove -y openssl \
68+
&& apt-get -y clean \
69+
&& apt-get -y autoclean \
6870
&& apt-get autoremove -y \
69-
&& rm -rf /var/lib/apt/lists/* \
70-
&& rm -rfd /usr/local/dcgm/bindings /usr/local/dcgm/sdk_samples /usr/share/nvidia-validation-suite
71+
&& rm -rfd /usr/local/dcgm/bindings /usr/local/dcgm/sdk_samples /usr/share/nvidia-validation-suite \
72+
# DCGM exporter doesn't use libdcgm_cublas_proxy*.so.
73+
&& rm -rf /usr/lib/x86_64-linux-gnu/libdcgm_cublas_proxy*.so \
74+
&& rm -rf /usr/local/dcgm/scripts \
75+
&& rm -f /usr/include/*.h /usr/bin/DcgmProfTesterKernels.ptx /usr/bin/dcgmproftester* \
76+
&& rm -rf /var/cache/debconf/* /var/lib/apt/lists/* /var/log/* /tmp/* /var/tmp/* \
77+
&& rm -rf /usr/share/doc && rm -rf /usr/share/man
7178
# Required for DCP metrics
7279
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility,compat32
7380
# disable all constraints on the configurations required by NVIDIA container toolkit

go.mod

+37-36
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,18 @@
11
module github.com/NVIDIA/dcgm-exporter
22

3-
go 1.21
3+
go 1.22.0
4+
5+
toolchain go1.22.4
46

57
require (
68
github.com/NVIDIA/go-dcgm v0.0.0-20240118201113-3385e277e49f
79
github.com/NVIDIA/go-nvml v0.12.0-2
810
github.com/avast/retry-go/v4 v4.5.1
911
github.com/bits-and-blooms/bitset v1.13.0
1012
github.com/go-kit/log v0.2.1
11-
github.com/google/uuid v1.5.0
13+
github.com/google/uuid v1.6.0
1214
github.com/gorilla/mux v1.8.1
13-
github.com/mittwald/go-helm-client v0.12.8
15+
github.com/mittwald/go-helm-client v0.12.9
1416
github.com/onsi/ginkgo/v2 v2.15.0
1517
github.com/onsi/gomega v1.32.0
1618
github.com/prometheus/client_model v0.6.0
@@ -20,13 +22,14 @@ require (
2022
github.com/stretchr/testify v1.8.4
2123
github.com/urfave/cli/v2 v2.27.1
2224
go.uber.org/automaxprocs v1.5.3
23-
golang.org/x/sync v0.5.0
24-
google.golang.org/grpc v1.61.1
25-
k8s.io/api v0.29.2
26-
k8s.io/apimachinery v0.29.2
27-
k8s.io/client-go v0.29.2
28-
k8s.io/kubelet v0.29.2
29-
k8s.io/utils v0.0.0-20240102154912-e7106e64919e
25+
go.uber.org/mock v0.4.0
26+
golang.org/x/sync v0.7.0
27+
google.golang.org/grpc v1.64.0
28+
k8s.io/api v0.30.2
29+
k8s.io/apimachinery v0.30.2
30+
k8s.io/client-go v0.30.2
31+
k8s.io/kubelet v0.30.2
32+
k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0
3033
)
3134

3235
require (
@@ -44,20 +47,19 @@ require (
4447
github.com/beorn7/perks v1.0.1 // indirect
4548
github.com/cespare/xxhash/v2 v2.2.0 // indirect
4649
github.com/chai2010/gettext-go v1.0.2 // indirect
47-
github.com/containerd/containerd v1.7.11 // indirect
50+
github.com/containerd/containerd v1.7.12 // indirect
4851
github.com/containerd/log v0.1.0 // indirect
4952
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
5053
github.com/cpuguy83/go-md2man/v2 v2.0.3 // indirect
5154
github.com/cyphar/filepath-securejoin v0.2.4 // indirect
5255
github.com/davecgh/go-spew v1.1.1 // indirect
5356
github.com/distribution/reference v0.5.0 // indirect
54-
github.com/docker/cli v24.0.7+incompatible // indirect
57+
github.com/docker/cli v26.1.4+incompatible // indirect
5558
github.com/docker/distribution v2.8.3+incompatible // indirect
56-
github.com/docker/docker v24.0.7+incompatible // indirect
59+
github.com/docker/docker v26.1.4+incompatible // indirect
5760
github.com/docker/docker-credential-helpers v0.8.0 // indirect
58-
github.com/docker/go-connections v0.4.0 // indirect
61+
github.com/docker/go-connections v0.5.0 // indirect
5962
github.com/docker/go-metrics v0.0.1 // indirect
60-
github.com/docker/go-units v0.5.0 // indirect
6163
github.com/emicklei/go-restful/v3 v3.11.1 // indirect
6264
github.com/evanphx/json-patch v5.7.0+incompatible // indirect
6365
github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f // indirect
@@ -74,7 +76,7 @@ require (
7476
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
7577
github.com/gobwas/glob v0.2.3 // indirect
7678
github.com/gogo/protobuf v1.3.2 // indirect
77-
github.com/golang/protobuf v1.5.3 // indirect
79+
github.com/golang/protobuf v1.5.4 // indirect
7880
github.com/google/btree v1.1.2 // indirect
7981
github.com/google/gnostic-models v0.6.8 // indirect
8082
github.com/google/go-cmp v0.6.0 // indirect
@@ -111,12 +113,11 @@ require (
111113
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
112114
github.com/modern-go/reflect2 v1.0.2 // indirect
113115
github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect
114-
github.com/morikuni/aec v1.0.0 // indirect
115116
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
116117
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
117118
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
118119
github.com/opencontainers/go-digest v1.0.0 // indirect
119-
github.com/opencontainers/image-spec v1.1.0-rc5 // indirect
120+
github.com/opencontainers/image-spec v1.1.0-rc6 // indirect
120121
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
121122
github.com/pkg/errors v0.9.1 // indirect
122123
github.com/pmezard/go-difflib v1.0.0 // indirect
@@ -140,32 +141,32 @@ require (
140141
go.opentelemetry.io/otel/metric v1.21.0 // indirect
141142
go.opentelemetry.io/otel/trace v1.21.0 // indirect
142143
go.starlark.net v0.0.0-20231121155337-90ade8b19d09 // indirect
143-
go.uber.org/mock v0.4.0 // indirect
144-
golang.org/x/crypto v0.18.0 // indirect
144+
golang.org/x/crypto v0.24.0 // indirect
145145
golang.org/x/exp v0.0.0-20240103183307-be819d1f06fc // indirect
146-
golang.org/x/net v0.20.0 // indirect
147-
golang.org/x/oauth2 v0.16.0 // indirect
148-
golang.org/x/sys v0.16.0 // indirect
149-
golang.org/x/term v0.16.0 // indirect
150-
golang.org/x/text v0.14.0 // indirect
146+
golang.org/x/mod v0.17.0 // indirect
147+
golang.org/x/net v0.26.0 // indirect
148+
golang.org/x/oauth2 v0.18.0 // indirect
149+
golang.org/x/sys v0.21.0 // indirect
150+
golang.org/x/term v0.21.0 // indirect
151+
golang.org/x/text v0.16.0 // indirect
151152
golang.org/x/time v0.5.0 // indirect
152-
golang.org/x/tools v0.16.1 // indirect
153+
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect
153154
google.golang.org/appengine v1.6.8 // indirect
154-
google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917 // indirect
155+
google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237 // indirect
155156
google.golang.org/protobuf v1.33.0 // indirect
156157
gopkg.in/evanphx/json-patch.v5 v5.7.0 // indirect
157158
gopkg.in/inf.v0 v0.9.1 // indirect
158159
gopkg.in/yaml.v2 v2.4.0 // indirect
159160
gopkg.in/yaml.v3 v3.0.1 // indirect
160-
helm.sh/helm/v3 v3.14.2 // indirect
161-
k8s.io/apiextensions-apiserver v0.29.0 // indirect
162-
k8s.io/apiserver v0.29.2 // indirect
163-
k8s.io/cli-runtime v0.29.0 // indirect
164-
k8s.io/component-base v0.29.2 // indirect
165-
k8s.io/klog/v2 v2.110.1 // indirect
166-
k8s.io/kube-openapi v0.0.0-20240220201932-37d671a357a5 // indirect
167-
k8s.io/kubectl v0.29.0 // indirect
168-
oras.land/oras-go v1.2.4 // indirect
161+
helm.sh/helm/v3 v3.15.2 // indirect
162+
k8s.io/apiextensions-apiserver v0.30.0 // indirect
163+
k8s.io/apiserver v0.30.2 // indirect
164+
k8s.io/cli-runtime v0.30.0 // indirect
165+
k8s.io/component-base v0.30.2 // indirect
166+
k8s.io/klog/v2 v2.120.1 // indirect
167+
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
168+
k8s.io/kubectl v0.30.0 // indirect
169+
oras.land/oras-go v1.2.5 // indirect
169170
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
170171
sigs.k8s.io/kustomize/api v0.16.0 // indirect
171172
sigs.k8s.io/kustomize/kyaml v0.16.0 // indirect

0 commit comments

Comments
 (0)