Skip to content

Commit 320ecda

Browse files
authored
Merge pull request #61 from intel/release-26q1
Release 26q1
2 parents db58ceb + fa753a2 commit 320ecda

149 files changed

Lines changed: 8102 additions & 2694 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/scripts/coverage_check.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ set -euo pipefail
33

44
RESULT=$(make "$1" | awk '/total:/ {print ($3+0)}')
55

6-
if (( $(echo "$RESULT > $2" | bc -l) )); then
7-
echo "$1 $RESULT% is above threshold $2%"
6+
if (( $(echo "$RESULT >= $2" | bc -l) )); then
7+
echo "$1 $RESULT% is above or equal to the threshold $2%"
88
exit 0
99
else
1010
echo "$1 $RESULT% is below threshold $2%. Add more tests!"

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/bin/
22
/vendor/
3+
/cmd/kubelet-gaudi-plugin/vendor
34

45
# macOS
56
.DS_Store

DEV.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,35 @@ cp controller-tools/controller-gen code-generator/client-gen $HOME/go/bin
6161
# ensure it's in the path. You may want to add export to $HOME/.bashrc
6262
echo $PATH | grep -q $HOME/go/bin || export PATH=$HOME/go/bin:$PATH
6363
```
64+
# Running tests
65+
66+
Since Q1 '26, the Gaudi DRA driver has used `gohlml` to retrieve health-related information.
67+
The path to the HLML shared library is hardcoded, and `hack/fake_libhlml` implements
68+
a stub/mock version of the `hlml.h` API used by the `gohlml` project, plus flow control support.
69+
70+
When health-related tests call `gohlml`, it should in turn call the fake `libhlml` instead of the real
71+
one on nodes where no real Gaudi HW and SW are installed (e.g. CI). This means that if the
72+
tests are run on your development machine, you should either deploy a fresh fake `libhlml.so` or
73+
run the tests in a `gaudi-dra-driver-test-image` container, like CI does.
74+
75+
Deploying the fake hlml in place of the real `libhlml` should allow running tests in VS Code and other IDEs,
76+
after `ldconfig` is [configured properly](hack/fake_libhlml/README.md).
77+
78+
## Deploying
79+
```shell
80+
$ cd hack/fake_libhlml
81+
$ make clean
82+
rm -f fake_libhlml.o fake_libhlml.so
83+
$ make
84+
gcc -O -Wall -Wextra -Wno-unused-parameter -fPIC -c fake_libhlml.c -o fake_libhlml.o
85+
gcc -shared -o fake_libhlml.so fake_libhlml.o
86+
$ sudo cp ./fake_libhlml.so /usr/lib/habanalabs/libhlml.so
87+
$ cat << EOF | sudo tee /etc/ld.so.conf.d/habanalabs.conf
88+
/usr/lib/habanalabs/
89+
EOF
90+
91+
$ sudo ldconfig
92+
```
6493

6594
## Running tests in container
6695

Dockerfile.device-faker

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,20 @@
1-
FROM golang:1.24.2@sha256:b51b7beeabe2e2d8438ba4295c59d584049873a480ba0e7b56d80db74b3e3a3a AS build
1+
# Copyright (c) 2025, Intel Corporation. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
215
ARG LOCAL_LICENSES
16+
17+
FROM golang:1.25.8@sha256:dfae680962532eeea67ab297f1166c2c4e686edb9a8f05f9d02d96fc9191833e AS build
# An ARG declared before the first FROM is only in scope for FROM lines.
# Re-declare it inside the stage so RUN steps can read a --build-arg LOCAL_LICENSES value.
ARG LOCAL_LICENSES
318
WORKDIR /build
419
COPY . .
520

@@ -11,20 +26,15 @@ fi && \
1126
cp -r licenses /install_root/ && \
1227
cp bin/device-faker /install_root/
1328

14-
15-
FROM alpine AS template
16-
COPY --from=build /install_root/device-faker /device-faker
17-
18-
1929
RUN mkdir -p /opt/templates && \
20-
/device-faker gpu -n && \
30+
bin/device-faker gpu -n && \
2131
mv /tmp/gpu-template-*.json /opt/templates/gpu-template.json && \
22-
/device-faker gaudi -n && \
32+
bin/device-faker gaudi -n && \
2333
mv /tmp/gaudi-template-*.json /opt/templates/gaudi-template.json && \
2434
chmod 644 /opt/templates/*.json
2535

2636
FROM scratch
2737
LABEL description="Intel Device Faker"
2838
COPY --from=build /install_root/device-faker /device-faker
29-
COPY --from=template /opt/templates /opt/templates
39+
COPY --from=build /opt/templates /opt/templates
3040
ENTRYPOINT ["/device-faker"]

Dockerfile.gaudi

Lines changed: 62 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,21 +12,79 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
FROM golang:1.24.2@sha256:b51b7beeabe2e2d8438ba4295c59d584049873a480ba0e7b56d80db74b3e3a3a AS build
15+
# Get libc and sources from Ubuntu24, build libhlml, repackage hlml sources with shared lib build modifications
16+
FROM ubuntu:24.04@sha256:80dd3c3b9c6cecb9f1667e9290b3bc61b78c2678c02cbdae5f0fea92cc6734ab AS ubuntu
17+
RUN \
18+
cat /etc/apt/sources.list.d/ubuntu.sources && \
19+
sed -i 's/^Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/ubuntu.sources && \
20+
apt-get update && \
21+
apt-get install -y dpkg-dev cmake build-essential wget bison flex libyaml-dev pkg-config libudev-dev zlib1g-dev && \
22+
mkdir /tmp/src && \
23+
cd /tmp/src && \
24+
apt-get source libc6 coreutils dash && \
25+
wget https://vault.habana.ai/artifactory/misc/fw-tools-sources/1.23.0/hl-smi-SA.tgz && \
26+
mv hl-smi-SA.tgz hl-smi-SA.tar.gz && mkdir hl-smi-SA && cd hl-smi-SA && \
27+
tar -xf ../hl-smi-SA.tar.gz && \
28+
make 3rd-party && OPT_C='-O2 -DNDEBUG -fPIC -D_FORTIFY_SOURCE=2 -fstack-protector-strong' && \
29+
OPT_CXX='-O2 -DNDEBUG -fPIC -D_FORTIFY_SOURCE=2 -fstack-protector-strong' && \
30+
HARDEN_LD='-Wl,-O1,--as-needed,-z,relro,-z,now' && \
31+
make -C 3rd-parties/extract/pciutils-3.13.0 clean && \
32+
make -C 3rd-parties/extract/pciutils-3.13.0 SHARED=no HWDB=no CFLAGS="$OPT_C" && \
33+
make -C 3rd-parties/extract/lm-sensors-3-6-0 clean && \
34+
make -C 3rd-parties/extract/lm-sensors-3-6-0 CFLAGS="$OPT_C" BUILD_STATIC_LIB=1 && \
35+
cp -f 3rd-parties/extract/pciutils-3.13.0/lib/libpci.a 3rd-parties/extract/prod/libpci.a && \
36+
cp -f 3rd-parties/extract/lm-sensors-3-6-0/lib/libsensors.a 3rd-parties/extract/prod/libsensors.a && \
37+
cmake -S . -B build-shared -DCMAKE_BUILD_TYPE=Release -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_C_FLAGS_RELEASE="$OPT_C" -DCMAKE_CXX_FLAGS_RELEASE="$OPT_CXX" && \
38+
cmake --build build-shared --target hlml-raw && \
39+
gcc -shared -O2 $HARDEN_LD -Wl,--whole-archive \
40+
build-shared/make/hlml/libhlml-raw.a \
41+
3rd-parties/extract/prod/libsensors.a \
42+
3rd-parties/extract/prod/libeeprom-util.a \
43+
3rd-parties/extract/prod/libhl-thunk-static.a \
44+
3rd-parties/extract/prod/libhl-thunk-err_injection-static.a \
45+
3rd-parties/extract/prod/libpci.a \
46+
-Wl,--no-whole-archive -ldl -lm -lz -o libhlml.so
47+
48+
FROM golang:1.25.8@sha256:dfae680962532eeea67ab297f1166c2c4e686edb9a8f05f9d02d96fc9191833e AS build
49+
1650
ARG LOCAL_LICENSES
1751
WORKDIR /build
1852
COPY . .
1953

54+
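# Install the habanalabs-firmware-tools deb so its license/doc files can be copied into the image.
# NOTE(review): `[trusted=yes]` below disables apt signature checks even though the repo key is imported; confirm this is intended.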
55+
RUN \
56+
curl -fsSL https://vault.habana.ai/artifactory/api/gpg/key/public | gpg --dearmor | tee /etc/apt/trusted.gpg.d/habanalabs.gpg > /dev/null && \
57+
wget -q -O /etc/apt/sources.list.d/habanalabs_synapseai.list "https://vault.habana.ai/artifactory/gaudi-installer/repos/1.23.0/ubuntu24.04/habanalabs_synapseai.list" > /dev/null && \
58+
sed -i "s/^deb/deb [trusted=yes]/" /etc/apt/sources.list.d/habanalabs_synapseai.list && \
59+
apt-get update && \
60+
apt-get download habanalabs-firmware-tools && \
61+
dpkg --force-all -i *.deb
62+
63+
# Build Gaudi driver binary, copy original licenses and sources, and put the GPL license to the root of the container image
2064
RUN make gaudi && \
2165
mkdir -p /install_root && \
2266
if [ -z "$LOCAL_LICENSES" ]; then \
23-
make licenses; \
67+
make gaudi-licenses; \
2468
fi && \
2569
cp -r licenses /install_root/ && \
70+
mkdir /install_root/licenses/habanalabs && \
71+
cp /usr/share/doc/habanalabs-firmware-tools/* /install_root/licenses/habanalabs/ && \
72+
cp cmd/kubelet-gaudi-plugin/LICENSE /install_root/LICENSE && \
2673
cp bin/kubelet-gaudi-plugin /install_root/
2774

2875
FROM scratch
2976
LABEL description="Intel Gaudi resource driver for Kubernetes"
3077

31-
COPY --from=build /install_root/ /
32-
CMD ["/kubelet-gaudi-plugin"]
78+
COPY --from=build /install_root /
79+
COPY --from=ubuntu /tmp/src/hl-smi-SA/libhlml.so /usr/lib/habanalabs/libhlml.so
80+
COPY --from=ubuntu /lib/x86_64-linux-gnu/libc.so.6 /lib/x86_64-linux-gnu/libc.so.6
81+
COPY --from=ubuntu /lib64/ld-linux-x86-64.so.2 /lib64/ld-linux-x86-64.so.2
82+
COPY --from=ubuntu /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libm.so.6
83+
COPY --from=ubuntu /usr/lib/x86_64-linux-gnu/libdl.so.2 /usr/lib/x86_64-linux-gnu/libdl.so.2
84+
COPY --from=ubuntu /usr/lib/x86_64-linux-gnu/libz.so.1 /usr/lib/x86_64-linux-gnu/libz.so.1
85+
COPY --from=ubuntu /bin/cat /bin/cat
86+
COPY --from=ubuntu /bin/sh /bin/sh
87+
COPY --from=ubuntu /tmp/src/*tar.* /src/
88+
89+
ENV LD_LIBRARY_PATH=/usr/lib/habanalabs:/lib/x86_64-linux-gnu:/lib64:/usr/lib/x86_64-linux-gnu
90+
ENV PATH=/bin
# Provide a default command so the image starts the plugin when run without an
# explicit command; the binary is copied to / from /install_root above.
CMD ["/kubelet-gaudi-plugin"]

Dockerfile.gaudi-test

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,29 +11,36 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
FROM golang:1.24.2@sha256:b51b7beeabe2e2d8438ba4295c59d584049873a480ba0e7b56d80db74b3e3a3a AS go
14+
15+
# 1. Build fake libhlml for Gaudi tests
16+
FROM golang:1.25.8@sha256:dfae680962532eeea67ab297f1166c2c4e686edb9a8f05f9d02d96fc9191833e AS build
17+
WORKDIR /build
18+
COPY . .
19+
20+
RUN \
21+
wget -c https://vault.habana.ai/artifactory/debian/noble/pool/main/h/habanalabs-firmware-tools/habanalabs-firmware-tools_1.23.0-695_amd64.deb && \
22+
apt-get update && \
23+
apt-get install -y --ignore-missing ./habanalabs-firmware-tools_1.23.0-695_amd64.deb && \
24+
cd hack/fake_libhlml && \
25+
make clean && make
1526

1627
FROM ubuntu:24.04@sha256:80dd3c3b9c6cecb9f1667e9290b3bc61b78c2678c02cbdae5f0fea92cc6734ab AS ubuntu
1728
ARG UID=1000
1829
ARG GID=1000
19-
COPY --from=go --chown=${UID}:${GID} /usr/local/go /home/ubuntu/go
30+
# add fake hlml shared library for Gaudi tests
31+
COPY --from=build /build/hack/fake_libhlml/fake_libhlml.so /usr/lib/habanalabs/libhlml.so
32+
COPY --from=build /build/cmd/kubelet-gaudi-plugin/LICENSE /LICENSE
33+
COPY --from=build /usr/include/habanalabs/hlml.h /usr/include/habanalabs/hlml.h
34+
COPY --from=build --chown=${UID}:${GID} /usr/local/go /home/ubuntu/go
2035

21-
# add xpu-smi shared library for GPU tests and other dependencies
2236
RUN \
2337
apt-get update && \
24-
apt-get install -y make gcc wget software-properties-common python3-launchpadlib git yamllint shellcheck bc && \
25-
add-apt-repository -y ppa:kobuk-team/intel-graphics && \
26-
apt-get update && \
27-
apt-get install -y libze-intel-gpu1 libze1 intel-metrics-discovery intel-opencl-icd clinfo intel-gsc && \
28-
wget -qO /tmp/xpu-smi.deb https://github.com/intel/xpumanager/releases/download/V1.3.1/xpu-smi_1.3.1_20250724.061629.60921e5e_u24.04_amd64.deb && \
29-
apt-get install -y /tmp/xpu-smi.deb && \
30-
rm /tmp/xpu-smi.deb && \
31-
echo 'export PATH=/home/ubuntu/go/bin:$PATH' >> /home/ubuntu/.bashrc && \
32-
wget -q https://github.com/golangci/golangci-lint/releases/download/v2.7.2/golangci-lint-2.7.2-linux-amd64.tar.gz && \
33-
tar zxf golangci-lint-2.7.2-linux-amd64.tar.gz --strip-components=1 golangci-lint-2.7.2-linux-amd64/golangci-lint && \
38+
apt-get install -y make gcc wget git yamllint shellcheck bc && \
39+
wget -q https://github.com/golangci/golangci-lint/releases/download/v2.11.4/golangci-lint-2.11.4-linux-amd64.tar.gz && \
40+
tar zxf golangci-lint-2.11.4-linux-amd64.tar.gz --strip-components=1 golangci-lint-2.11.4-linux-amd64/golangci-lint && \
3441
mv golangci-lint /usr/local/bin/golangci-lint && \
35-
rm golangci-lint-2.7.2-linux-amd64.tar.gz && \
36-
unset http_proxy https_proxy no_proxy
42+
rm golangci-lint-2.11.4-linux-amd64.tar.gz && \
43+
# Single quotes keep $PATH literal, so it is expanded when .bashrc is sourced rather than frozen at build time.
echo 'export PATH=/home/ubuntu/go/bin:$PATH' >> /home/ubuntu/.bashrc
3744

3845
RUN \
3946
mkdir /github && \

Dockerfile.gpu

Lines changed: 1 addition & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,10 @@
1414

1515
ARG LOCAL_LICENSES
1616

17-
FROM golang:1.24.2@sha256:b51b7beeabe2e2d8438ba4295c59d584049873a480ba0e7b56d80db74b3e3a3a AS build
17+
FROM golang:1.25.8@sha256:dfae680962532eeea67ab297f1166c2c4e686edb9a8f05f9d02d96fc9191833e AS build
# An ARG declared before the first FROM is only in scope for FROM lines.
# Re-declare it inside the stage so RUN steps can read a --build-arg LOCAL_LICENSES value.
ARG LOCAL_LICENSES
1818
WORKDIR /build
1919
COPY . .
2020

21-
# Add xpu-smi shared library
22-
RUN \
23-
wget -qO /tmp/xpu-smi.deb https://github.com/intel/xpumanager/releases/download/V1.3.1/xpu-smi_1.3.1_20250724.061629.60921e5e_u24.04_amd64.deb && \
24-
dpkg -i --ignore-depends=level-zero,intel-gsc,libze-intel-gpu1 /tmp/xpu-smi.deb
25-
2621
# Build GPU DRA driver
2722
RUN make gpu && \
2823
mkdir -p /install_root && \
@@ -32,60 +27,9 @@ fi && \
3227
cp -r licenses /install_root/ && \
3328
cp bin/kubelet-gpu-plugin /install_root/
3429

35-
# Prepare dependencies
36-
FROM ubuntu:24.04@sha256:80dd3c3b9c6cecb9f1667e9290b3bc61b78c2678c02cbdae5f0fea92cc6734ab AS ubuntu
37-
COPY --from=build /tmp/xpu-smi.deb /tmp/xpu-smi.deb
38-
39-
RUN \
40-
sed -i 's/^Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/ubuntu.sources && \
41-
apt-get update && \
42-
apt-get install -y software-properties-common python3-launchpadlib
43-
44-
RUN \
45-
add-apt-repository -s -y ppa:kobuk-team/intel-graphics && \
46-
apt-get update && \
47-
apt-get install -y libze-intel-gpu1 libze1 intel-metrics-discovery intel-gsc libmetee5 && \
48-
apt-get install -y /tmp/xpu-smi.deb
49-
50-
RUN \
51-
mkdir /tmp/src && \
52-
cd /tmp/src && \
53-
apt-get source --download-only dash glibc libcap2 libudev1 libstdc++6 libmetee5 && \
54-
mkdir /licenses && \
55-
for pkg in dash libc6 intel-gsc libcap2 libudev1 libze1 libigdgmm12 libstdc++6 libze-intel-gpu1 libmetee5; do \
56-
mkdir -p /licenses/$pkg; \
57-
cp /usr/share/doc/$pkg/copyright /licenses/$pkg/; \
58-
done && \
59-
if grep -q /common-licenses/ /licenses/*/copyright; then \
60-
cp -r /usr/share/common-licenses/ /licenses/; \
61-
fi
62-
6330
FROM scratch
6431
WORKDIR /
6532
LABEL description="Intel GPU resource driver for Kubernetes"
6633

67-
# /bin/sh is used by xpu-smi library.
6834
COPY --from=build /install_root /
69-
COPY --from=build /usr/lib/x86_64-linux-gnu/libxpum.so.1 /usr/lib/x86_64-linux-gnu/libxpum.so.1
70-
COPY --from=ubuntu /lib/x86_64-linux-gnu/libc.so.6 /lib/x86_64-linux-gnu/libc.so.6
71-
COPY --from=ubuntu /lib64/ld-linux-x86-64.so.2 /lib64/ld-linux-x86-64.so.2
72-
COPY --from=ubuntu /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libm.so.6
73-
COPY --from=ubuntu /usr/lib/x86_64-linux-gnu/libdl.so.2 /usr/lib/x86_64-linux-gnu/libdl.so.2
74-
COPY --from=ubuntu /usr/lib/x86_64-linux-gnu/libz.so.1 /usr/lib/x86_64-linux-gnu/libz.so.1
75-
COPY --from=ubuntu /usr/lib/x86_64-linux-gnu/libze_loader.so.1 /usr/lib/x86_64-linux-gnu/libze_loader.so.1
76-
COPY --from=ubuntu /usr/lib/x86_64-linux-gnu/libigsc.so.0 /usr/lib/x86_64-linux-gnu/libigsc.so.0
77-
COPY --from=ubuntu /usr/lib/x86_64-linux-gnu/libudev.so.1 /usr/lib/x86_64-linux-gnu/libudev.so.1
78-
COPY --from=ubuntu /usr/lib/x86_64-linux-gnu/libcap.so.2 /usr/lib/x86_64-linux-gnu/libcap.so.2
79-
COPY --from=ubuntu /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libstdc++.so.6
80-
COPY --from=ubuntu /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1
81-
COPY --from=ubuntu /usr/lib/x86_64-linux-gnu/libze_intel_gpu.so.1 /usr/lib/x86_64-linux-gnu/libze_intel_gpu.so.1
82-
COPY --from=ubuntu /usr/lib/x86_64-linux-gnu/libmetee.so.5.0.0 /usr/lib/x86_64-linux-gnu/libmetee.so.5.0.0
83-
84-
COPY --from=ubuntu /lib/x86_64-linux-gnu/libpciaccess.so.0 /lib/x86_64-linux-gnu/libpciaccess.so.0
85-
COPY --from=ubuntu /lib/x86_64-linux-gnu/libigdgmm.so.12 /lib/x86_64-linux-gnu/libigdgmm.so.12
86-
COPY --from=ubuntu /usr/lib/x86_64-linux-gnu/libigc.so.2 /usr/lib/x86_64-linux-gnu/libigc.so.2
87-
88-
COPY --from=ubuntu /bin/sh /bin/sh
89-
COPY --from=ubuntu /licenses /licenses
90-
COPY --from=ubuntu /tmp/src /src
9135
CMD ["/kubelet-gpu-plugin"]

Dockerfile.qat

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
FROM golang:1.24.2@sha256:b51b7beeabe2e2d8438ba4295c59d584049873a480ba0e7b56d80db74b3e3a3a AS build
15+
FROM golang:1.25.8@sha256:dfae680962532eeea67ab297f1166c2c4e686edb9a8f05f9d02d96fc9191833e AS build
1616
ARG LOCAL_LICENSES
1717
WORKDIR /build
1818
COPY . .

0 commit comments

Comments
 (0)