Skip to content
6 changes: 3 additions & 3 deletions sle15/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
ARG SLES_VERSION
FROM nvcr.io/nvidia/cuda:12.6.2-base-ubi8 as license
FROM nvcr.io/nvidia/cuda:12.6.0-base-ubi9 as license

FROM registry.suse.com/bci/golang:1.17 as build
FROM registry.suse.com/bci/golang:1.23 as build

RUN zypper --non-interactive install -y git wget tar gzip

Expand All @@ -12,7 +12,7 @@ RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \
go build -o vgpu-util && \
mv vgpu-util /work

FROM registry.suse.com/suse/sle15:$SLES_VERSION
FROM registry.suse.com/bci/bci-base:$SLES_VERSION

#ARG BASE_URL=http://us.download.nvidia.com/XFree86/Linux-x86_64
ARG BASE_URL=https://us.download.nvidia.com/tesla
Expand Down
108 changes: 96 additions & 12 deletions sle15/nvidia-driver
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,15 @@ NVIDIA_MODULE_PARAMS=()
NVIDIA_UVM_MODULE_PARAMS=()
NVIDIA_MODESET_MODULE_PARAMS=()

# Select which kernel module source tree of the unpacked driver is used.
# OPEN_KERNEL_MODULES_ENABLED defaults to "true"; only the exact value "true"
# selects the open GPU kernel modules ("kernel-open"), anything else falls back
# to the proprietary tree ("kernel"). KERNEL_TYPE is later used as a directory
# name under /usr/src/nvidia-${DRIVER_VERSION} and as the installer's -m value.
OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-true}
if [[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]]; then
    KERNEL_TYPE=kernel-open
else
    KERNEL_TYPE=kernel
fi

_update_package_cache() {
if [ "${PACKAGE_TAG:-}" != "builtin" ]; then
echo "Updating the package cache..."
FLAVOR="$(echo ${KERNEL_VERSION} | cut -d- -f3)"
if [ "$FLAVOR" == "azure" ]; then
# consumed by container-suseconnect when calling `zypper refresh`
export ADDITIONAL_MODULES="sle-module-public-cloud"
fi
if ! zypper refresh; then
Expand Down Expand Up @@ -66,10 +70,13 @@ _install_prerequisites() (

echo "Installing Linux kernel source..."
local version_without_flavor=$(echo ${KERNEL_VERSION} | cut -d- -f-2)
export ZYPP_MODALIAS_SYSFS=$(mktemp /tmp/modalias-XXXX)
if ! zypper --non-interactive in -y --no-recommends --capability kernel-${FLAVOR} = ${version_without_flavor} kernel-${FLAVOR}-devel = ${version_without_flavor} ; then
echo "FATAL: failed to install kernel packages. Ensure SLES subscription is available."
rm -f ${ZYPP_MODALIAS_SYSFS}
exit 1
fi
rm -f ${ZYPP_MODALIAS_SYSFS}; unset ZYPP_MODALIAS_SYSFS

echo "Generating Linux kernel version string..."
extract-vmlinux /boot/vmlinuz-${KERNEL_VERSION} | strings | grep -E '^Linux version' | sed 's/^\(.*\)\s\+(.*)$/\1/' > version
Expand All @@ -96,8 +103,8 @@ _kernel_requires_package() {

echo "Checking NVIDIA driver packages..."

[[ ! -d /usr/src/nvidia-${DRIVER_VERSION}/kernel ]] && return 0
cd /usr/src/nvidia-${DRIVER_VERSION}/kernel
[[ ! -d /usr/src/nvidia-${DRIVER_VERSION}/${KERNEL_TYPE} ]] && return 0
cd /usr/src/nvidia-${DRIVER_VERSION}/${KERNEL_TYPE}

proc_mount_arg="--proc-mount-point /lib/modules/${KERNEL_VERSION}/proc"
for pkg_name in $(ls -d -1 precompiled/** 2> /dev/null); do
Expand All @@ -120,7 +127,7 @@ _create_driver_package() (
trap "make -s -j ${MAX_THREADS} SYSSRC=/lib/modules/${KERNEL_VERSION}/source clean > /dev/null" EXIT

echo "Compiling NVIDIA driver kernel modules..."
cd /usr/src/nvidia-${DRIVER_VERSION}/kernel
cd /usr/src/nvidia-${DRIVER_VERSION}/${KERNEL_TYPE}
make -s -j ${MAX_THREADS} SYSSRC=/lib/modules/${KERNEL_VERSION}/source nv-linux.o nv-modeset-linux.o > /dev/null

echo "Relinking NVIDIA driver kernel modules..."
Expand Down Expand Up @@ -205,6 +212,25 @@ _get_module_params() {

# Load the kernel modules and start persistenced.
_load_driver() {
local nv_fw_search_path="$RUN_DIR/driver/lib/firmware"
local set_fw_path="true"
local fw_path_config_file="/sys/module/firmware_class/parameters/path"
for param in "${NVIDIA_MODULE_PARAMS[@]}"; do
if [[ "$param" == "NVreg_EnableGpuFirmware=0" ]]; then
set_fw_path="false"
fi
done

if [[ "$set_fw_path" == "true" ]]; then
echo "Configuring the following firmware search path in '$fw_path_config_file': $nv_fw_search_path"
if [[ ! -z $(grep '[^[:space:]]' $fw_path_config_file) ]]; then
echo "WARNING: A search path is already configured in $fw_path_config_file"
echo " Retaining the current configuration"
else
echo -n "$nv_fw_search_path" > $fw_path_config_file || echo "WARNING: Failed to configure the firmware search path"
fi
fi

echo "Parsing kernel module parameters..."
_get_module_params

Expand Down Expand Up @@ -245,9 +271,11 @@ _load_driver() {
_unload_driver() {
local rmmod_args=()
local nvidia_deps=0
local nvidia_modeset_deps=0
local nvidia_refs=0
local nvidia_uvm_refs=0
local nvidia_modeset_refs=0
local nvidia_drm_refs=0

echo "Stopping NVIDIA persistence daemon..."
if [ -f /var/run/nvidia-persistenced/nvidia-persistenced.pid ]; then
Expand Down Expand Up @@ -295,6 +323,11 @@ _unload_driver() {
fi

echo "Unloading NVIDIA driver kernel modules..."
if [ -f /sys/module/nvidia_drm/refcnt ]; then
nvidia_drm_refs=$(< /sys/module/nvidia_drm/refcnt)
rmmod_args+=("nvidia-drm")
((++nvidia_modeset_deps))
fi
if [ -f /sys/module/nvidia_modeset/refcnt ]; then
nvidia_modeset_refs=$(< /sys/module/nvidia_modeset/refcnt)
rmmod_args+=("nvidia-modeset")
Expand All @@ -309,7 +342,7 @@ _unload_driver() {
nvidia_refs=$(< /sys/module/nvidia/refcnt)
rmmod_args+=("nvidia")
fi
if [ ${nvidia_refs} -gt ${nvidia_deps} ] || [ ${nvidia_uvm_refs} -gt 0 ] || [ ${nvidia_modeset_refs} -gt 0 ]; then
if [ ${nvidia_refs} -gt ${nvidia_deps} ] || [ ${nvidia_uvm_refs} -gt 0 ] || [ ${nvidia_modeset_refs} -gt ${nvidia_modeset_deps} ] || [ ${nvidia_drm_refs} -gt 0 ]; then
echo "Could not unload NVIDIA driver kernel modules, driver is in use" >&2
return 1
fi
Expand All @@ -331,7 +364,7 @@ _install_driver() {
if [ "${ACCEPT_LICENSE}" = "yes" ]; then
install_args+=("--accept-license")
fi
nvidia-installer --kernel-module-only --no-drm --ui=none --no-nouveau-check ${install_args[@]+"${install_args[@]}"}
IGNORE_CC_MISMATCH=1 nvidia-installer --kernel-module-only --ui=none --no-nouveau-check -m=${KERNEL_TYPE} --no-rebuild-initramfs ${install_args[@]+"${install_args[@]}"} --skip-module-load # --no-drm
}

# Mount the driver rootfs into the run directory with the exception of sysfs.
Expand All @@ -341,6 +374,16 @@ _mount_rootfs() {
mount --make-private /sys
mkdir -p ${RUN_DIR}/driver
mount --rbind / ${RUN_DIR}/driver

echo "Check SELinux status"
if [ -e /sys/fs/selinux ]; then
echo "SELinux is enabled"
echo "Change device files security context for selinux compatibility"
chcon -R -t container_file_t ${RUN_DIR}/driver/dev
else
echo "SELinux is disabled, skipping..."
fi

}

# Unmount the driver rootfs from the run directory.
Expand Down Expand Up @@ -419,7 +462,7 @@ _start_vgpu_topology_daemon() {
nvidia-topologyd
}

init() {
_prepare() {
if [ "${DRIVER_TYPE}" = "vgpu" ]; then
_find_vgpu_driver_version || exit 1
fi
Expand All @@ -429,11 +472,15 @@ init() {
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
sh /tmp/install.sh nvinstall && \
mkdir -p /usr/src/nvidia-$DRIVER_VERSION && \
mv LICENSE mkprecompiled kernel /usr/src/nvidia-$DRIVER_VERSION && \
mv LICENSE mkprecompiled $KERNEL_TYPE /usr/src/nvidia-$DRIVER_VERSION && \
sed '9,${/^\(kernel\|LICENSE\)/!d}' .manifest > /usr/src/nvidia-$DRIVER_VERSION/.manifest

echo -e "\n========== NVIDIA Software Installer ==========\n"
echo -e "Starting installation of NVIDIA driver version ${DRIVER_VERSION} for Linux kernel version ${KERNEL_VERSION}\n"
}

_prepare_exclusive() {
_prepare

exec 3> ${PID_FILE}
if ! flock -n 3; then
Expand All @@ -447,22 +494,35 @@ init() {

_unload_driver || exit 1
_unmount_rootfs
}

# Build and install the driver kernel modules.
# When _kernel_requires_package succeeds, the package cache is refreshed, the
# kernel version is resolved, prerequisites are installed and a driver package
# is created before _install_driver runs. The local 'cleanup' flag records that
# this path was taken so the package cache can be cleaned afterwards.
# NOTE(review): this span was taken from a rendered diff and appears to hold
# both removed and added lines (e.g. _cleanup_package_cache is called inside
# the first branch and again under $cleanup, and _load_driver is also called
# from _load) — confirm against the real file before relying on this.
_build() {
local cleanup=false

# Install dependencies
if _kernel_requires_package; then
_update_package_cache
_resolve_kernel_version || exit 1
_install_prerequisites
_create_driver_package
#_remove_prerequisites
_cleanup_package_cache
cleanup=true
fi

# Build the driver
# Build the driver - rootfs needs to be mounted as the build magic attempts to
# load the driver.
_install_driver
_load_driver || exit 1
if $cleanup; then
# Do not call _remove_prerequisites as this will delete depmod information
_cleanup_package_cache
fi
}

_load() {
_mount_rootfs
# Something in the build process may have decided to load drivers that happened to be installed.
# Make sure they are uninstalled.
lsmod | grep -q nvidia && { _unload_driver || exit 1; } || true
_load_driver || exit 1
_write_kernel_update_hook

echo "Done, now waiting for signal"
Expand All @@ -473,6 +533,26 @@ init() {
exit 0
}

# 'init' command: run the full sequence in one go.
# Prepares the driver sources under the exclusive lock, builds/installs the
# kernel modules, then loads them.
init() {
    _prepare_exclusive    # unpack sources, take the lock
    _build                # compile and install the kernel modules
    _load                 # load the modules
}

# 'build' command: prepare the driver sources and build/install the kernel
# modules. Uses _prepare rather than _prepare_exclusive and does not call _load.
build() {
    _prepare
    _build
}

# 'load' command: prepare under the exclusive lock, then run _load.
# Does not call _build.
load() {
    _prepare_exclusive
    _load
}

update() {
exec 3>&2
if exec 2> /dev/null 4< ${PID_FILE}; then
Expand Down Expand Up @@ -511,7 +591,7 @@ update() {
if _kernel_requires_package; then
_create_driver_package
fi
#_remove_prerequisites
# Do not call _remove_prerequisites as this will delete depmod information
_cleanup_package_cache

echo "Done"
Expand All @@ -524,6 +604,8 @@ Usage: $0 COMMAND [ARG...]

Commands:
init [-a | --accept-license] [-m | --max-threads MAX_THREADS]
build [-a | --accept-license] [-m | --max-threads MAX_THREADS]
load
update [-k | --kernel VERSION] [-s | --sign KEYID] [-t | --tag TAG] [-m | --max-threads MAX_THREADS]
EOF
exit 1
Expand All @@ -535,6 +617,8 @@ fi
# Dispatch on the sub-command and let getopt normalise its options.
# In both the long (-l) and short (-o) option lists a trailing ':' means the
# option takes a value. -a/--accept-license is a plain flag, so 'a' must not be
# followed by ':' — otherwise "build -a -m 4" would swallow "-m" as the value
# of -a.
command=$1; shift
case "${command}" in
init) options=$(getopt -l accept-license,max-threads: -o am: -- "$@") ;;
build) options=$(getopt -l accept-license,tag:,max-threads: -o at:m: -- "$@") ;;
load) options="" ;;
update) options=$(getopt -l kernel:,sign:,tag:,max-threads: -o k:s:t:m: -- "$@") ;;
*) usage ;;
esac
Expand Down