Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions vgpu-manager/ubuntu24.04/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
FROM nvcr.io/nvidia/cuda:13.0.1-base-ubuntu24.04

ARG DRIVER_VERSION
ENV DRIVER_VERSION=$DRIVER_VERSION
ARG DRIVER_ARCH=x86_64
ENV DRIVER_ARCH=$DRIVER_ARCH

# Remove cuda repository to avoid GPG errors
RUN rm /etc/apt/sources.list.d/cuda*.list

RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections

RUN apt-get update && apt-get install -y --no-install-recommends \
apt-utils \
build-essential \
file \
kmod \
pciutils && \
rm -rf /var/lib/apt/lists/*

RUN echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble main universe" > /etc/apt/sources.list && \
echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-updates main universe" >> /etc/apt/sources.list && \
echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-security main universe" >> /etc/apt/sources.list && \
usermod -o -u 0 -g 0 _apt

RUN mkdir -p /driver
WORKDIR /driver
ADD NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run .
RUN chmod +x NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run

COPY nvidia-driver /usr/local/bin

# Install / upgrade packages here that are required to resolve CVEs
ARG CVE_UPDATES
RUN if [ -n "${CVE_UPDATES}" ]; then \
apt-get update && apt-get upgrade -y ${CVE_UPDATES} && \
rm -rf /var/lib/apt/lists/*; \
fi

# Add NGC DL license from the CUDA image
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE

ENTRYPOINT ["nvidia-driver", "init"]
331 changes: 331 additions & 0 deletions vgpu-manager/ubuntu24.04/nvidia-driver
Original file line number Diff line number Diff line change
@@ -0,0 +1,331 @@
#!/bin/bash

set -xeu

DRIVER_VERSION=${DRIVER_VERSION:?"Missing driver version"}
DRIVER_ARCH=${DRIVER_ARCH:?"Missing driver arch"}
DRIVER_RESET_RETRIES=10
DELAY_BEFORE_VF_CREATION=${DELAY_BEFORE_VF_CREATION:-15}
KERNEL_VERSION=$(uname -r)
RUN_DIR=/run/nvidia

export DEBIAN_FRONTEND=noninteractive

_update_package_cache() {
if [ "${PACKAGE_TAG:-}" != "builtin" ]; then
echo "Updating the package cache..."
apt-get -qq update
fi
}

_cleanup_package_cache() {
if [ "${PACKAGE_TAG:-}" != "builtin" ]; then
echo "Cleaning up the package cache..."
rm -rf /var/lib/apt/lists/*
fi
}

# Resolve the kernel version to the form major.minor.patch-revision-flavor where flavor defaults to generic.
_resolve_kernel_version() {
local version=$(apt-cache show "linux-headers-${KERNEL_VERSION}" 2> /dev/null | \
sed -nE 's/^Version:\s+(([0-9]+\.){2}[0-9]+)[-.]([0-9]+).*/\1-\3/p' | head -1)
local kernel_flavor=$(echo ${KERNEL_VERSION} | sed 's/[^a-z]*//')
kernel_flavor="${kernel_flavor//virtual/generic}"

echo "Resolving Linux kernel version..."
if [ -z "${version}" ]; then
echo "Could not resolve Linux kernel version" >&2
return 1
fi

KERNEL_VERSION="${version}-${kernel_flavor}"
echo "Proceeding with Linux kernel version ${KERNEL_VERSION}"
return 0
}

# Install the kernel modules header/builtin/order files and generate the kernel version string.
_install_prerequisites() {
local tmp_dir=$(mktemp -d)

trap "popd; rm -rf ${tmp_dir}" RETURN EXIT
pushd ${tmp_dir}

rm -rf /lib/modules/${KERNEL_VERSION}
mkdir -p /lib/modules/${KERNEL_VERSION}/proc

echo "Installing Linux kernel headers..."
apt-get -qq install --no-install-recommends linux-headers-${KERNEL_VERSION} > /dev/null

echo "Installing Linux kernel module files..."
apt-get -qq download linux-image-${KERNEL_VERSION} && dpkg -x linux-image*.deb .
{ apt-get -qq download linux-modules-${KERNEL_VERSION} && dpkg -x linux-modules*.deb . || true; } 2> /dev/null
# linux-modules-extra contains pci-pf-stub which is required when enabling SR-IOV on a physical GPU
{ apt-get -qq download linux-modules-extra-${KERNEL_VERSION} && dpkg -x linux-modules-extra*.deb . || true; } 2> /dev/null
mv lib/modules/${KERNEL_VERSION}/modules.* /lib/modules/${KERNEL_VERSION}
mv lib/modules/${KERNEL_VERSION}/kernel /lib/modules/${KERNEL_VERSION}
depmod ${KERNEL_VERSION}

echo "Generating Linux kernel version string..."

file boot/vmlinuz-* | awk 'BEGIN { RS="," } $1=="version" { print $2 }' - > version
if [ -z "$(<version)" ]; then
echo "Could not locate Linux kernel version string" >&2
return 1
fi
mv version /lib/modules/${KERNEL_VERSION}/proc
}

# Cleanup the prerequisites installed above.
_remove_prerequisites() {
if [ "${PACKAGE_TAG:-}" != "builtin" ]; then
apt-get -qq purge linux-headers-${KERNEL_VERSION} > /dev/null
# TODO remove module files not matching an existing driver package.
fi
}

# Mount the driver rootfs into the run directory with the exception of sysfs.
_mount_rootfs() {
echo "Mounting NVIDIA driver rootfs..."
# Hack: remount /sys as rw to overcome intermittent bug with
# /sys being mounted ro for privileged containers
mount -o remount,rw /sys
mount --make-runbindable /sys
mount --make-private /sys
mkdir -p ${RUN_DIR}/driver
mount --rbind / ${RUN_DIR}/driver
}

# Unmount the driver rootfs from the run directory.
_unmount_rootfs() {
echo "Unmounting NVIDIA driver rootfs..."
if findmnt -r -o TARGET | grep "${RUN_DIR}/driver" > /dev/null; then
umount -l -R ${RUN_DIR}/driver
fi
}

# Create /dev/char directory if it doesn't exist inside the container.
# Without this directory, nvidia-vgpu-mgr will fail to create symlinks
# under /dev/char for new devices nodes.
_create_dev_char_directory() {
if [ ! -d "/dev/char" ]; then
echo "Creating '/dev/char' directory"
mkdir -p /dev/char
fi
}

_set_fw_search_path() {
local nv_fw_search_path="$RUN_DIR/driver/lib/firmware"
local fw_path_config_file="/sys/module/firmware_class/parameters/path"

if [[ ! -z $(grep '[^[:space:]]' $fw_path_config_file) ]]; then
echo "WARNING: A search path is already configured in $fw_path_config_file"
echo " Retaining the current configuration. Note, GSP firmware may not be found and thus won't be used by the NVIDIA driver."
return
fi

echo "Configuring the following firmware search path in '$fw_path_config_file': $nv_fw_search_path"
echo -n "$nv_fw_search_path" > $fw_path_config_file
}

_install_driver() {
local tmp_dir=$(mktemp -d)

sh NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run --ui=none --no-questions --tmpdir ${tmp_dir} --no-systemd
}

# Currently _install_driver() takes care of loading nvidia modules. Just need to start necessary vgpu daemons
_load_driver() {
/usr/bin/nvidia-vgpud
/usr/bin/nvidia-vgpu-mgr &

# check nvidia drivers are loaded
if [ ! -f /sys/module/nvidia_vgpu_vfio/refcnt ] || [ ! -f /sys/module/nvidia/refcnt ]; then
echo "Failed to load nvidia driver"
return 1
fi
return 0
}

# Enable virtual functions for all physical GPUs on the node that support SR-IOV.
# Retry logic is to account for when the driver is busy (i.e. during driver initialization)
_enable_vfs() {
# Wait before attempting to create VFs to ensure the driver has finished initializing.
# This is a WAR for a bug in vGPU 17.2 where sriov-manage does not return a non-zero
# exit code even though VF creation fails.
sleep $DELAY_BEFORE_VF_CREATION

local retry
for ((retry = 0 ; retry <= $DRIVER_RESET_RETRIES ; retry++)); do
if /usr/lib/nvidia/sriov-manage -e ALL; then
return 0
fi
if [ $retry == $DRIVER_RESET_RETRIES ]; then
echo "Failed to enable VFs"
fi
done
return 1
}

# Disable virtual functions for all physical GPUs on the node that support SR-IOV.
# Retry logic is to account for when the driver is busy (i.e. during driver initialization)
_disable_vfs() {
local retry
for ((retry = 0 ; retry <= $DRIVER_RESET_RETRIES ; retry++)); do
if /usr/lib/nvidia/sriov-manage -d ALL; then
return 0
fi
if [ $retry == $DRIVER_RESET_RETRIES ]; then
echo "Failed to disable VFs"
fi
done
return 1
}

_unload_driver() {
local rmmod_args=()
local nvidia_deps=0
local nvidia_refs=0
local nvidia_vgpu_vfio_refs=0

if [ -f /var/run/nvidia-vgpu-mgr/nvidia-vgpu-mgr.pid ]; then
echo "Stopping NVIDIA vGPU Manager..."
local pid=$(< /var/run/nvidia-vgpu-mgr/nvidia-vgpu-mgr.pid)

kill -TERM "${pid}"
for i in $(seq 1 50); do
kill -0 "${pid}" 2> /dev/null || break
sleep 0.1
done
if [ $i -eq 50 ]; then
echo "Could not stop NVIDIA vGPU Manager" >&2
return 1
fi
fi

echo "Unloading NVIDIA driver kernel modules..."
if [ -f /sys/module/nvidia_vgpu_vfio/refcnt ]; then
nvidia_vgpu_vfio_refs=$(< /sys/module/nvidia_vgpu_vfio/refcnt)
rmmod_args+=("nvidia_vgpu_vfio")
((++nvidia_deps))
fi
if [ -f /sys/module/nvidia/refcnt ]; then
nvidia_refs=$(< /sys/module/nvidia/refcnt)
rmmod_args+=("nvidia")
fi

# TODO: check if nvidia module is in use by checking refcnt

if [ ${#rmmod_args[@]} -gt 0 ]; then
rmmod ${rmmod_args[@]}
if [ "$?" != "0" ]; then
return 1
fi
fi
return 0
}

_shutdown() {
if _disable_vfs && _unload_driver; then
_unmount_rootfs
return 0
fi
echo "Failed to cleanup driver"
return 1
}

build() {
echo "build() not implemented"
}

load() {
echo "load() not implemented"
}

update() {
echo "update() not implemented"
}

init() {
trap "echo 'Caught signal'; exit 1" HUP INT QUIT PIPE TERM
trap "_shutdown" EXIT

if ! _unload_driver; then
echo "Previous NVIDIA driver installation cannot be removed. Exiting"
exit 1
fi
_unmount_rootfs

_update_package_cache
_resolve_kernel_version || exit 1
_install_prerequisites
_create_dev_char_directory
_set_fw_search_path
_install_driver
_load_driver || exit 1
_mount_rootfs
_enable_vfs

# In certain scenarios, /sys/class/mdev_bus is not populated with the correct list of devices (PFs and possible VFs) at this point.
# Re-run nvdidia-vgpud to ensure /sys/class/mdev_bus is populated correctly. And restart nvidia-vgpu-mgr if previously killed.
nvidia-vgpud &
pgrep nvidia-vgpu-mgr >/dev/null || (echo "Restarting nvidia-vgpu-mgr after previously killed" && nvidia-vgpu-mgr &)

set +x
echo "Done, now waiting for signal"
trap "echo 'Caught signal'; _shutdown; trap - EXIT; exit" HUP INT QUIT PIPE TERM

while true; do
sleep 15
pgrep nvidia-vgpu-mgr >/dev/null || (echo "ERROR: nvidia-vgpu-mgr daemon is no longer running. Exiting." && exit 1)
done
}


usage() {
cat >&2 <<EOF
Usage: $0 COMMAND [ARG...]

Commands:
init [-a | --accept-license]
build [-a | --accept-license]
load
update [-k | --kernel VERSION] [-s | --sign KEYID] [-t | --tag TAG]
EOF
exit 1
}

if [ $# -eq 0 ]; then
usage
fi
command=$1; shift
case "${command}" in
init) options=$(getopt -l accept-license -o a -- "$@") ;;
build) options=$(getopt -l accept-license,tag: -o a:t -- "$@") ;;
load) options="" ;;
update) options=$(getopt -l kernel:,sign:,tag: -o k:s:t: -- "$@") ;;
*) usage ;;
esac
if [ $? -ne 0 ]; then
usage
fi
eval set -- "${options}"

ACCEPT_LICENSE=""
KERNEL_VERSION=$(uname -r)
PRIVATE_KEY=""
PACKAGE_TAG=""

for opt in ${options}; do
case "$opt" in
-a | --accept-license) ACCEPT_LICENSE="yes"; shift 1 ;;
-k | --kernel) KERNEL_VERSION=$2; shift 2 ;;
-s | --sign) PRIVATE_KEY=$2; shift 2 ;;
-t | --tag) PACKAGE_TAG=$2; shift 2 ;;
--) shift; break ;;
esac
done
if [ $# -ne 0 ]; then
usage
fi

$command
Loading