Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 22 additions & 7 deletions templates/mps-control-daemon.tmpl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,38 @@ spec:
image: {{ .MpsImageName }}
securityContext:
privileged: true
command: [chroot, /driver-root, sh, -c]
command: [sh, -c]
args:
- |-
set -e
rm -f /var/log/nvidia-mps/startup.log
rm -f /driver-root/var/log/nvidia-mps/startup.log

nvidia-cuda-mps-control -d
if [ -x /driver-root/bin/sh ] || [ -x /driver-root/usr/bin/sh ]; then
# Use chroot to avoid a library mismatch between the container and the host.
# This applies when the driver root is / (the default value) or /run/nvidia/driver
# (the default location where the GPU Operator installs the driver).
RUN="chroot /driver-root sh -c"
else
# No shell in driver root (e.g. GKE COS): run directly with PATH/LD_LIBRARY_PATH
export PATH="/driver-root/usr/bin:/driver-root/bin:/driver-root/usr/local/bin:$PATH"
export LD_LIBRARY_PATH="/driver-root/lib64:/driver-root/lib:/driver-root/usr/lib64:/driver-root/usr/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
RUN="sh -c"

# Point MPS to the mounted paths. These are not set in the Deployment spec because,
# with the chroot approach, the /driver-root/... paths do not exist inside the chroot.
export CUDA_MPS_PIPE_DIRECTORY=/driver-root/tmp/nvidia-mps
export CUDA_MPS_LOG_DIRECTORY=/driver-root/var/log/nvidia-mps
fi

$RUN "nvidia-cuda-mps-control -d"
{{- if .DefaultActiveThreadPercentage }}
echo set_default_active_thread_percentage {{.DefaultActiveThreadPercentage}} | nvidia-cuda-mps-control
$RUN "echo set_default_active_thread_percentage {{.DefaultActiveThreadPercentage}} | nvidia-cuda-mps-control"
{{- end}}
{{- range $id, $limit := .DefaultPinnedDeviceMemoryLimits }}
echo set_default_device_pinned_mem_limit {{ $id }} {{ $limit }} | nvidia-cuda-mps-control
$RUN "echo set_default_device_pinned_mem_limit {{ $id }} {{ $limit }} | nvidia-cuda-mps-control"
{{- end}}

echo "startup complete" > /var/log/nvidia-mps/startup.log
echo "startup complete" > /driver-root/var/log/nvidia-mps/startup.log

tail -n +1 -f /var/log/nvidia-mps/control.log
tail -n +1 -f /driver-root/var/log/nvidia-mps/control.log
startupProbe:
exec:
command:
Expand Down