Skip to content

Commit 63f6fae

Browse files
authored
Merge pull request #44 from NVIDIA/fix/gke-remove-deprecated-cfs-sysctls
nvidia-tuning-gke: drop deprecated CFS sysctls + fix base image registry + bump TUNING_VERSION to 1.1.6 + permissive CI guard
2 parents 7f153d1 + 9826ac1 commit 63f6fae

6 files changed

Lines changed: 9 additions & 12 deletions

File tree

.github/workflows/pr_build.yaml

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -106,8 +106,9 @@ jobs:
106 106
if [ ! -f "$CONFIG_FILE" ]; then
107 107
# Check if Dockerfile exists and has a FROM that references a skyhook-packages image
108 108
if [ -f "$DOCKERFILE" ]; then
109-
# Check if FROM line contains "skyhook-packages" (indicating it inherits from another package)
110-
if grep -q "^FROM.*skyhook-packages" "$DOCKERFILE"; then
109+
# Check if FROM line contains "skyhook-packages" or "nodewright-packages"
110+
# (indicating it inherits from another package in this repo's published image set).
111+
if grep -Eq "^FROM.*(skyhook|nodewright)-packages" "$DOCKERFILE"; then
111 112
echo "config_exists=false" >> $GITHUB_OUTPUT
112 113
echo "config_changed=false" >> $GITHUB_OUTPUT
113 114
echo "No config.json found, but Dockerfile inherits from skyhook-packages image - validation skipped"
@@ -152,7 +153,7 @@ jobs:
152 153
run: |
153 154
DOCKERFILE="${{ matrix.package }}/Dockerfile"
154 155
155-
if [ -f "$DOCKERFILE" ] && grep -q "^FROM.*skyhook-packages" "$DOCKERFILE"; then
156+
if [ -f "$DOCKERFILE" ] && grep -Eq "^FROM.*(skyhook|nodewright)-packages" "$DOCKERFILE"; then
156 157
echo "inherits_from_skyhook_packages=true" >> $GITHUB_OUTPUT
157 158
echo "Package inherits from skyhook-packages image - will use validate-inherited target"
158 159
else

makefile

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -108,8 +108,8 @@ validate-inherited: ## Validate an inherited package (inherits from skyhook-pack
108 108
echo "ERROR: Dockerfile not found for package $(PACKAGE)"; \
109 109
exit 1; \
110 110
fi
111-
@if ! grep -q "^FROM.*skyhook-packages" "$(PACKAGE)/Dockerfile"; then \
112-
echo "ERROR: Package $(PACKAGE) does not inherit from skyhook-packages. Use 'make validate-standalone' instead."; \
111+
@if ! grep -Eq "^FROM.*(skyhook|nodewright)-packages" "$(PACKAGE)/Dockerfile"; then \
112+
echo "ERROR: Package $(PACKAGE) does not inherit from skyhook-packages or nodewright-packages. Use 'make validate-standalone' instead."; \
113 113
exit 1; \
114 114
fi
115 115
@echo "Building container for validation: $(PACKAGE)"

nvidia-tuning-gke/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,8 @@
21 21
# Config step: prepare_nvidia_configs.sh (populate configmaps from profile)
22 22
# then update_settings.sh (base tuning apply).
23 23

24-
ARG TUNING_VERSION=1.1.4
25-
FROM ghcr.io/nvidia/skyhook-packages/tuning:${TUNING_VERSION}
24+
ARG TUNING_VERSION=1.1.6
25+
FROM ghcr.io/nvidia/nodewright-packages/tuning:${TUNING_VERSION}
26 26

27 27
COPY profiles/ /skyhook-package/profiles/
28 28
COPY skyhook_dir/prepare_nvidia_configs.sh /skyhook-package/skyhook_dir/

nvidia-tuning-gke/profiles/gb200/inference/sysctl.conf

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,5 +10,3 @@ vm.max_map_count=262144
10 10
vm.min_free_kbytes=65536
11 11
vm.overcommit_memory=1
12 12
vm.swappiness=1
13-
kernel.sched_latency_ns=1000000
14-
kernel.sched_min_granularity_ns=100000

nvidia-tuning-gke/profiles/h100/inference/sysctl.conf

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,5 +4,3 @@ net.ipv4.conf.default.arp_announce = 2
4 4
net.ipv4.conf.all.arp_ignore = 1
5 5
net.ipv4.conf.default.arp_ignore = 1
6 6
vm.swappiness=1
7-
kernel.sched_latency_ns=1000000
8-
kernel.sched_min_granularity_ns=100000

tests/integration/nvidia_tuning_gke/test_prepare_nvidia_configs.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@
41 41
@pytest.mark.parametrize(
42 42
"accelerator,intent,expected_sysctl_line,expect_containerd",
43 43
[
44-
("h100", "inference", "kernel.sched_latency_ns=1000000", False),
44+
("h100", "inference", "vm.swappiness=1", False),
45 45
("h100", "multiNodeTraining", "net.core.default_qdisc=fq", False),
46 46
("gb200", "inference", "vm.swappiness=1", True),
47 47
("gb200", "multiNodeTraining", "net.core.default_qdisc=fq", True),

0 commit comments

Comments
 (0)