-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtest_apply.py
More file actions
147 lines (125 loc) · 5.15 KB
/
test_apply.py
File metadata and controls
147 lines (125 loc) · 5.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env python3
"""
Tests for nvidia-setup apply.sh script.
"""
import pytest
from tests.helpers.assertions import (
assert_exit_code,
assert_output_contains,
assert_output_not_contains,
)
from tests.helpers.docker_test import DockerTestRunner
def test_unsupported_combination():
    """Check that apply.sh rejects an unknown service/accelerator pair.

    The run must exit with code 1, and stdout must carry both the
    "Unsupported combination" error and a "Supported:" listing.
    """
    bogus_configmaps = {"service": "invalid", "accelerator": "invalid"}
    runner = DockerTestRunner(package="nvidia-setup")
    try:
        outcome = runner.run_script(script="apply.sh", configmaps=bogus_configmaps)
        assert_exit_code(outcome, 1)
        for expected_text in ("Unsupported combination", "Supported:"):
            assert_output_contains(outcome.stdout, expected_text)
    finally:
        # Always release the runner, even when an assertion above fails.
        runner.cleanup()
@pytest.mark.skip(reason="Skipping test_apply_eks_h100. Kernel is flaky based on where it is run.")
def test_apply_eks_h100(base_image):
    """Run apply.sh for the eks/h100 pair and expect a zero exit code.

    Currently skipped via the decorator above; system operations are
    disabled for the run through ``skip_system_operations=True``.
    """
    configmaps = {"service": "eks", "accelerator": "h100"}
    runner = DockerTestRunner(package="nvidia-setup", base_image=base_image)
    try:
        outcome = runner.run_script(
            script="apply.sh",
            configmaps=configmaps,
            skip_system_operations=True,
        )
        # Success is expected even though system operations are skipped.
        assert_exit_code(outcome, 0)
    finally:
        runner.cleanup()
@pytest.mark.skip(reason="Skipping test_apply_eks_gb200. Kernel is flaky based on where it is run.")
def test_apply_eks_gb200(base_image):
    """Run apply.sh for the eks/gb200 pair and expect a zero exit code.

    Currently skipped via the decorator above; system operations are
    disabled for the run through ``skip_system_operations=True``.
    """
    configmaps = {"service": "eks", "accelerator": "gb200"}
    runner = DockerTestRunner(package="nvidia-setup", base_image=base_image)
    try:
        outcome = runner.run_script(
            script="apply.sh",
            configmaps=configmaps,
            skip_system_operations=True,
        )
        assert_exit_code(outcome, 0)
    finally:
        runner.cleanup()
def test_apply_with_env_overrides(base_image):
    """Run apply.sh for eks/h100 with NVIDIA_* environment overrides set.

    The run is made with system operations skipped and must exit with 0.
    """
    configmaps = {"service": "eks", "accelerator": "h100"}
    overrides = {
        "NVIDIA_KERNEL": "6.8.0",
        # The container kernel may be newer than the override above.
        "NVIDIA_SETUP_KERNEL_ALLOW_NEWER": "true",
        "NVIDIA_EFA": "1.31.0",
        "NVIDIA_LUSTRE": "aws",
    }
    runner = DockerTestRunner(package="nvidia-setup", base_image=base_image)
    try:
        outcome = runner.run_script(
            script="apply.sh",
            configmaps=configmaps,
            env_vars=overrides,
            skip_system_operations=True,
        )
        assert_exit_code(outcome, 0)
    finally:
        runner.cleanup()
def test_apply_missing_configmap():
    """Run apply.sh without any configmap entries and expect exit code 1."""
    runner = DockerTestRunner(package="nvidia-setup")
    try:
        # An empty mapping stands in for missing configmap files.
        no_configmaps = {}
        outcome = runner.run_script(
            script="apply.sh",
            configmaps=no_configmaps,
            skip_system_operations=True,
        )
        # Service and accelerator are required, so the script must fail.
        assert_exit_code(outcome, 1)
    finally:
        runner.cleanup()
def test_apply_dynamic_supported_listing(base_image):
    """Check that a rejected combination still prints the supported ones.

    With an invalid service/accelerator pair, apply.sh must exit with 1
    and its stdout must mention at least "eks-h100" and "eks-gb200".
    """
    bogus_configmaps = {"service": "invalid", "accelerator": "invalid"}
    runner = DockerTestRunner(package="nvidia-setup", base_image=base_image)
    try:
        outcome = runner.run_script(script="apply.sh", configmaps=bogus_configmaps)
        assert_exit_code(outcome, 1)
        # Minimum set of combinations expected in the supported list.
        for combination in ("eks-h100", "eks-gb200"):
            assert_output_contains(outcome.stdout, combination)
    finally:
        runner.cleanup()
def test_apply_install_kernel_only_skips_actual_install(base_image):
    """Exercise the kernel-only path for eks/h100 with system operations skipped.

    NVIDIA_SETUP_INSTALL_KERNEL=true is passed in the environment. The run
    must exit with 0 and report on stdout that the kernel install was
    skipped for the test environment.
    """
    configmaps = {"service": "eks", "accelerator": "h100"}
    kernel_only_env = {"NVIDIA_SETUP_INSTALL_KERNEL": "true"}
    runner = DockerTestRunner(package="nvidia-setup", base_image=base_image)
    try:
        outcome = runner.run_script(
            script="apply.sh",
            configmaps=configmaps,
            env_vars=kernel_only_env,
            skip_system_operations=True,
        )
        assert_exit_code(outcome, 0)
        skip_notice = "Skipping kernel install for test environment"
        assert_output_contains(outcome.stdout, skip_notice)
    finally:
        runner.cleanup()
def test_apply_install_kernel_only_eks_gb200_skips_actual_install(base_image):
    """Exercise the kernel-only path for eks/gb200 with system operations skipped.

    NVIDIA_SETUP_INSTALL_KERNEL=true is passed in the environment. The run
    must exit with 0 and report on stdout that the kernel install was
    skipped for the test environment.
    """
    configmaps = {"service": "eks", "accelerator": "gb200"}
    kernel_only_env = {"NVIDIA_SETUP_INSTALL_KERNEL": "true"}
    runner = DockerTestRunner(package="nvidia-setup", base_image=base_image)
    try:
        outcome = runner.run_script(
            script="apply.sh",
            configmaps=configmaps,
            env_vars=kernel_only_env,
            skip_system_operations=True,
        )
        assert_exit_code(outcome, 0)
        skip_notice = "Skipping kernel install for test environment"
        assert_output_contains(outcome.stdout, skip_notice)
    finally:
        runner.cleanup()