Skip to content

Commit a3a796e

Browse files
authored
fix(agent): chdir to / and make env var setting more robust for host machine (#72)
Redo how environment variables are handled and move it all the to chroot_exec script. Precedence order is now skyhook specific > chroot env > container env This way we should capture anything the user sets and use the proper env vars for chroot
1 parent cbc79a9 commit a3a796e

File tree

4 files changed

+278
-14
lines changed

4 files changed

+278
-14
lines changed

agent/skyhook-agent/src/skyhook_agent/chroot_exec.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,45 @@
2323
import shutil
2424

2525

26+
def _get_process_env(container_env: dict, skyhook_env: dict, chroot_env: dict):
27+
# Set this first with the container environment.
28+
# We need to do this because the skyhook package could set any env var they want so that needs
29+
# to get replicated down. BUT we are in distroless so we then need to overwrite with the chroot environment
30+
# so things like path/user resolution work.
31+
process_env = dict(container_env)
32+
# Overwrite the container environment with the chroot environment
33+
process_env.update(chroot_env)
34+
# Inject the skyhook environment variables
35+
process_env.update(skyhook_env)
36+
return process_env
37+
38+
def _get_chroot_env():
39+
results = subprocess.run(["env"], capture_output=True, text=True)
40+
env = {}
41+
for line in results.stdout.split("\n"):
42+
if "=" in line:
43+
k, v = line.split("=", 1)
44+
env[k] = v
45+
return env
46+
2647
def chroot_exec(config: dict, chroot_dir: str):
2748
cmds = config["cmd"]
2849
no_chmod = config["no_chmod"]
50+
skyhook_env = config["env"]
51+
52+
# Capture container environment before chroot
53+
container_env = dict(os.environ)
54+
2955
if chroot_dir != "local":
3056
os.chroot(chroot_dir)
57+
os.chdir("/")
3158
try:
3259
if not no_chmod:
3360
# chmod +x the step
3461
os.chmod(cmds[0], os.stat(cmds[0]).st_mode | stat.S_IXGRP | stat.S_IXUSR | stat.S_IXOTH)
35-
subprocess.run(cmds, check=True)
62+
63+
process_env = _get_process_env(container_env, skyhook_env, _get_chroot_env())
64+
subprocess.run(cmds, check=True, env=process_env)
3665
except:
3766
raise
3867

agent/skyhook-agent/src/skyhook_agent/controller.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ async def _stream_process(
130130
break
131131

132132

133-
async def tee(chroot_dir: str, cmd: List[str], stdout_sink_path: str, stderr_sink_path: str, write_cmds=False, no_chmod=False, **kwargs):
133+
async def tee(chroot_dir: str, cmd: List[str], stdout_sink_path: str, stderr_sink_path: str, write_cmds=False, no_chmod=False, env: dict[str, str] = {}, **kwargs):
134134
"""
135135
Run the cmd in a subprocess and keep the stream of stdout/stderr and merge both into
136136
the sink_path as a log.
@@ -142,7 +142,7 @@ async def tee(chroot_dir: str, cmd: List[str], stdout_sink_path: str, stderr_sin
142142
sys.stdout.write(" ".join(cmd) + "\n")
143143
stdout_sink_f.write(" ".join(cmd) + "\n")
144144
with tempfile.NamedTemporaryFile(mode="w", delete=True) as f:
145-
f.write(json.dumps({"cmd": cmd, "no_chmod": no_chmod}))
145+
f.write(json.dumps({"cmd": cmd, "no_chmod": no_chmod, "env": env}))
146146
f.flush()
147147

148148
# Run the special chroot_exec.py script to chroot into the directory and run the command
@@ -220,7 +220,7 @@ def set_flag(flag_file: str, msg: str = "") -> None:
220220
f.write(msg)
221221

222222

223-
def _run(chroot_dir: str, cmds: list[str], log_path: str, write_cmds=False, no_chmod=False,**kwargs) -> int:
223+
def _run(chroot_dir: str, cmds: list[str], log_path: str, write_cmds=False, no_chmod=False, env: dict[str, str] = {}, **kwargs) -> int:
224224
"""
225225
Synchronous wrapper around the tee command to have logs written to disk
226226
"""
@@ -234,6 +234,7 @@ def _run(chroot_dir: str, cmds: list[str], log_path: str, write_cmds=False, no_c
234234
f"{log_path}.err",
235235
write_cmds=write_cmds,
236236
no_chmod=no_chmod,
237+
env=env,
237238
**kwargs
238239
)
239240
)
@@ -284,10 +285,11 @@ def run_step(
284285
time.sleep(1)
285286
log_file = get_log_file(step_path, copy_dir, config_data, chroot_dir)
286287

287-
# Make sure to include the original environment here or else things like path resolution dont work
288-
env = dict(**os.environ)
288+
# Compile additional environment variables
289+
env = {}
289290
env.update(step.env)
290291
env.update({"STEP_ROOT": get_host_path_for_steps(copy_dir), "SKYHOOK_DIR": copy_dir})
292+
291293
return_code = _run(
292294
chroot_dir,
293295
[step_path, *step.arguments],
Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
import unittest
18+
from unittest import mock
19+
from skyhook_agent.chroot_exec import _get_process_env, _get_chroot_env
20+
21+
22+
class TestChrootExec(unittest.TestCase):
23+
24+
def test_get_process_env_basic_functionality(self):
25+
"""Test _get_process_env with non-overlapping keys"""
26+
container_env = {"CONTAINER_VAR": "container_value"}
27+
chroot_env = {"CHROOT_VAR": "chroot_value"}
28+
skyhook_env = {"SKYHOOK_VAR": "skyhook_value"}
29+
30+
result = _get_process_env(container_env, skyhook_env, chroot_env)
31+
32+
expected = {
33+
"CONTAINER_VAR": "container_value",
34+
"CHROOT_VAR": "chroot_value",
35+
"SKYHOOK_VAR": "skyhook_value"
36+
}
37+
self.assertEqual(result, expected)
38+
39+
def test_get_process_env_chroot_overrides_container(self):
40+
"""Test that chroot_env overrides container_env for same keys"""
41+
container_env = {"SAME_VAR": "container_value", "CONTAINER_VAR": "container_value"}
42+
chroot_env = {"SAME_VAR": "chroot_value", "CHROOT_VAR": "chroot_value"}
43+
skyhook_env = {"SKYHOOK_VAR": "skyhook_value"}
44+
45+
result = _get_process_env(container_env, skyhook_env, chroot_env)
46+
47+
expected = {
48+
"SAME_VAR": "chroot_value", # chroot overrides container
49+
"CONTAINER_VAR": "container_value",
50+
"CHROOT_VAR": "chroot_value",
51+
"SKYHOOK_VAR": "skyhook_value"
52+
}
53+
self.assertEqual(result, expected)
54+
55+
def test_get_process_env_skyhook_overrides_all(self):
56+
"""Test that skyhook_env has highest priority and overrides both chroot and container"""
57+
container_env = {"SAME_VAR": "container_value", "CONTAINER_VAR": "container_value"}
58+
chroot_env = {"SAME_VAR": "chroot_value", "CHROOT_VAR": "chroot_value"}
59+
skyhook_env = {"SAME_VAR": "skyhook_value", "SKYHOOK_VAR": "skyhook_value"}
60+
61+
result = _get_process_env(container_env, skyhook_env, chroot_env)
62+
63+
expected = {
64+
"SAME_VAR": "skyhook_value", # skyhook overrides both chroot and container
65+
"CONTAINER_VAR": "container_value",
66+
"CHROOT_VAR": "chroot_value",
67+
"SKYHOOK_VAR": "skyhook_value"
68+
}
69+
self.assertEqual(result, expected)
70+
71+
def test_get_process_env_with_empty_dicts(self):
72+
"""Test _get_process_env with empty dictionaries"""
73+
result = _get_process_env({}, {}, {})
74+
self.assertEqual(result, {})
75+
76+
# Test with only one dict having values
77+
container_env = {"VAR": "value"}
78+
result = _get_process_env(container_env, {}, {})
79+
self.assertEqual(result, {"VAR": "value"})
80+
81+
chroot_env = {"VAR": "value"}
82+
result = _get_process_env({}, {}, chroot_env)
83+
self.assertEqual(result, {"VAR": "value"})
84+
85+
skyhook_env = {"VAR": "value"}
86+
result = _get_process_env({}, skyhook_env, {})
87+
self.assertEqual(result, {"VAR": "value"})
88+
89+
def test_get_process_env_precedence_order(self):
90+
"""Test complete precedence order: skyhook > chroot > container"""
91+
container_env = {
92+
"PATH": "/container/path",
93+
"HOME": "/container/home",
94+
"USER": "container_user",
95+
"ONLY_CONTAINER": "container_only"
96+
}
97+
chroot_env = {
98+
"PATH": "/chroot/path",
99+
"HOME": "/chroot/home",
100+
"ONLY_CHROOT": "chroot_only"
101+
}
102+
skyhook_env = {
103+
"PATH": "/skyhook/path",
104+
"ONLY_SKYHOOK": "skyhook_only"
105+
}
106+
107+
result = _get_process_env(container_env, skyhook_env, chroot_env)
108+
109+
expected = {
110+
"PATH": "/skyhook/path", # skyhook wins
111+
"HOME": "/chroot/home", # chroot wins over container
112+
"USER": "container_user", # only in container
113+
"ONLY_CONTAINER": "container_only",
114+
"ONLY_CHROOT": "chroot_only",
115+
"ONLY_SKYHOOK": "skyhook_only"
116+
}
117+
self.assertEqual(result, expected)
118+
119+
def test_get_process_env_does_not_modify_input_dicts(self):
120+
"""Test that input dictionaries are not modified"""
121+
container_env = {"VAR": "container"}
122+
chroot_env = {"VAR": "chroot"}
123+
skyhook_env = {"VAR": "skyhook"}
124+
125+
# Keep original references
126+
original_container = container_env.copy()
127+
original_chroot = chroot_env.copy()
128+
original_skyhook = skyhook_env.copy()
129+
130+
result = _get_process_env(container_env, skyhook_env, chroot_env)
131+
132+
# Verify input dicts weren't modified
133+
self.assertEqual(container_env, original_container)
134+
self.assertEqual(chroot_env, original_chroot)
135+
self.assertEqual(skyhook_env, original_skyhook)
136+
137+
# Verify result is correct
138+
self.assertEqual(result, {"VAR": "skyhook"})
139+
140+
@mock.patch('skyhook_agent.chroot_exec.subprocess.run')
141+
def test_get_chroot_env_basic_functionality(self, mock_subprocess):
142+
"""Test _get_chroot_env with typical environment output"""
143+
mock_result = mock.MagicMock()
144+
mock_result.stdout = "PATH=/usr/bin:/bin\nHOME=/root\nUSER=root\n"
145+
mock_subprocess.return_value = mock_result
146+
147+
result = _get_chroot_env()
148+
149+
expected = {
150+
"PATH": "/usr/bin:/bin",
151+
"HOME": "/root",
152+
"USER": "root"
153+
}
154+
self.assertEqual(result, expected)
155+
mock_subprocess.assert_called_once_with(["env"], capture_output=True, text=True)
156+
157+
@mock.patch('skyhook_agent.chroot_exec.subprocess.run')
158+
def test_get_chroot_env_with_multiple_equals(self, mock_subprocess):
159+
"""Test _get_chroot_env correctly handles lines with multiple '=' characters"""
160+
mock_result = mock.MagicMock()
161+
mock_result.stdout = "VAR1=value=with=equals\nVAR2=simple_value\n"
162+
mock_subprocess.return_value = mock_result
163+
164+
result = _get_chroot_env()
165+
166+
expected = {
167+
"VAR1": "value=with=equals", # Should split only on first =
168+
"VAR2": "simple_value"
169+
}
170+
self.assertEqual(result, expected)
171+
172+
@mock.patch('skyhook_agent.chroot_exec.subprocess.run')
173+
def test_get_chroot_env_ignores_lines_without_equals(self, mock_subprocess):
174+
"""Test _get_chroot_env ignores lines that don't contain '='"""
175+
mock_result = mock.MagicMock()
176+
mock_result.stdout = "PATH=/usr/bin\ninvalid_line_no_equals\nHOME=/root\n\n"
177+
mock_subprocess.return_value = mock_result
178+
179+
result = _get_chroot_env()
180+
181+
expected = {
182+
"PATH": "/usr/bin",
183+
"HOME": "/root"
184+
}
185+
self.assertEqual(result, expected)
186+
187+
@mock.patch('skyhook_agent.chroot_exec.subprocess.run')
188+
def test_get_chroot_env_with_empty_output(self, mock_subprocess):
189+
"""Test _get_chroot_env with empty subprocess output"""
190+
mock_result = mock.MagicMock()
191+
mock_result.stdout = ""
192+
mock_subprocess.return_value = mock_result
193+
194+
result = _get_chroot_env()
195+
196+
self.assertEqual(result, {})
197+
mock_subprocess.assert_called_once_with(["env"], capture_output=True, text=True)
198+
199+
@mock.patch('skyhook_agent.chroot_exec.subprocess.run')
200+
def test_get_chroot_env_with_empty_values(self, mock_subprocess):
201+
"""Test _get_chroot_env handles environment variables with empty values"""
202+
mock_result = mock.MagicMock()
203+
mock_result.stdout = "EMPTY_VAR=\nNORM_VAR=value\nANOTHER_EMPTY=\n"
204+
mock_subprocess.return_value = mock_result
205+
206+
result = _get_chroot_env()
207+
208+
expected = {
209+
"EMPTY_VAR": "",
210+
"NORM_VAR": "value",
211+
"ANOTHER_EMPTY": ""
212+
}
213+
self.assertEqual(result, expected)
214+
215+
@mock.patch('skyhook_agent.chroot_exec.subprocess.run')
216+
def test_get_chroot_env_with_whitespace_and_special_chars(self, mock_subprocess):
217+
"""Test _get_chroot_env handles values with whitespace and special characters"""
218+
mock_result = mock.MagicMock()
219+
mock_result.stdout = "VAR_WITH_SPACES=value with spaces\nSPECIAL_CHARS=!@#$%^&*()\nPATH=/usr/bin:/bin\n"
220+
mock_subprocess.return_value = mock_result
221+
222+
result = _get_chroot_env()
223+
224+
expected = {
225+
"VAR_WITH_SPACES": "value with spaces",
226+
"SPECIAL_CHARS": "!@#$%^&*()",
227+
"PATH": "/usr/bin:/bin"
228+
}
229+
self.assertEqual(result, expected)
230+
231+
232+
if __name__ == '__main__':
233+
unittest.main()

agent/skyhook-agent/tests/test_controller.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -244,9 +244,9 @@ def test_run_step_replaces_environment_variables(
244244
["copy_dir/skyhook_dir/foo", "a", "foo"],
245245
log_file,
246246
f"{log_file}.err",
247-
env=dict(**os.environ, **{"STEP_ROOT": "copy_dir/skyhook_dir", "FOO": "foo", "SKYHOOK_DIR": "copy_dir"}),
248247
write_cmds=False,
249-
no_chmod=False
248+
no_chmod=False,
249+
env={"STEP_ROOT": "copy_dir/skyhook_dir", "SKYHOOK_DIR": "copy_dir"}
250250
)
251251
]
252252
)
@@ -690,9 +690,9 @@ def test_from_and_to_version_is_given_to_upgrade_step_as_env_var(self, run_mock,
690690
controller.get_log_file(
691691
f"{controller.get_host_path_for_steps(copy_dir)}/foo", f"/foo", config_data, root_dir
692692
),
693-
env=dict(**os.environ,
694-
**{"PREVIOUS_VERSION": "0.0.9", "CURRENT_VERSION": "1.0.0"},
695-
**{"STEP_ROOT": f"{root_dir}/{copy_dir}/skyhook_dir", "SKYHOOK_DIR": copy_dir})
693+
env=dict(
694+
**{"PREVIOUS_VERSION": "0.0.9", "CURRENT_VERSION": "1.0.0"},
695+
**{"STEP_ROOT": f"{root_dir}/{copy_dir}/skyhook_dir", "SKYHOOK_DIR": copy_dir})
696696
)
697697
])
698698

@@ -729,9 +729,9 @@ def test_from_and_to_version_is_given_to_upgradestep_class_as_env_var_and_args(s
729729
controller.get_log_file(
730730
f"{controller.get_host_path_for_steps(copy_dir)}/foo", f"/foo", config_data, root_dir
731731
),
732-
env=dict(**os.environ,
733-
**{"PREVIOUS_VERSION": "2024.07.28", "CURRENT_VERSION": "1.0.0"},
734-
**{"STEP_ROOT": f"{root_dir}/{copy_dir}/skyhook_dir", "SKYHOOK_DIR": copy_dir})
732+
env=dict(
733+
**{"PREVIOUS_VERSION": "2024.07.28", "CURRENT_VERSION": "1.0.0"},
734+
**{"STEP_ROOT": f"{root_dir}/{copy_dir}/skyhook_dir", "SKYHOOK_DIR": copy_dir})
735735
)
736736
])
737737

0 commit comments

Comments
 (0)