Skip to content

Commit eb91727

Browse files
committed
feat(agent): Add env var SKYHOOK_AGENT_WRITE_LOGS to be able to toggle log files on disk
fix(agent): wrong call to make the log_file_glob for cleanup. If it is set to true it will create the logs files. If it is set to false it will NOT create the log files but it will still write to the stdout/stderr
1 parent 9a2ff12 commit eb91727

File tree

2 files changed

+272
-23
lines changed

2 files changed

+272
-23
lines changed

agent/skyhook-agent/src/skyhook_agent/controller.py

Lines changed: 55 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
# limitations under the License.
1818

1919

20+
import contextlib
21+
from multiprocessing import context
2022
import sys
2123
import os
2224
import shutil
@@ -69,7 +71,9 @@ def _get_env_config() -> tuple[str]:
6971

7072
SKYHOOK_LOG_DIR = os.getenv("SKYHOOK_LOG_DIR", "/var/log/skyhook")
7173

72-
return SKYHOOK_RESOURCE_ID, SKYHOOK_DATA_DIR, SKYHOOK_ROOT_DIR, SKYHOOK_LOG_DIR
74+
SKYHOOK_AGENT_WRITE_LOGS = os.getenv("SKYHOOK_AGENT_WRITE_LOGS", "true").lower() == 'true'
75+
76+
return SKYHOOK_RESOURCE_ID, SKYHOOK_DATA_DIR, SKYHOOK_ROOT_DIR, SKYHOOK_LOG_DIR, SKYHOOK_AGENT_WRITE_LOGS
7377

7478
def _get_package_information(config_data: dict) -> tuple[str, str]:
7579
return config_data["package_name"], config_data["package_version"]
@@ -129,15 +133,43 @@ async def _stream_process(
129133
sink.flush()
130134
break
131135

136+
# A file like context manager that black holes all writes to it. Does not need to implement read
137+
class NullWriter:
138+
"""A file-like context manager that discards all writes."""
139+
140+
def write(self, *args, **kwargs):
141+
# Swallow everything and return len to mimic file behaviour if needed
142+
if args:
143+
return len(args[0])
144+
return 0
145+
146+
def flush(self):
147+
pass
148+
149+
def close(self):
150+
pass
151+
152+
def __enter__(self):
153+
return self
132154

133-
async def tee(chroot_dir: str, cmd: List[str], stdout_sink_path: str, stderr_sink_path: str, write_cmds=False, no_chmod=False, env: dict[str, str] = {}, **kwargs):
155+
def __exit__(self, exc_type, exc_val, exc_tb):
156+
# Nothing to cleanup, obviously
157+
return False
158+
159+
160+
async def tee(chroot_dir: str, cmd: List[str], stdout_sink_path: str, stderr_sink_path: str, write_cmds=False, no_chmod=False, env: dict[str, str] = {}, write_logs: bool=True, **kwargs):
134161
"""
135162
Run the cmd in a subprocess and keep the stream of stdout/stderr and merge both into
136163
the sink_path as a log.
137164
"""
138165
# get the directory of the script
139166
script_dir = os.path.dirname(os.path.abspath(__file__))
140-
with open(stdout_sink_path, "w") as stdout_sink_f, open(stderr_sink_path, "w") as stderr_sink_f:
167+
# Switch out the opens with nulls in the event of not wanting to write files
168+
if write_logs:
169+
files = (lambda : open(stdout_sink_path, 'w'), lambda: open(stderr_sink_path, 'w'))
170+
else:
171+
files = (lambda: NullWriter(), lambda: NullWriter())
172+
with files[0]() as stdout_sink_f, files[1]() as stderr_sink_f:
141173
if write_cmds:
142174
sys.stdout.write(" ".join(cmd) + "\n")
143175
stdout_sink_f.write(" ".join(cmd) + "\n")
@@ -172,7 +204,7 @@ def get_host_path_for_steps(copy_dir: str):
172204
return f"{copy_dir}/skyhook_dir"
173205

174206
def get_skyhook_directory(root_mount: str) -> str:
175-
_, _, SKYHOOK_ROOT_DIR, _ = _get_env_config()
207+
_, _, SKYHOOK_ROOT_DIR, _, _ = _get_env_config()
176208
return f"{root_mount}{SKYHOOK_ROOT_DIR}"
177209

178210
def get_flag_dir(root_mount: str) -> str:
@@ -182,7 +214,7 @@ def get_history_dir(root_mount: str) -> str:
182214
return f"{get_skyhook_directory(root_mount)}/history"
183215

184216
def get_log_dir(root_mount: str) -> str:
185-
_, _, _, SKYHOOK_LOG_DIR = _get_env_config()
217+
_, _, _, SKYHOOK_LOG_DIR, _ = _get_env_config()
186218
return f"{root_mount}{SKYHOOK_LOG_DIR}"
187219

188220
def get_log_file(step_path: str, copy_dir: str, config_data: dict, root_mount: str, timestamp: str=None) -> str:
@@ -220,21 +252,23 @@ def set_flag(flag_file: str, msg: str = "") -> None:
220252
f.write(msg)
221253

222254

223-
def _run(chroot_dir: str, cmds: list[str], log_path: str, write_cmds=False, no_chmod=False, env: dict[str, str] = {}, **kwargs) -> int:
255+
def _run(chroot_dir: str, cmds: list[str], log_path: str|None, write_cmds=False, no_chmod=False, env: dict[str, str] = {}, write_logs: bool=True, **kwargs) -> int:
224256
"""
225257
Synchronous wrapper around the tee command to have logs written to disk
226258
"""
227259
# "tee" the stdout and stderr to a file to log the step results
260+
stderr_path = f"{log_path}.err" if log_path else None
228261

229262
result = asyncio.run(
230263
tee(
231264
chroot_dir,
232265
cmds,
233266
log_path,
234-
f"{log_path}.err",
267+
stderr_path,
235268
write_cmds=write_cmds,
236269
no_chmod=no_chmod,
237270
env=env,
271+
write_logs=write_logs,
238272
**kwargs
239273
)
240274
)
@@ -283,7 +317,11 @@ def run_step(
283317
return True
284318

285319
time.sleep(1)
286-
log_file = get_log_file(step_path, copy_dir, config_data, chroot_dir)
320+
_, _, _, _, SKYHOOK_AGENT_WRITE_LOGS = _get_env_config()
321+
if SKYHOOK_AGENT_WRITE_LOGS:
322+
log_file = get_log_file(step_path, copy_dir, config_data, chroot_dir)
323+
else:
324+
log_file = None
287325

288326
# Compile additional environment variables
289327
env = {}
@@ -294,9 +332,11 @@ def run_step(
294332
chroot_dir,
295333
[step_path, *step.arguments],
296334
log_file,
297-
env=env)
335+
env=env,
336+
write_logs=SKYHOOK_AGENT_WRITE_LOGS)
298337

299-
cleanup_old_logs(get_log_file(step_path, copy_dir, config_data, "*"))
338+
if SKYHOOK_AGENT_WRITE_LOGS:
339+
cleanup_old_logs(get_log_file(step_path, copy_dir, config_data, chroot_dir, "*"))
300340
if return_code not in step.returncodes:
301341
print(f"FAILED: {step.path} {' '.join(step.arguments)} {return_code}")
302342
return True
@@ -421,7 +461,7 @@ def summarize_check_results(results: list[bool], step_data: dict[Mode, list[Step
421461
return False
422462

423463
def make_config_data_from_resource_id() -> dict:
424-
SKYHOOK_RESOURCE_ID, _, _, _ = _get_env_config()
464+
SKYHOOK_RESOURCE_ID, _, _, _, _ = _get_env_config()
425465

426466
# Interrupts don't really have config data we can read from the Package as it is run standalone.
427467
# So read it off of SKYHOOK_RESOURCE_ID instead
@@ -441,7 +481,7 @@ def do_interrupt(interrupt_data: str, root_mount: str, copy_dir: str) -> bool:
441481
def _make_interrupt_flag(interrupt_dir: str, interrupt_id: int) -> str:
442482
return f"{interrupt_dir}/{interrupt_id}.complete"
443483

444-
SKYHOOK_RESOURCE_ID, _, _, _ = _get_env_config()
484+
SKYHOOK_RESOURCE_ID, _, _, _, _ = _get_env_config()
445485
config_data = make_config_data_from_resource_id()
446486

447487
interrupt = interrupts.inflate(interrupt_data)
@@ -509,7 +549,7 @@ def main(mode: Mode, root_mount: str, copy_dir: str, interrupt_data: None|str, a
509549
if mode == Mode.INTERRUPT:
510550
return do_interrupt(interrupt_data, root_mount, copy_dir)
511551

512-
_, SKYHOOK_DATA_DIR, _, _ = _get_env_config()
552+
_, SKYHOOK_DATA_DIR, _, _, _ = _get_env_config()
513553

514554
# Check to see if the directory has already been copied down. If it hasn't assume that we
515555
# are running in legacy mode and copy the directory down.
@@ -651,12 +691,13 @@ def cli(sys_argv: list[str]=sys.argv):
651691
print(str.center("ENV CONFIGURATION", 20, "-"))
652692
print(f"COPY_RESOLV: {copy_resolv}")
653693
print(f"OVERLAY_ALWAYS_RUN_STEP: {always_run_step}")
654-
SKYHOOK_RESOURCE_ID, SKYHOOK_DATA_DIR, SKYHOOK_ROOT_DIR, SKYHOOK_LOG_DIR = _get_env_config()
694+
SKYHOOK_RESOURCE_ID, SKYHOOK_DATA_DIR, SKYHOOK_ROOT_DIR, SKYHOOK_LOG_DIR, SKYHOOK_AGENT_WRITE_LOGS = _get_env_config()
655695
print(f"SKYHOOK_RESOURCE_ID: {SKYHOOK_RESOURCE_ID}")
656696
print(f"SKYHOOK_DATA_DIR: {SKYHOOK_DATA_DIR}")
657697
print(f"SKYHOOK_ROOT_DIR: {SKYHOOK_ROOT_DIR}")
658698
print(f"SKYHOOK_LOG_DIR: {SKYHOOK_LOG_DIR}")
659699
print(f"SKYHOOK_AGENT_BUFFER_LIMIT: {buff_size}")
700+
print(f"SKYHOOK_AGENT_WRITE_LOGS: {SKYHOOK_AGENT_WRITE_LOGS}")
660701
print(str.center("Directory CONFIGURATION", 20, "-"))
661702
# print flag dir and log dir
662703
config_data = make_config_data_from_resource_id()

0 commit comments

Comments
 (0)