Skip to content

Commit 2a0e01b

Browse files
committed
Support user-specified Python executables for remote bootstrapping and save-logs functionality
1 parent dc6af41 commit 2a0e01b

File tree

7 files changed

+23
-18
lines changed

7 files changed

+23
-18
lines changed

metaflow/metaflow_environment.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def decospecs(self):
4242
"""
4343
return ()
4444

45-
def bootstrap_commands(self, step_name, datastore_type):
45+
def bootstrap_commands(self, step_name, datastore_type, default_executable_path=None):
4646
"""
4747
A list of shell commands to bootstrap this environment in a remote runtime.
4848
"""

metaflow/mflog/__init__.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -41,21 +41,23 @@
4141
" }" % TASK_LOG_SOURCE
4242
)
4343

44-
BASH_SAVE_LOGS_ARGS = ["python", "-m", "metaflow.mflog.save_logs"]
45-
BASH_SAVE_LOGS = " ".join(BASH_SAVE_LOGS_ARGS)
44+
45+
def _get_bash_capture_log(default_python_executable="python"):
46+
bash_save_log_args = [default_python_executable, "-m", "metaflow.mflog.save_logs"]
47+
return bash_save_log_args, " ".join(bash_save_log_args)
4648

4749

4850
# this function returns a bash expression that redirects stdout
4951
# and stderr of the given bash expression to mflog.tee
50-
def bash_capture_logs(bash_expr, var_transform=None):
52+
def bash_capture_logs(bash_expr, var_transform=None, default_python_executable_path="python"):
5153
if var_transform is None:
5254
var_transform = lambda s: "$%s" % s
5355

54-
cmd = "python -m metaflow.mflog.tee %s %s"
56+
cmd = "%s -m metaflow.mflog.tee %s %s"
5557
parts = (
5658
bash_expr,
57-
cmd % (TASK_LOG_SOURCE, var_transform("MFLOG_STDOUT")),
58-
cmd % (TASK_LOG_SOURCE, var_transform("MFLOG_STDERR")),
59+
cmd % (default_python_executable_path, TASK_LOG_SOURCE, var_transform("MFLOG_STDOUT")),
60+
cmd % (default_python_executable_path, TASK_LOG_SOURCE, var_transform("MFLOG_STDERR")),
5961
)
6062
return "(%s) 1>> >(%s) 2>> >(%s >&2)" % parts
6163

metaflow/mflog/save_logs_periodically.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from threading import Thread
66

77
from metaflow.sidecar import MessageTypes
8-
from . import update_delay, BASH_SAVE_LOGS_ARGS
8+
from . import update_delay, _get_bash_capture_log
99

1010

1111
class SaveLogsPeriodicallySidecar(object):
@@ -38,7 +38,7 @@ def _file_size(path):
3838
if new_sizes != sizes:
3939
sizes = new_sizes
4040
try:
41-
subprocess.call(BASH_SAVE_LOGS_ARGS)
41+
subprocess.call(_get_bash_capture_log()[0])
4242
except:
4343
pass
4444
time.sleep(update_delay(time.time() - start_time))

metaflow/plugins/argo/argo_workflows.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
UI_URL,
4646
)
4747
from metaflow.metaflow_config_funcs import config_values
48-
from metaflow.mflog import BASH_SAVE_LOGS, bash_capture_logs, export_mflog_env_vars
48+
from metaflow.mflog import _get_bash_capture_log, bash_capture_logs, export_mflog_env_vars
4949
from metaflow.parameters import deploy_time_eval
5050
from metaflow.plugins.kubernetes.kubernetes import (
5151
parse_kube_keyvalue_list,
@@ -1194,7 +1194,7 @@ def _container_templates(self):
11941194

11951195
cmd_str = "%s; c=$?; %s; exit $c" % (
11961196
" && ".join([init_cmds, bash_capture_logs(" && ".join(step_cmds))]),
1197-
BASH_SAVE_LOGS,
1197+
_get_bash_capture_log()[1],
11981198
)
11991199
cmds = shlex.split('bash -c "%s"' % cmd_str)
12001200

metaflow/plugins/aws/batch/batch.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
export_mflog_env_vars,
3131
bash_capture_logs,
3232
tail_logs,
33-
BASH_SAVE_LOGS,
33+
_get_bash_capture_log,
3434
)
3535

3636
from .batch_client import BatchClient
@@ -92,7 +92,7 @@ def _command(self, environment, code_package_url, step_name, step_cmds, task_spe
9292
# Note that if step_expr OOMs, this tail expression is never executed.
9393
# We lose the last logs in this scenario (although they are visible
9494
# still through AWS CloudWatch console).
95-
cmd_str += "c=$?; %s; exit $c" % BASH_SAVE_LOGS
95+
cmd_str += "c=$?; %s; exit $c" % _get_bash_capture_log()[1]
9696
return shlex.split('bash -c "%s"' % cmd_str)
9797

9898
def _search_jobs(self, flow_name, run_id, user):

metaflow/plugins/kubernetes/kubernetes.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
from metaflow.metaflow_config_funcs import config_values
4242

4343
from metaflow.mflog import (
44-
BASH_SAVE_LOGS,
44+
_get_bash_capture_log,
4545
bash_capture_logs,
4646
export_mflog_env_vars,
4747
get_log_tailer,
@@ -130,7 +130,7 @@ def _command(
130130
# We lose the last logs in this scenario.
131131
#
132132
# TODO: Capture hard exit logs in Kubernetes.
133-
cmd_str += "c=$?; %s; exit $c" % BASH_SAVE_LOGS
133+
cmd_str += "c=$?; %s; exit $c" % _get_bash_capture_log()[1]
134134
# For supporting sandboxes, ensure that a custom script is executed before
135135
# anything else is executed. The script is passed in as an env var.
136136
cmd_str = (

metaflow/plugins/pypi/conda_environment.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -331,19 +331,22 @@ def add_to_package(self):
331331
files.append((manifest, os.path.basename(manifest)))
332332
return files
333333

334-
def bootstrap_commands(self, step_name, datastore_type):
334+
def bootstrap_commands(self, step_name, datastore_type, default_executable_path=None):
335335
# Bootstrap conda and execution environment for step
336336
step = next(step for step in self.flow if step.name == step_name)
337337
id_ = self.get_environment(step).get("id_")
338338
if id_:
339+
python_executable_path = (
340+
"python" if default_executable_path is None else default_executable_path
341+
)
339342
return [
340343
"echo 'Bootstrapping virtual environment...'",
341344
# We have to prevent the tracing module from loading,
342345
# as the bootstrapping process uses the internal S3 client which would fail to import tracing
343346
# due to the required dependencies being bundled into the conda environment,
344347
# which is yet to be initialized at this point.
345-
'DISABLE_TRACING=True python -m metaflow.plugins.pypi.bootstrap "%s" %s "%s" linux-64'
346-
% (self.flow.name, id_, self.datastore_type),
348+
'DISABLE_TRACING=True %s -m metaflow.plugins.pypi.bootstrap "%s" %s "%s" linux-64'
349+
% (python_executable_path, self.flow.name, id_, self.datastore_type),
347350
"echo 'Environment bootstrapped.'",
348351
"export PATH=$PATH:$(pwd)/micromamba",
349352
]

0 commit comments

Comments
 (0)