1
1
import os
2
2
import sys
3
- import time
4
3
import tempfile
4
+ import time
5
5
from typing import Dict , Iterator , Optional , Tuple
6
- from metaflow import Run
7
- from .subprocess_manager import SubprocessManager , CommandManager
6
+
7
+ from metaflow import Run , metadata
8
+
9
+ from .subprocess_manager import CommandManager , SubprocessManager
10
+
11
+
12
def clear_and_set_os_environ(env: Dict[str, str]) -> None:
    """
    Replace the current process environment with the contents of `env`.

    Every variable currently present in `os.environ` is removed and the
    variables from `env` are installed in their place. The change is made
    in place on `os.environ`, so it is visible process-wide.

    Parameters
    ----------
    env : Dict[str, str]
        Mapping of variable names to values that should make up the new
        environment.
    """
    # Snapshot first: if the caller hands us `os.environ` itself (or any
    # mapping backed by it), clearing before copying would leave nothing to
    # restore and silently wipe the whole environment.
    snapshot = dict(env)
    os.environ.clear()
    os.environ.update(snapshot)
8
15
9
16
10
17
def read_from_file_when_ready (file_path : str , timeout : float = 5 ):
@@ -227,7 +234,8 @@ def __init__(
227
234
from metaflow .runner .click_api import MetaflowAPI
228
235
229
236
self .flow_file = flow_file
230
- self .env_vars = os .environ .copy ()
237
+ self .old_env = os .environ .copy ()
238
+ self .env_vars = self .old_env .copy ()
231
239
self .env_vars .update (env or {})
232
240
if profile :
233
241
self .env_vars ["METAFLOW_PROFILE" ] = profile
@@ -241,9 +249,21 @@ def __enter__(self) -> "Runner":
241
249
# Async context-manager entry: returns this Runner instance unchanged, so
# `async with ... as r` binds `r` to the same object.
async def __aenter__ (self ) -> "Runner" :
242
250
return self
243
251
244
- def __get_executing_run (self , tfp_pathspec , command_obj ):
252
+ def __get_executing_run (self , tfp_runner_attribute , command_obj ):
253
+ # When two 'Runner' executions are done sequentially i.e. one after the other
254
+ # the 2nd run can end up using the 1st run's previously set metadata and
255
+ # environment variables.
256
+
257
+ # It is thus necessary to set them to correct values before we return
258
+ # the Run object.
245
259
try :
246
- pathspec = read_from_file_when_ready (tfp_pathspec .name , timeout = 10 )
260
+ # Set the environment variables to what they were before the run executed.
261
+ clear_and_set_os_environ (self .old_env )
262
+
263
+ # Set the correct metadata from the runner_attribute file corresponding to this run.
264
+ content = read_from_file_when_ready (tfp_runner_attribute .name , timeout = 10 )
265
+ metadata_for_flow , pathspec = content .split (":" , maxsplit = 1 )
266
+ metadata (metadata_for_flow )
247
267
run_object = Run (pathspec , _namespace_check = False )
248
268
return ExecutingRun (self , command_obj , run_object )
249
269
except TimeoutError as e :
@@ -280,17 +300,19 @@ def run(self, show_output: bool = False, **kwargs) -> ExecutingRun:
280
300
ExecutingRun object for this run.
281
301
"""
282
302
with tempfile .TemporaryDirectory () as temp_dir :
283
- tfp_pathspec = tempfile .NamedTemporaryFile (dir = temp_dir , delete = False )
303
+ tfp_runner_attribute = tempfile .NamedTemporaryFile (
304
+ dir = temp_dir , delete = False
305
+ )
284
306
command = self .api (** self .top_level_kwargs ).run (
285
- pathspec_file = tfp_pathspec .name , ** kwargs
307
+ runner_attribute_file = tfp_runner_attribute .name , ** kwargs
286
308
)
287
309
288
310
pid = self .spm .run_command (
289
311
[sys .executable , * command ], env = self .env_vars , show_output = show_output
290
312
)
291
313
command_obj = self .spm .get (pid )
292
314
293
- return self .__get_executing_run (tfp_pathspec , command_obj )
315
+ return self .__get_executing_run (tfp_runner_attribute , command_obj )
294
316
295
317
def resume (self , show_output : bool = False , ** kwargs ):
296
318
"""
@@ -315,17 +337,19 @@ def resume(self, show_output: bool = False, **kwargs):
315
337
ExecutingRun object for this resumed run.
316
338
"""
317
339
with tempfile .TemporaryDirectory () as temp_dir :
318
- tfp_pathspec = tempfile .NamedTemporaryFile (dir = temp_dir , delete = False )
340
+ tfp_runner_attribute = tempfile .NamedTemporaryFile (
341
+ dir = temp_dir , delete = False
342
+ )
319
343
command = self .api (** self .top_level_kwargs ).resume (
320
- pathspec_file = tfp_pathspec .name , ** kwargs
344
+ runner_attribute_file = tfp_runner_attribute .name , ** kwargs
321
345
)
322
346
323
347
pid = self .spm .run_command (
324
348
[sys .executable , * command ], env = self .env_vars , show_output = show_output
325
349
)
326
350
command_obj = self .spm .get (pid )
327
351
328
- return self .__get_executing_run (tfp_pathspec , command_obj )
352
+ return self .__get_executing_run (tfp_runner_attribute , command_obj )
329
353
330
354
async def async_run (self , ** kwargs ) -> ExecutingRun :
331
355
"""
@@ -344,17 +368,20 @@ async def async_run(self, **kwargs) -> ExecutingRun:
344
368
ExecutingRun object for this run.
345
369
"""
346
370
with tempfile .TemporaryDirectory () as temp_dir :
347
- tfp_pathspec = tempfile .NamedTemporaryFile (dir = temp_dir , delete = False )
371
+ tfp_runner_attribute = tempfile .NamedTemporaryFile (
372
+ dir = temp_dir , delete = False
373
+ )
348
374
command = self .api (** self .top_level_kwargs ).run (
349
- pathspec_file = tfp_pathspec .name , ** kwargs
375
+ runner_attribute_file = tfp_runner_attribute .name , ** kwargs
350
376
)
351
377
352
378
pid = await self .spm .async_run_command (
353
- [sys .executable , * command ], env = self .env_vars
379
+ [sys .executable , * command ],
380
+ env = self .env_vars ,
354
381
)
355
382
command_obj = self .spm .get (pid )
356
383
357
- return self .__get_executing_run (tfp_pathspec , command_obj )
384
+ return self .__get_executing_run (tfp_runner_attribute , command_obj )
358
385
359
386
async def async_resume (self , ** kwargs ):
360
387
"""
@@ -373,17 +400,20 @@ async def async_resume(self, **kwargs):
373
400
ExecutingRun object for this resumed run.
374
401
"""
375
402
with tempfile .TemporaryDirectory () as temp_dir :
376
- tfp_pathspec = tempfile .NamedTemporaryFile (dir = temp_dir , delete = False )
403
+ tfp_runner_attribute = tempfile .NamedTemporaryFile (
404
+ dir = temp_dir , delete = False
405
+ )
377
406
command = self .api (** self .top_level_kwargs ).resume (
378
- pathspec_file = tfp_pathspec .name , ** kwargs
407
+ runner_attribute_file = tfp_runner_attribute .name , ** kwargs
379
408
)
380
409
381
410
pid = await self .spm .async_run_command (
382
- [sys .executable , * command ], env = self .env_vars
411
+ [sys .executable , * command ],
412
+ env = self .env_vars ,
383
413
)
384
414
command_obj = self .spm .get (pid )
385
415
386
- return self .__get_executing_run (tfp_pathspec , command_obj )
416
+ return self .__get_executing_run (tfp_runner_attribute , command_obj )
387
417
388
418
# Context-manager exit: calls cleanup() on self.spm (presumably the
# SubprocessManager that launched the commands; confirm in __init__).
# The exception arguments are not inspected in the lines visible here.
def __exit__ (self , exc_type , exc_value , traceback ):
389
419
self .spm .cleanup ()
0 commit comments