|
| 1 | +import os |
| 2 | +import sys |
| 3 | +import time |
1 | 4 | import asyncio
|
| 5 | +import tempfile |
| 6 | +import subprocess |
| 7 | +from metaflow import Run |
| 8 | +from typing import List, Dict |
2 | 9 | from concurrent.futures import ProcessPoolExecutor
|
3 |
| -import subprocess, sys, os |
4 |
| -from typing import List, Dict, Optional |
5 | 10 |
|
6 | 11 |
|
7 |
| -def convert_params_to_cli_args(params: List[Dict]): |
8 |
| - converted_params = [item for pair in params.items() for item in pair] |
9 |
| - converted_params = [ |
10 |
| - str(val) if idx % 2 else f"--{val}" for idx, val in enumerate(converted_params) |
11 |
| - ] |
12 |
| - return converted_params |
| 12 | +def cli_runner(flow_file: str, command: str, args: List[str], env_vars: Dict): |
| 13 | + process = subprocess.Popen( |
| 14 | + [sys.executable, flow_file, command, *args], |
| 15 | + stdout=subprocess.PIPE, |
| 16 | + stderr=subprocess.PIPE, |
| 17 | + env=env_vars, |
| 18 | + ) |
| 19 | + return process |
13 | 20 |
|
14 | 21 |
|
15 |
| -def cli_runner(flow_file: str, command: str, args: List[str], env_vars: Dict): |
16 |
| - result = subprocess.run([sys.executable, flow_file, command, *args], env=env_vars) |
17 |
| - return result.returncode |
| 22 | +def read_from_file_when_ready(file_pointer): |
| 23 | + content = file_pointer.read().decode() |
| 24 | + while not content: |
| 25 | + time.sleep(0.1) |
| 26 | + content = file_pointer.read().decode() |
| 27 | + return content |
18 | 28 |
|
19 | 29 |
|
20 | 30 | class Pool(object):
|
21 | 31 | def __init__(
|
22 | 32 | self,
|
23 | 33 | flow_file: str,
|
24 | 34 | num_processes: int,
|
25 |
| - profile: Optional[str] = None, |
26 |
| - with_context: Optional[str] = None, |
27 | 35 | ):
|
28 | 36 | self.flow_file = flow_file
|
29 | 37 | self.num_processes = num_processes
|
30 |
| - self.profile = profile |
31 |
| - self.with_context = with_context |
| 38 | + self.runner = Runner(self.flow_file) |
32 | 39 |
|
33 |
| - def map(self, params: List[Dict], command: str = "run"): |
34 |
| - return asyncio.run(self._map(params, command)) |
| 40 | + def __enter__(self): |
| 41 | + return self |
35 | 42 |
|
36 |
| - async def _map(self, params: List[Dict], command: str = "run"): |
37 |
| - self.command = command |
| 43 | + def map(self, paramater_space: List[Dict], blocking: bool = False): |
| 44 | + return asyncio.run(self._map(paramater_space, blocking)) |
| 45 | + |
| 46 | + async def _map(self, paramater_space: List[Dict], blocking: bool = False): |
38 | 47 | loop = asyncio.get_running_loop()
|
39 | 48 | tasks, runs = [], []
|
40 | 49 | with ProcessPoolExecutor(max_workers=self.num_processes) as executor:
|
41 |
| - for each_param in params: |
42 |
| - tasks.append(loop.run_in_executor(executor, self._execute, each_param)) |
| 50 | + for set_of_params in paramater_space: |
| 51 | + tasks.append( |
| 52 | + loop.run_in_executor( |
| 53 | + executor, self.runner.run, set_of_params, blocking |
| 54 | + ) |
| 55 | + ) |
43 | 56 | for done in asyncio.as_completed(tasks):
|
44 | 57 | runs.append(await done)
|
45 | 58 |
|
46 | 59 | return runs
|
47 | 60 |
|
48 |
| - def _execute(self, params: List[Dict]): |
49 |
| - env_vars = os.environ.copy() |
50 |
| - if self.profile: |
51 |
| - env_vars.update({"METAFLOW_PROFILE": self.profile}) |
52 |
| - params = convert_params_to_cli_args(params) |
53 |
| - print(f"Starting for {params}") |
54 |
| - result = cli_runner(self.flow_file, self.command, params, env_vars) |
55 |
| - print(f"Result ready for {params}") |
56 |
| - return {" ".join(params): result} |
| 61 | + def __exit__(self, exc_type, exc_value, traceback): |
| 62 | + pass |
| 63 | + |
| 64 | + |
| 65 | +# consider more args in constructor, |
| 66 | +# list of them is available using: |
| 67 | +# `python ../try.py --help` |
| 68 | +class Runner(object): |
| 69 | + def __init__( |
| 70 | + self, |
| 71 | + flow_file: str, |
| 72 | + ): |
| 73 | + self.flow_file = flow_file |
57 | 74 |
|
58 | 75 | def __enter__(self):
|
59 |
| - print("Start....") |
60 | 76 | return self
|
61 | 77 |
|
| 78 | + # consider more args for run method, |
| 79 | + # list of them is available using: |
| 80 | + # `python ../try.py run --help` |
| 81 | + def run( |
| 82 | + self, |
| 83 | + params: Dict, # eventually, parse the file for parameters? contains stuff like {'alpha': 30} |
| 84 | + blocking: bool = False, |
| 85 | + ): |
| 86 | + env_vars = os.environ.copy() |
| 87 | + |
| 88 | + params_as_cli_args = [] |
| 89 | + for (k, v) in params.items(): |
| 90 | + params_as_cli_args.extend(["--" + str(k), str(v)]) |
| 91 | + |
| 92 | + with tempfile.TemporaryDirectory() as temp_dir: |
| 93 | + tfp_flow_name = tempfile.NamedTemporaryFile(dir=temp_dir, delete=False) |
| 94 | + tfp_run_id = tempfile.NamedTemporaryFile(dir=temp_dir, delete=False) |
| 95 | + |
| 96 | + params_as_cli_args.extend(["--run-id-file", tfp_run_id.name]) |
| 97 | + params_as_cli_args.extend(["--flow-name-file", tfp_flow_name.name]) |
| 98 | + |
| 99 | + process = cli_runner( |
| 100 | + self.flow_file, |
| 101 | + command="run", |
| 102 | + args=params_as_cli_args, |
| 103 | + env_vars=env_vars, |
| 104 | + ) |
| 105 | + if blocking: |
| 106 | + process.wait() |
| 107 | + |
| 108 | + flow_name = read_from_file_when_ready(tfp_flow_name) |
| 109 | + run_id = read_from_file_when_ready(tfp_run_id) |
| 110 | + |
| 111 | + pathspec_components = (flow_name, run_id) |
| 112 | + run_object = Run("/".join(pathspec_components), _namespace_check=False) |
| 113 | + |
| 114 | + return run_object |
| 115 | + |
62 | 116 | def __exit__(self, exc_type, exc_value, traceback):
|
63 |
| - print("End...") |
| 117 | + pass |
0 commit comments