Skip to content

Commit 237f48e

Browse files
feat(optimizer): Add get_best_results API to OptimizerClient (kubeflow#152)
* feat(optimizer): Add get_best_results API to OptimizerClient

  Signed-off-by: kramaranya <kramaranya15@gmail.com>

* Update kubeflow/optimizer/api/optimizer_client.py

  Co-authored-by: Andrey Velichkevich <andrey.velichkevich@gmail.com>
  Signed-off-by: Anya Kramar <akramar@redhat.com>

---------

Signed-off-by: kramaranya <kramaranya15@gmail.com>
Signed-off-by: Anya Kramar <akramar@redhat.com>
Co-authored-by: Andrey Velichkevich <andrey.velichkevich@gmail.com>
1 parent bef3dae commit 237f48e

File tree

5 files changed

+82
-47
lines changed

5 files changed

+82
-47
lines changed

kubeflow/optimizer/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,12 @@
2020

2121
# Import the Kubeflow Optimizer types.
2222
from kubeflow.optimizer.types.algorithm_types import GridSearch, RandomSearch
23-
from kubeflow.optimizer.types.optimization_types import Objective, OptimizationJob, TrialConfig
23+
from kubeflow.optimizer.types.optimization_types import (
24+
Objective,
25+
OptimizationJob,
26+
Result,
27+
TrialConfig,
28+
)
2429
from kubeflow.optimizer.types.search_types import Search
2530

2631
# Import the Kubeflow Trainer types.
@@ -33,6 +38,7 @@
3338
"OptimizationJob",
3439
"OptimizerClient",
3540
"RandomSearch",
41+
"Result",
3642
"Search",
3743
"TrainJobTemplate",
3844
"TrialConfig",

kubeflow/optimizer/api/optimizer_client.py

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from kubeflow.optimizer.types.optimization_types import (
2424
Objective,
2525
OptimizationJob,
26-
Trial,
26+
Result,
2727
TrialConfig,
2828
)
2929
from kubeflow.trainer.types.types import TrainJobTemplate
@@ -160,6 +160,25 @@ def get_job_logs(
160160
"""
161161
return self.backend.get_job_logs(name=name, trial_name=trial_name, follow=follow)
162162

163+
def get_best_results(self, name: str) -> Optional[Result]:
164+
"""Get the best hyperparameters and metrics from an OptimizationJob.
165+
166+
This method retrieves the optimal hyperparameters and their corresponding metrics
167+
from the best trial found during the optimization process.
168+
169+
Args:
170+
name: Name of the OptimizationJob.
171+
172+
Returns:
173+
A Result object containing the best hyperparameters and metrics, or None if
174+
no best trial is available yet.
175+
176+
Raises:
177+
TimeoutError: Timeout to get an OptimizationJob.
178+
RuntimeError: Failed to get an OptimizationJob.
179+
"""
180+
return self.backend.get_best_results(name=name)
181+
163182
def wait_for_job_status(
164183
self,
165184
name: str,
@@ -193,22 +212,6 @@ def wait_for_job_status(
193212
polling_interval=polling_interval,
194213
)
195214

196-
def get_best_trial(self, name: str) -> Optional[Trial]:
197-
"""Get the current best Trial for an OptimizationJob.
198-
199-
Args:
200-
name: Name of the OptimizationJob.
201-
202-
Returns:
203-
The current best Trial with parameters, metrics, and associated TrainJob.
204-
Returns None if the best trial is not available yet.
205-
206-
Raises:
207-
TimeoutError: Timeout to get OptimizationJob.
208-
RuntimeError: Failed to get OptimizationJob.
209-
"""
210-
return self.backend.get_best_trial(name=name)
211-
212215
def delete_job(self, name: str):
213216
"""Delete the OptimizationJob.
214217

kubeflow/optimizer/backends/base.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from kubeflow.optimizer.types.optimization_types import (
2222
Objective,
2323
OptimizationJob,
24-
Trial,
24+
Result,
2525
TrialConfig,
2626
)
2727
from kubeflow.trainer.types.types import TrainJobTemplate
@@ -57,6 +57,10 @@ def get_job_logs(
5757
) -> Iterator[str]:
5858
raise NotImplementedError()
5959

60+
@abc.abstractmethod
61+
def get_best_results(self, name: str) -> Optional[Result]:
62+
raise NotImplementedError()
63+
6064
@abc.abstractmethod
6165
def wait_for_job_status(
6266
self,
@@ -67,10 +71,6 @@ def wait_for_job_status(
6771
) -> OptimizationJob:
6872
raise NotImplementedError()
6973

70-
@abc.abstractmethod
71-
def get_best_trial(self, name: str) -> Optional[Trial]:
72-
raise NotImplementedError()
73-
7474
@abc.abstractmethod
7575
def delete_job(self, name: str):
7676
raise NotImplementedError()

kubeflow/optimizer/backends/kubernetes/backend.py

Lines changed: 37 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
Metric,
3636
Objective,
3737
OptimizationJob,
38+
Result,
3839
Trial,
3940
TrialConfig,
4041
)
@@ -223,7 +224,7 @@ def get_job_logs(
223224
# Determine what trial to get logs from.
224225
if trial_name is None:
225226
# Get logs from the best current trial.
226-
best_trial = self.get_best_trial(name)
227+
best_trial = self._get_best_trial(name)
227228
if best_trial is None:
228229
# Get first trial if available.
229230
optimization_job = self.get_job(name)
@@ -249,6 +250,18 @@ def get_job_logs(
249250
pod_name=pod_name, container_name=container_name, follow=follow
250251
)
251252

253+
def get_best_results(self, name: str) -> Optional[Result]:
254+
"""Get the best hyperparameters and metrics from an OptimizationJob"""
255+
best_trial = self._get_best_trial(name)
256+
257+
if best_trial is None:
258+
return None
259+
260+
return Result(
261+
parameters=best_trial.parameters,
262+
metrics=best_trial.metrics,
263+
)
264+
252265
def wait_for_job_status(
253266
self,
254267
name: str,
@@ -293,7 +306,29 @@ def wait_for_job_status(
293306
f"{status}"
294307
)
295308

296-
def get_best_trial(self, name: str) -> Optional[Trial]:
309+
def delete_job(self, name: str):
310+
"""Delete the OptimizationJob"""
311+
312+
try:
313+
self.custom_api.delete_namespaced_custom_object(
314+
constants.GROUP,
315+
constants.VERSION,
316+
self.namespace,
317+
constants.EXPERIMENT_PLURAL,
318+
name=name,
319+
)
320+
except multiprocessing.TimeoutError as e:
321+
raise TimeoutError(
322+
f"Timeout to delete {constants.OPTIMIZATION_JOB_KIND}: {self.namespace}/{name}"
323+
) from e
324+
except Exception as e:
325+
raise RuntimeError(
326+
f"Failed to delete {constants.OPTIMIZATION_JOB_KIND}: {self.namespace}/{name}"
327+
) from e
328+
329+
logger.debug(f"{constants.OPTIMIZATION_JOB_KIND} {self.namespace}/{name} has been deleted")
330+
331+
def _get_best_trial(self, name: str) -> Optional[Trial]:
297332
"""Get the best current Trial for the OptimizationJob"""
298333
optimization_job = self.__get_experiment_cr(name)
299334

@@ -338,28 +373,6 @@ def get_best_trial(self, name: str) -> Optional[Trial]:
338373

339374
return None
340375

341-
def delete_job(self, name: str):
342-
"""Delete the OptimizationJob"""
343-
344-
try:
345-
self.custom_api.delete_namespaced_custom_object(
346-
constants.GROUP,
347-
constants.VERSION,
348-
self.namespace,
349-
constants.EXPERIMENT_PLURAL,
350-
name=name,
351-
)
352-
except multiprocessing.TimeoutError as e:
353-
raise TimeoutError(
354-
f"Timeout to delete {constants.OPTIMIZATION_JOB_KIND}: {self.namespace}/{name}"
355-
) from e
356-
except Exception as e:
357-
raise RuntimeError(
358-
f"Failed to delete {constants.OPTIMIZATION_JOB_KIND}: {self.namespace}/{name}"
359-
) from e
360-
361-
logger.debug(f"{constants.OPTIMIZATION_JOB_KIND} {self.namespace}/{name} has been deleted")
362-
363376
def __get_experiment_cr(self, name: str) -> models.V1beta1Experiment:
364377
"""Get the Experiment CR from Kubernetes API"""
365378
try:

kubeflow/optimizer/types/optimization_types.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,19 @@ class Metric:
7373
latest: str
7474

7575

76+
@dataclass
77+
class Result:
78+
"""Result containing the best hyperparameters and metrics.
79+
80+
Args:
81+
parameters (`dict[str, str]`): The best hyperparameters found during optimization.
82+
metrics (`list[Metric]`): The metrics achieved with these hyperparameters.
83+
"""
84+
85+
parameters: dict[str, str]
86+
metrics: list[Metric]
87+
88+
7689
# Representation of the single trial
7790
@dataclass
7891
class Trial:

0 commit comments

Comments
 (0)