|
12 | 12 | from enum import Enum |
13 | 13 | from functools import wraps |
14 | 14 | from logging import Logger |
15 | | -from typing import Any, NamedTuple |
| 15 | +from typing import Any |
16 | 16 |
|
17 | | -import numpy as np |
18 | | -import numpy.typing as npt |
19 | 17 | import pandas as pd |
20 | 18 | from ax.core.arm import Arm |
21 | 19 | from ax.core.base_trial import BaseTrial, TrialStatus |
22 | 20 | from ax.core.batch_trial import BatchTrial |
23 | | -from ax.core.data import Data |
24 | 21 | from ax.core.experiment import Experiment |
25 | 22 | from ax.core.generator_run import GeneratorRun |
26 | 23 | from ax.core.map_metric import MapMetric |
27 | | -from ax.core.objective import MultiObjective |
28 | 24 | from ax.core.observation import ObservationFeatures |
29 | 25 | from ax.core.optimization_config import OptimizationConfig |
30 | 26 | from ax.core.trial import Trial |
31 | | -from ax.core.types import ComparisonOp |
32 | 27 | from ax.exceptions.core import AxError |
33 | 28 | from ax.utils.common.constants import Keys |
34 | 29 | from ax.utils.common.logger import get_logger |
35 | 30 | from pyre_extensions import none_throws |
36 | 31 |
|
37 | 32 | logger: Logger = get_logger(__name__) |
38 | | -TArmTrial = tuple[str, int] |
39 | 33 |
|
40 | 34 | # Threshold for switching to pending points extraction based on trial status. |
41 | 35 | MANY_TRIALS_IN_EXPERIMENT = 100 |
42 | 36 | OLD_TRIAL_THRESHOLD_DAYS = 10 |
43 | 37 |
|
44 | | -# --------------------------- Data integrity utils. --------------------------- |
45 | | - |
46 | | - |
47 | | -class MissingMetrics(NamedTuple): |
48 | | - objective: dict[str, set[TArmTrial]] |
49 | | - outcome_constraints: dict[str, set[TArmTrial]] |
50 | | - tracking_metrics: dict[str, set[TArmTrial]] |
51 | | - |
52 | | - |
53 | | -def get_missing_metrics( |
54 | | - data: Data, optimization_config: OptimizationConfig |
55 | | -) -> MissingMetrics: |
56 | | - """Return all arm_name, trial_index pairs, for which some of the |
57 | | - observations of optimization config metrics are missing. |
58 | | -
|
59 | | - Args: |
60 | | - data: Data to search. |
61 | | - optimization_config: provides metric_names to search for. |
62 | | -
|
63 | | - Returns: |
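64 | | - A MissingMetrics NamedTuple with fields objective, outcome_constraints and tracking_metrics; each maps a metric name to the set of (arm_name, trial_index) pairs missing that metric, with empty entries omitted. |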
65 | | - """ |
66 | | - objective = optimization_config.objective |
67 | | - if isinstance(objective, MultiObjective): |
68 | | - objective_metric_names = [m.name for m in objective.metrics] |
69 | | - else: |
70 | | - objective_metric_names = [optimization_config.objective.metric.name] |
71 | | - |
72 | | - outcome_constraints_metric_names = [ |
73 | | - outcome_constraint.metric.name |
74 | | - for outcome_constraint in optimization_config.outcome_constraints |
75 | | - ] |
76 | | - missing_objectives = { |
77 | | - objective_metric_name: _get_missing_arm_trial_pairs(data, objective_metric_name) |
78 | | - for objective_metric_name in objective_metric_names |
79 | | - } |
80 | | - missing_outcome_constraints = get_missing_metrics_by_name( |
81 | | - data, outcome_constraints_metric_names |
82 | | - ) |
83 | | - all_metric_names = set(data.df["metric_name"]) |
84 | | - optimization_config_metric_names = set(missing_objectives.keys()).union( |
85 | | - outcome_constraints_metric_names |
86 | | - ) |
87 | | - missing_tracking_metric_names = all_metric_names.difference( |
88 | | - optimization_config_metric_names |
89 | | - ) |
90 | | - missing_tracking_metrics = get_missing_metrics_by_name( |
91 | | - data=data, metric_names=missing_tracking_metric_names |
92 | | - ) |
93 | | - return MissingMetrics( |
94 | | - objective={k: v for k, v in missing_objectives.items() if len(v) > 0}, |
95 | | - outcome_constraints={ |
96 | | - k: v for k, v in missing_outcome_constraints.items() if len(v) > 0 |
97 | | - }, |
98 | | - tracking_metrics={ |
99 | | - k: v for k, v in missing_tracking_metrics.items() if len(v) > 0 |
100 | | - }, |
101 | | - ) |
102 | | - |
103 | | - |
104 | | -def get_missing_metrics_by_name( |
105 | | - data: Data, metric_names: Iterable[str] |
106 | | -) -> dict[str, set[TArmTrial]]: |
107 | | - """Return all arm_name, trial_index pairs missing some observations of |
108 | | - specified metrics. |
109 | | -
|
110 | | - Args: |
111 | | - data: Data to search. |
112 | | - metric_names: list of metrics to search for. |
113 | | -
|
114 | | - Returns: |
115 | | - A Dict[str, missing_metrics], one entry for each metric_name. |
116 | | - """ |
117 | | - missing_metrics = { |
118 | | - metric_name: _get_missing_arm_trial_pairs(data=data, metric_name=metric_name) |
119 | | - for metric_name in metric_names |
120 | | - } |
121 | | - return missing_metrics |
122 | | - |
123 | | - |
124 | | -def _get_missing_arm_trial_pairs(data: Data, metric_name: str) -> set[TArmTrial]: |
125 | | - """Return arm_name and trial_index pairs missing a specified metric.""" |
126 | | - metric_df = data.df[data.df.metric_name == metric_name] |
127 | | - present_metric_df = metric_df[metric_df["mean"].notnull()] |
128 | | - arm_trial_pairs = set(zip(data.df["arm_name"], data.df["trial_index"])) |
129 | | - arm_trial_pairs_with_metric = set( |
130 | | - zip(present_metric_df["arm_name"], present_metric_df["trial_index"]) |
131 | | - ) |
132 | | - missing_arm_trial_pairs = arm_trial_pairs.difference(arm_trial_pairs_with_metric) |
133 | | - return missing_arm_trial_pairs |
134 | | - |
135 | 38 |
|
136 | 39 | # ------------------- Utils shared by Client and BatchClient-------------------- |
137 | 40 | def _maybe_update_trial_status_to_complete( |
@@ -169,40 +72,6 @@ def _maybe_update_trial_status_to_complete( |
169 | 72 | # -------------------- Experiment result extraction utils. --------------------- |
170 | 73 |
|
171 | 74 |
|
172 | | -def best_feasible_objective( |
173 | | - optimization_config: OptimizationConfig, |
174 | | - values: dict[str, npt.NDArray], |
175 | | -) -> npt.NDArray: |
176 | | - """Compute the best feasible objective value found by each iteration. |
177 | | -
|
178 | | - Args: |
179 | | - optimization_config: Optimization config. |
180 | | - values: Dictionary from metric name to array of value at each |
181 | | - iteration. If optimization config contains outcome constraints, values |
182 | | - for them must be present in `values`. |
183 | | -
|
184 | | - Returns: Array of cumulative best feasible value. |
185 | | - """ |
186 | | - # Get objective at each iteration |
187 | | - objective = optimization_config.objective |
188 | | - f = values[objective.metric.signature] |
189 | | - # Set infeasible points to have infinitely bad values |
190 | | - infeas_val = np.inf if objective.minimize else -np.inf |
191 | | - for oc in optimization_config.outcome_constraints: |
192 | | - if oc.relative: |
193 | | - raise ValueError( |
194 | | - "Benchmark aggregation does not support relative constraints" |
195 | | - ) |
196 | | - g = values[oc.metric.signature] |
197 | | - feas = g <= oc.bound if oc.op == ComparisonOp.LEQ else g >= oc.bound |
198 | | - f[~feas] = infeas_val |
199 | | - |
200 | | - # Get cumulative best |
201 | | - minimize = objective.minimize |
202 | | - accumulate = np.minimum.accumulate if minimize else np.maximum.accumulate |
203 | | - return accumulate(f) |
204 | | - |
205 | | - |
206 | 75 | def _extract_generator_runs(trial: BaseTrial) -> list[GeneratorRun]: |
207 | 76 | if isinstance(trial, BatchTrial): |
208 | 77 | return trial.generator_runs |
|
0 commit comments