Skip to content

Commit 5e7400d

Browse files
Lena Kashtelyan authored and facebook-github-bot committed
Remove dead utilities from ax/core/utils.py
Summary: Remove dead utilities from `ax/core/utils.py` that have no production callers outside the module itself (only referenced in `test_utils.py`):
- `MissingMetrics` (NamedTuple)
- `get_missing_metrics`
- `get_missing_metrics_by_name`
- `_get_missing_arm_trial_pairs`
- `best_feasible_objective`

Also remove the now-unused imports: `numpy`, `numpy.typing`, `NamedTuple`, `Data`, `MultiObjective`, and `ComparisonOp` from `utils.py`.

Reviewed By: saitcakmak

Differential Revision: D96015016
1 parent 1e48d0f commit 5e7400d

2 files changed

Lines changed: 1 addition & 246 deletions

File tree

ax/core/tests/test_utils.py

Lines changed: 0 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from datetime import datetime, timedelta
1111
from unittest.mock import patch
1212

13-
import numpy as np
1413
import pandas as pd
1514
from ax.core.arm import Arm
1615
from ax.core.batch_trial import BatchTrial
@@ -28,19 +27,15 @@
2827
from ax.core.utils import (
2928
_maybe_update_trial_status_to_complete,
3029
batch_trial_only,
31-
best_feasible_objective,
3230
compute_metric_availability,
3331
extract_pending_observations,
34-
get_missing_metrics,
35-
get_missing_metrics_by_name,
3632
get_model_times,
3733
get_model_trace_of_times,
3834
get_pending_observation_features,
3935
get_pending_observation_features_based_on_trial_status as get_pending_status,
4036
get_target_trial_index,
4137
is_bandit_experiment,
4238
MetricAvailability,
43-
MissingMetrics,
4439
)
4540
from ax.exceptions.core import AxError
4641
from ax.utils.common.constants import Keys
@@ -95,120 +90,11 @@ def setUp(self) -> None:
9590
trial_index=self.hss_trial.index,
9691
metadata=self.hss_cand_metadata,
9792
)
98-
self.df = pd.DataFrame(
99-
[
100-
{
101-
"arm_name": "0_0",
102-
"mean": 2.0,
103-
"sem": 0.2,
104-
"trial_index": 1,
105-
"metric_name": "a",
106-
"start_time": "2018-01-01",
107-
"end_time": "2018-01-02",
108-
"metric_signature": "a",
109-
},
110-
{
111-
"arm_name": "0_0",
112-
"mean": 1.8,
113-
"sem": 0.3,
114-
"trial_index": 1,
115-
"metric_name": "b",
116-
"start_time": "2018-01-01",
117-
"end_time": "2018-01-02",
118-
"metric_signature": "b",
119-
},
120-
{
121-
"arm_name": "0_1",
122-
"mean": float("nan"),
123-
"sem": float("nan"),
124-
"trial_index": 1,
125-
"metric_name": "a",
126-
"start_time": "2018-01-01",
127-
"end_time": "2018-01-02",
128-
"metric_signature": "a",
129-
},
130-
{
131-
"arm_name": "0_1",
132-
"mean": 3.7,
133-
"sem": 0.5,
134-
"trial_index": 1,
135-
"metric_name": "b",
136-
"start_time": "2018-01-01",
137-
"end_time": "2018-01-02",
138-
"metric_signature": "b",
139-
},
140-
{
141-
"arm_name": "0_2",
142-
"mean": 0.5,
143-
"sem": None,
144-
"trial_index": 1,
145-
"metric_name": "a",
146-
"start_time": "2018-01-01",
147-
"end_time": "2018-01-02",
148-
"metric_signature": "a",
149-
},
150-
{
151-
"arm_name": "0_2",
152-
"mean": float("nan"),
153-
"sem": float("nan"),
154-
"trial_index": 1,
155-
"metric_name": "b",
156-
"start_time": "2018-01-01",
157-
"end_time": "2018-01-02",
158-
"metric_signature": "b",
159-
},
160-
{
161-
"arm_name": "0_2",
162-
"mean": float("nan"),
163-
"sem": float("nan"),
164-
"trial_index": 1,
165-
"metric_name": "c",
166-
"start_time": "2018-01-01",
167-
"end_time": "2018-01-02",
168-
"metric_signature": "c",
169-
},
170-
]
171-
)
172-
173-
self.data = Data(df=self.df)
174-
175-
self.optimization_config = OptimizationConfig(
176-
objective=Objective(metric=Metric(name="a"), minimize=False),
177-
outcome_constraints=[
178-
OutcomeConstraint(
179-
metric=Metric(name="b"),
180-
op=ComparisonOp.GEQ,
181-
bound=0,
182-
relative=False,
183-
)
184-
],
185-
)
18693
self.batch_experiment = get_branin_experiment(with_completed_trial=False)
18794
self.batch_experiment.status_quo = Arm(
18895
name="status_quo", parameters={"x1": 0.0, "x2": 0.0}
18996
)
19097

191-
def test_get_missing_metrics_by_name(self) -> None:
192-
expected = {"a": {("0_1", 1)}, "b": {("0_2", 1)}}
193-
actual = get_missing_metrics_by_name(self.data, ["a", "b"])
194-
self.assertEqual(actual, expected)
195-
196-
def test_get_missing_metrics(self) -> None:
197-
expected = MissingMetrics(
198-
{"a": {("0_1", 1)}},
199-
{"b": {("0_2", 1)}},
200-
{"c": {("0_0", 1), ("0_1", 1), ("0_2", 1)}},
201-
)
202-
actual = get_missing_metrics(self.data, self.optimization_config)
203-
self.assertEqual(actual, expected)
204-
205-
def test_best_feasible_objective(self) -> None:
206-
bfo = best_feasible_objective(
207-
self.optimization_config,
208-
values={"a": np.array([1.0, 3.0, 2.0]), "b": np.array([0.0, -1.0, 0.0])},
209-
)
210-
self.assertEqual(list(bfo), [1.0, 1.0, 2.0])
211-
21298
def test_get_model_times(self) -> None:
21399
exp = get_branin_experiment(num_trial=2)
214100
fit_times, gen_times = get_model_trace_of_times(exp)

ax/core/utils.py

Lines changed: 1 addition & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -12,126 +12,29 @@
1212
from enum import Enum
1313
from functools import wraps
1414
from logging import Logger
15-
from typing import Any, NamedTuple
15+
from typing import Any
1616

17-
import numpy as np
18-
import numpy.typing as npt
1917
import pandas as pd
2018
from ax.core.arm import Arm
2119
from ax.core.base_trial import BaseTrial, TrialStatus
2220
from ax.core.batch_trial import BatchTrial
23-
from ax.core.data import Data
2421
from ax.core.experiment import Experiment
2522
from ax.core.generator_run import GeneratorRun
2623
from ax.core.map_metric import MapMetric
27-
from ax.core.objective import MultiObjective
2824
from ax.core.observation import ObservationFeatures
2925
from ax.core.optimization_config import OptimizationConfig
3026
from ax.core.trial import Trial
31-
from ax.core.types import ComparisonOp
3227
from ax.exceptions.core import AxError
3328
from ax.utils.common.constants import Keys
3429
from ax.utils.common.logger import get_logger
3530
from pyre_extensions import none_throws
3631

3732
logger: Logger = get_logger(__name__)
38-
TArmTrial = tuple[str, int]
3933

4034
# Threshold for switching to pending points extraction based on trial status.
4135
MANY_TRIALS_IN_EXPERIMENT = 100
4236
OLD_TRIAL_THRESHOLD_DAYS = 10
4337

44-
# --------------------------- Data integrity utils. ---------------------------
45-
46-
47-
class MissingMetrics(NamedTuple):
48-
objective: dict[str, set[TArmTrial]]
49-
outcome_constraints: dict[str, set[TArmTrial]]
50-
tracking_metrics: dict[str, set[TArmTrial]]
51-
52-
53-
def get_missing_metrics(
54-
data: Data, optimization_config: OptimizationConfig
55-
) -> MissingMetrics:
56-
"""Return all arm_name, trial_index pairs, for which some of the
57-
observations of optimization config metrics are missing.
58-
59-
Args:
60-
data: Data to search.
61-
optimization_config: provides metric_names to search for.
62-
63-
Returns:
64-
A NamedTuple(missing_objective, Dict[str, missing_outcome_constraint])
65-
"""
66-
objective = optimization_config.objective
67-
if isinstance(objective, MultiObjective):
68-
objective_metric_names = [m.name for m in objective.metrics]
69-
else:
70-
objective_metric_names = [optimization_config.objective.metric.name]
71-
72-
outcome_constraints_metric_names = [
73-
outcome_constraint.metric.name
74-
for outcome_constraint in optimization_config.outcome_constraints
75-
]
76-
missing_objectives = {
77-
objective_metric_name: _get_missing_arm_trial_pairs(data, objective_metric_name)
78-
for objective_metric_name in objective_metric_names
79-
}
80-
missing_outcome_constraints = get_missing_metrics_by_name(
81-
data, outcome_constraints_metric_names
82-
)
83-
all_metric_names = set(data.df["metric_name"])
84-
optimization_config_metric_names = set(missing_objectives.keys()).union(
85-
outcome_constraints_metric_names
86-
)
87-
missing_tracking_metric_names = all_metric_names.difference(
88-
optimization_config_metric_names
89-
)
90-
missing_tracking_metrics = get_missing_metrics_by_name(
91-
data=data, metric_names=missing_tracking_metric_names
92-
)
93-
return MissingMetrics(
94-
objective={k: v for k, v in missing_objectives.items() if len(v) > 0},
95-
outcome_constraints={
96-
k: v for k, v in missing_outcome_constraints.items() if len(v) > 0
97-
},
98-
tracking_metrics={
99-
k: v for k, v in missing_tracking_metrics.items() if len(v) > 0
100-
},
101-
)
102-
103-
104-
def get_missing_metrics_by_name(
105-
data: Data, metric_names: Iterable[str]
106-
) -> dict[str, set[TArmTrial]]:
107-
"""Return all arm_name, trial_index pairs missing some observations of
108-
specified metrics.
109-
110-
Args:
111-
data: Data to search.
112-
metric_names: list of metrics to search for.
113-
114-
Returns:
115-
A Dict[str, missing_metrics], one entry for each metric_name.
116-
"""
117-
missing_metrics = {
118-
metric_name: _get_missing_arm_trial_pairs(data=data, metric_name=metric_name)
119-
for metric_name in metric_names
120-
}
121-
return missing_metrics
122-
123-
124-
def _get_missing_arm_trial_pairs(data: Data, metric_name: str) -> set[TArmTrial]:
125-
"""Return arm_name and trial_index pairs missing a specified metric."""
126-
metric_df = data.df[data.df.metric_name == metric_name]
127-
present_metric_df = metric_df[metric_df["mean"].notnull()]
128-
arm_trial_pairs = set(zip(data.df["arm_name"], data.df["trial_index"]))
129-
arm_trial_pairs_with_metric = set(
130-
zip(present_metric_df["arm_name"], present_metric_df["trial_index"])
131-
)
132-
missing_arm_trial_pairs = arm_trial_pairs.difference(arm_trial_pairs_with_metric)
133-
return missing_arm_trial_pairs
134-
13538

13639
# ------------------- Utils shared by Client and BatchClient--------------------
13740
def _maybe_update_trial_status_to_complete(
@@ -169,40 +72,6 @@ def _maybe_update_trial_status_to_complete(
16972
# -------------------- Experiment result extraction utils. ---------------------
17073

17174

172-
def best_feasible_objective(
173-
optimization_config: OptimizationConfig,
174-
values: dict[str, npt.NDArray],
175-
) -> npt.NDArray:
176-
"""Compute the best feasible objective value found by each iteration.
177-
178-
Args:
179-
optimization_config: Optimization config.
180-
values: Dictionary from metric name to array of value at each
181-
iteration. If optimization config contains outcome constraints, values
182-
for them must be present in `values`.
183-
184-
Returns: Array of cumulative best feasible value.
185-
"""
186-
# Get objective at each iteration
187-
objective = optimization_config.objective
188-
f = values[objective.metric.signature]
189-
# Set infeasible points to have infinitely bad values
190-
infeas_val = np.inf if objective.minimize else -np.inf
191-
for oc in optimization_config.outcome_constraints:
192-
if oc.relative:
193-
raise ValueError(
194-
"Benchmark aggregation does not support relative constraints"
195-
)
196-
g = values[oc.metric.signature]
197-
feas = g <= oc.bound if oc.op == ComparisonOp.LEQ else g >= oc.bound
198-
f[~feas] = infeas_val
199-
200-
# Get cumulative best
201-
minimize = objective.minimize
202-
accumulate = np.minimum.accumulate if minimize else np.maximum.accumulate
203-
return accumulate(f)
204-
205-
20675
def _extract_generator_runs(trial: BaseTrial) -> list[GeneratorRun]:
20776
if isinstance(trial, BatchTrial):
20877
return trial.generator_runs

0 commit comments

Comments
 (0)