
Commit 9175d18

Authored by ephoris, motus, and bpkroth

Fix mixed numeric datatypes for optimizers (#667)

Addressing issue discussed in #666

---------

Co-authored-by: Sergiy Matusevych <[email protected]>
Co-authored-by: Brian Kroth <[email protected]>

1 parent 4d7e08f commit 9175d18

File tree

6 files changed: +77 -7 lines changed
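
The underlying pandas behavior: DataFrame.iterrows() materializes each row as a Series with a single common dtype, so a frame that mixes integer and float columns has its integer values silently upcast to floats. Casting the frame to object dtype ('O') first keeps each cell's original type, which is the one-line fix applied throughout this commit. A minimal standalone repro (illustrative only, not part of the diff):

import pandas as pd

# One int64 column and one float64 column, like the mixed test space below.
df = pd.DataFrame({'x': [1, 2], 'y': [0.5, 1.5]})

_, row = next(df.iterrows())
print(row['x'])   # 1.0 -- the whole row was upcast to float64, int-ness lost

_, row = next(df.astype('O').iterrows())
print(row['x'])   # 1 -- object dtype preserves the original integer value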

Diff for: mlos_bench/mlos_bench/storage/util.py

+1-1
@@ -30,7 +30,7 @@ def kv_df_to_dict(dataframe: pandas.DataFrame) -> Dict[str, Optional[TunableValu
     dataframe.rename(columns={'metric': 'parameter'}, inplace=True)
     assert dataframe.columns.tolist() == ['parameter', 'value']
     data = {}
-    for _, row in dataframe.iterrows():
+    for _, row in dataframe.astype('O').iterrows():
         assert isinstance(row['parameter'], str)
         assert row['value'] is None or isinstance(row['value'], (str, int, float))
         if row['parameter'] in data:

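A useful side effect: astype('O') goes through numpy's object conversion, which turns numpy scalars into native Python types. That is what lets the isinstance(row['value'], (str, int, float)) assertion above pass, since numpy.int64 is not a subclass of int. A quick illustrative check (not from the commit):

import pandas as pd

df = pd.DataFrame({'value': [5]})
print(isinstance(df['value'].iloc[0], int))                # False: numpy.int64
print(isinstance(df.astype('O')['value'].iloc[0], int))    # True: plain Python int
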
Diff for: mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py

+1-1
@@ -336,5 +336,5 @@ def _to_configspace_configs(self, configurations: pd.DataFrame) -> List[ConfigSp
         """
         return [
             ConfigSpace.Configuration(self.optimizer_parameter_space, values=config.to_dict())
-            for (_, config) in configurations.iterrows()
+            for (_, config) in configurations.astype('O').iterrows()
         ]
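
Why the upcast breaks the optimizers: ConfigSpace.Configuration validates each value's type against its hyperparameter, so an integer hyperparameter handed a float (as a plain iterrows() row would deliver) is rejected. A hedged sketch of the failure mode with a toy space (the exact error type may vary by ConfigSpace version):

import ConfigSpace as CS

space = CS.ConfigurationSpace(seed=42)
space.add_hyperparameter(CS.UniformIntegerHyperparameter(name='x', lower=0, upper=5))

try:
    CS.Configuration(space, values={'x': 3.0})   # float for an int parameter
except Exception as ex:
    print(ex)   # typically reported as an illegal value for 'x'

config = CS.Configuration(space, values={'x': 3})   # the true int is accepted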

Diff for: mlos_core/mlos_core/optimizers/flaml_optimizer.py

+1-1
@@ -86,7 +86,7 @@ def _register(self, configurations: pd.DataFrame, scores: pd.Series,
         """
         if context is not None:
             raise NotImplementedError()
-        for (_, config), score in zip(configurations.iterrows(), scores):
+        for (_, config), score in zip(configurations.astype('O').iterrows(), scores):
             cs_config: ConfigSpace.Configuration = ConfigSpace.Configuration(
                 self.optimizer_parameter_space, values=config.to_dict())
             if cs_config in self.evaluated_samples:

Diff for: mlos_core/mlos_core/spaces/adapters/llamatune.py

+3-2
@@ -89,8 +89,9 @@ def target_parameter_space(self) -> ConfigSpace.ConfigurationSpace:
 
     def inverse_transform(self, configurations: pd.DataFrame) -> pd.DataFrame:
         target_configurations = []
-        for (_, config) in configurations.iterrows():
-            configuration = ConfigSpace.Configuration(self.orig_parameter_space, values=config.to_dict())
+        for (_, config) in configurations.astype('O').iterrows():
+            configuration = ConfigSpace.Configuration(
+                self.orig_parameter_space, values=config.to_dict())
 
             target_config = self._suggested_configs.get(configuration, None)
             # NOTE: HeSBO is a non-linear projection method, and does not inherently support inverse projection

Diff for: mlos_core/mlos_core/tests/__init__.py

+3
@@ -12,6 +12,9 @@
 from pkgutil import walk_packages
 from typing import List, Optional, Set, Type, TypeVar
 
+# A common seed to use to avoid tracking down race conditions and intermingling
+# issues of seeds across tests that run in non-deterministic parallel orders.
+SEED = 42
 
 if sys.version_info >= (3, 10):
     from typing import TypeAlias

Diff for: mlos_core/mlos_core/tests/optimizers/optimizer_test.py

+68-2
@@ -23,7 +23,7 @@
 from mlos_core.optimizers.bayesian_optimizers import BaseBayesianOptimizer, SmacOptimizer
 from mlos_core.spaces.adapters import SpaceAdapterType
 
-from mlos_core.tests import get_all_concrete_subclasses
+from mlos_core.tests import get_all_concrete_subclasses, SEED
 
 
 _LOG = logging.getLogger(__name__)
@@ -76,7 +76,7 @@ def objective(x: pd.Series) -> npt.ArrayLike:   # pylint: disable=invalid-name
         ret: npt.ArrayLike = (6 * x - 2)**2 * np.sin(12 * x - 4)
         return ret
     # Emukit doesn't allow specifying a random state, so we set the global seed.
-    np.random.seed(42)
+    np.random.seed(SEED)
     optimizer = optimizer_class(parameter_space=configuration_space, **kwargs)
 
     with pytest.raises(ValueError, match="No observations"):
@@ -298,3 +298,69 @@ def test_optimizer_type_defs(optimizer_class: Type[BaseOptimizer]) -> None:
     """
     optimizer_type_classes = {member.value for member in OptimizerType}
     assert optimizer_class in optimizer_type_classes
+
+
+@pytest.mark.parametrize(('optimizer_type', 'kwargs'), [
+    # Default optimizer
+    (None, {}),
+    # Enumerate all supported Optimizers
+    *[(member, {}) for member in OptimizerType],
+    # Optimizer with non-empty kwargs argument
+])
+def test_mixed_numeric_type_input_space_types(optimizer_type: Optional[OptimizerType], kwargs: Optional[dict]) -> None:
+    """
+    Toy problem to test the optimizers with mixed numeric types to ensure that original dtypes are retained.
+    """
+    max_iterations = 10
+    if kwargs is None:
+        kwargs = {}
+
+    def objective(point: pd.DataFrame) -> pd.Series:
+        # mix of hyperparameters, optimal is to select the highest possible
+        ret: pd.Series = point["x"] + point["y"]
+        return ret
+
+    input_space = CS.ConfigurationSpace(seed=SEED)
+    # add a mix of numeric datatypes
+    input_space.add_hyperparameter(CS.UniformIntegerHyperparameter(name='x', lower=0, upper=5))
+    input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name='y', lower=0.0, upper=5.0))
+
+    if optimizer_type is None:
+        optimizer = OptimizerFactory.create(
+            parameter_space=input_space,
+            optimizer_kwargs=kwargs,
+        )
+    else:
+        optimizer = OptimizerFactory.create(
+            parameter_space=input_space,
+            optimizer_type=optimizer_type,
+            optimizer_kwargs=kwargs,
+        )
+
+    with pytest.raises(ValueError, match="No observations"):
+        optimizer.get_best_observation()
+
+    with pytest.raises(ValueError, match="No observations"):
+        optimizer.get_observations()
+
+    for _ in range(max_iterations):
+        suggestion = optimizer.suggest()
+        assert isinstance(suggestion, pd.DataFrame)
+        assert (suggestion.columns == ['x', 'y']).all()
+        # Check suggestion values are the expected dtype
+        assert isinstance(suggestion['x'].iloc[0], np.integer)
+        assert isinstance(suggestion['y'].iloc[0], np.floating)
+        # Check that suggestion is in the space
+        test_configuration = CS.Configuration(optimizer.parameter_space, suggestion.astype('O').iloc[0].to_dict())
+        # Raises an error if outside of configuration space
+        test_configuration.is_valid_configuration()
+        # Test registering the suggested configuration with a score.
+        observation = objective(suggestion)
+        assert isinstance(observation, pd.Series)
+        optimizer.register(suggestion, observation)
+
+    best_observation = optimizer.get_best_observation()
+    assert isinstance(best_observation, pd.DataFrame)
+
+    all_observations = optimizer.get_observations()
+    assert isinstance(all_observations, pd.DataFrame)
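
To exercise just the new test locally, something like the following should work from a dev checkout with pytest installed (the -k filter is a convenience, not part of the commit):

pytest mlos_core/mlos_core/tests/optimizers/optimizer_test.py -k mixed_numeric_type_input_space_types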
