Skip to content

Commit e127f7a

Browse files
authored
Merge pull request #160 from nabenabe0928/add-user-prior-cma-es
Add CMA-ES with user prior
2 parents 733bce7 + 674bbed commit e127f7a

File tree

4 files changed

+321
-0
lines changed

4 files changed

+321
-0
lines changed
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2024 Shuhei Watanabe
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
---
2+
author: Shuhei Watanabe
3+
title: CMA-ES with User Prior
4+
description: You can provide the initial parameters, i.e. mean vector and covariance matrix, for CMA-ES with this sampler.
5+
tags: [sampler, cma-es, meta-learning, prior]
6+
optuna_versions: [4.0.0]
7+
license: MIT License
8+
---
9+
10+
## Abstract
11+
12+
The Optuna CMA-ES sampler does not provide a flexible way to initialize the parameters of the Gaussian distribution, so I created this sampler as a workaround.
13+
14+
## Class or Function Names
15+
16+
- UserPriorCmaEsSampler
17+
18+
In principle, most arguments follow [`optuna.samplers.CmaEsSampler`](https://optuna.readthedocs.io/en/stable/reference/samplers/generated/optuna.samplers.CmaEsSampler.html), but some parts are modified.
19+
20+
For example, `UserPriorCmaEsSampler` does not support `source_trials` and `use_separable_cma` due to their incompatibility.
21+
Instead, we replaced `x0` and `sigma0` in `CmaEsSampler` with `mu0` and `cov0`.
22+
In `CmaEsSampler`, we needed to provide `x0` as `dict` and `sigma0` only as `float`.
23+
By adding `param_names` to the requirement, we can now give `mu0` (previously `x0`) and `cov0` (previously `sigma0`) as `np.ndarray`.
24+
Note that the order of each dimension in `mu0` and `cov0` must be consistent with that in `param_names`.
25+
26+
## Installation
27+
28+
```shell
29+
$ pip install optunahub cmaes
30+
```
31+
32+
## Example
33+
34+
The simplest code example is as follows:
35+
36+
```python
37+
import numpy as np
38+
import optuna
39+
import optunahub
40+
41+
42+
def objective(trial: optuna.Trial) -> float:
43+
x = trial.suggest_float("x", -50, -40)
44+
y = trial.suggest_int("y", -5, 5)
45+
return (x + 43)**2 + (y - 2)**2
46+
47+
48+
if __name__ == "__main__":
49+
module = optunahub.load_module(package="samplers/user_prior_cmaes")
50+
# ``with_margin=True`` because the search space has an integer parameter.
51+
sampler = module.UserPriorCmaEsSampler(
52+
param_names=["x", "y"], mu0=np.array([-48., 3.]), cov0=np.diag([2., 0.2]), with_margin=True
53+
)
54+
study = optuna.create_study(sampler=sampler)
55+
study.optimize(objective, n_trials=20)
56+
print(study.best_trial.value, study.best_trial.params)
57+
58+
```
59+
60+
Although `UserPriorCmaEsSampler` CANNOT handle log-scale parameters on the sampler side, the same effect can be achieved with the following workaround:
61+
62+
```python
63+
import math
64+
65+
import numpy as np
66+
import optuna
67+
import optunahub
68+
69+
70+
def objective(trial: optuna.Trial) -> float:
71+
# For example, trial.suggest_float("x", 1e-5, 1.0, log=True) can be encoded as:
72+
x = 10 ** trial.suggest_float("log10_x", -5, 0)
73+
# trial.suggest_float("y", 2, 1024, log=True) can be encoded as:
74+
y = 2 ** trial.suggest_float("log2_y", 1, 10)
75+
# In general, trial.suggest_float("z", low, high, log=True) can be encoded as:
76+
low, high = 3, 81
77+
b = 3 # The base of log can be any positive number.
78+
z = b ** trial.suggest_float("logb_z", math.log(low, b), math.log(high, b))
79+
return x**2 + y**2 + z**2
80+
81+
82+
if __name__ == "__main__":
83+
module = optunahub.load_module(package="samplers/user_prior_cmaes")
84+
sampler = module.UserPriorCmaEsSampler(
85+
param_names=["log10_x", "log2_y", "logb_z"],
86+
mu0=np.array([-4, 8, 3]),
87+
cov0=np.diag([0.2, 1., 0.1]),
88+
)
89+
study = optuna.create_study(sampler=sampler)
90+
study.optimize(objective, n_trials=20)
91+
print(study.best_trial.value, study.best_trial.params)
92+
```
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from .sampler import UserPriorCmaEsSampler
2+
3+
4+
# Names exported by this package.
__all__ = ["UserPriorCmaEsSampler"]
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
from __future__ import annotations
2+
3+
import math
4+
from typing import Any
5+
from typing import Union
6+
7+
import cmaes
8+
import numpy as np
9+
from optuna import Study
10+
from optuna._transform import _SearchSpaceTransform
11+
from optuna.distributions import BaseDistribution
12+
from optuna.distributions import FloatDistribution
13+
from optuna.distributions import IntDistribution
14+
from optuna.samplers import BaseSampler
15+
from optuna.samplers import CmaEsSampler
16+
from optuna.study import StudyDirection
17+
from optuna.trial import FrozenTrial
18+
19+
20+
# Union of the ``cmaes`` optimizer classes; used as the return annotation of ``_init_optimizer``.
CmaClass = Union[cmaes.CMA, cmaes.SepCMA, cmaes.CMAwM]
21+
22+
23+
class UserPriorCmaEsSampler(CmaEsSampler):
    """A sampler using `cmaes <https://github.com/CyberAgentAILab/cmaes>`__ as the backend with user prior.

    Please check ``CmaEsSampler`` in Optuna for more details of each argument.
    This class replaces the arguments ``x0`` and ``sigma0`` in ``CmaEsSampler`` of Optuna with
    ``mu0`` and ``cov0``.
    Furthermore, due to the incompatibility, this class does not support ``source_trials`` and
    ``use_separable_cma``.

    Args:
        param_names:
            The list of the parameter names to be tuned. This list must be a unique list.
            The order of the dimensions in ``mu0`` and ``cov0`` must follow this list.
        mu0:
            The mean vector used for the initialization of CMA-ES. Its shape must be
            ``(len(param_names),)``. It is given in the scale of the search space and is
            rescaled internally with the bounds of each parameter.
        cov0:
            The covariance matrix used for the initialization of CMA-ES. Its shape must be
            ``(len(param_names), len(param_names))``. It must be symmetric, element-wise
            non-negative, and positive semi-definite.

    The remaining arguments are forwarded to ``CmaEsSampler`` unchanged.
    """  # NOQA: E501

    def __init__(
        self,
        param_names: list[str],
        mu0: np.ndarray,
        cov0: np.ndarray,
        n_startup_trials: int = 1,
        independent_sampler: BaseSampler | None = None,
        warn_independent_sampling: bool = True,
        seed: int | None = None,
        *,
        consider_pruned_trials: bool = False,
        restart_strategy: str | None = None,
        popsize: int | None = None,
        inc_popsize: int = 2,
        with_margin: bool = False,
        lr_adapt: bool = False,
    ) -> None:
        # The options replaced by (x0, sigma0) or unsupported by this class
        # (use_separable_cma, source_trials) are pinned to their neutral values.
        super().__init__(
            x0=None,
            sigma0=None,
            n_startup_trials=n_startup_trials,
            independent_sampler=independent_sampler,
            warn_independent_sampling=warn_independent_sampling,
            seed=seed,
            consider_pruned_trials=consider_pruned_trials,
            restart_strategy=restart_strategy,
            popsize=popsize,
            inc_popsize=inc_popsize,
            use_separable_cma=False,
            with_margin=with_margin,
            lr_adapt=lr_adapt,
            source_trials=None,
        )
        self._validate_user_prior(param_names, mu0, cov0)
        # Keep private copies so that later mutation by the caller cannot affect the sampler.
        self._param_names = param_names[:]
        self._mu0 = mu0.astype(float)
        self._cov0 = cov0.astype(float)

    def _validate_user_prior(
        self, param_names: list[str], mu0: np.ndarray, cov0: np.ndarray
    ) -> None:
        """Check that ``param_names``, ``mu0`` and ``cov0`` form a consistent prior.

        Raises:
            ValueError:
                If ``param_names`` has duplicates, if the shapes of ``mu0`` / ``cov0`` do not
                match ``len(param_names)``, or if ``cov0`` is not symmetric, has a negative
                element, or has a negative eigenvalue.
        """
        dim = len(param_names)
        if dim != len(set(param_names)):
            raise ValueError(
                "Some elements in param_names are duplicated. Please make it a unique list."
            )
        if mu0.shape != (dim,) or cov0.shape != (dim, dim):
            raise ValueError(
                f"The shape of mu0 and cov0 must be (len(param_names)={dim}, ) and "
                f"(len(param_names)={dim}, len(param_names)={dim}), but got {mu0.shape} and "
                f"{cov0.shape}."
            )
        if not np.allclose(cov0, cov0.T):
            raise ValueError("cov0 must be a symmetric matrix.")
        # NOTE(review): this also rejects negative off-diagonal entries, i.e. negatively
        # correlated parameters, although such matrices can be valid covariances. Kept as is
        # because it may be intentional -- confirm with the author.
        if np.any(cov0 < 0.0):
            raise ValueError("All elements in cov0 must be non-negative.")
        # cov0 is known to be symmetric at this point, so use the symmetric eigen-solver, which
        # always returns real eigenvalues.
        if np.any(np.linalg.eigvalsh(cov0) < 0.0):
            raise ValueError("cov0 must be a semi-positive definite matrix.")

    def sample_relative(
        self,
        study: Study,
        trial: FrozenTrial,
        search_space: dict[str, BaseDistribution],
    ) -> dict[str, Any]:
        """Sample with the parent implementation after aligning ``search_space``.

        A non-empty ``search_space`` must contain exactly the names in ``param_names``; it is
        re-ordered to follow ``param_names`` so that each dimension matches ``mu0`` / ``cov0``.
        An empty ``search_space`` is passed to the parent untouched.

        Raises:
            ValueError: If the keys of a non-empty ``search_space`` differ from ``param_names``.
        """
        if not search_space:
            return super().sample_relative(study=study, trial=trial, search_space=search_space)

        if set(search_space) != set(self._param_names):
            # Duplicated names were already rejected in ``__init__``, so report both key sets
            # to let users spot the actual mismatch.
            raise ValueError(
                "The keys in search_space and param_names did not match. "
                f"Got search_space keys {sorted(search_space)} and "
                f"param_names {self._param_names}."
            )

        # Ensure the parameter order is identical to that in param_names.
        ordered_space = {name: search_space[name] for name in self._param_names}
        return super().sample_relative(study=study, trial=trial, search_space=ordered_space)

    def _calculate_initial_params(
        self, trans: _SearchSpaceTransform
    ) -> tuple[np.ndarray, float, np.ndarray]:
        """Convert the user prior into the ``(mean, sigma, cov)`` triple given to ``cmaes``.

        The mean and the covariance are rescaled by the width of each raw bound, and the
        covariance is then divided by ``sigma0**2`` where ``sigma0 = det(cov)**(1 / (2 * dim))``.

        Raises:
            ValueError: If the search space has a non-numerical or a log-scale parameter.
        """
        # NOTE(nabenabe): Except this method, everything is basically based on Optuna v4.0.0.
        # As this class does not support some cases supported by Optuna, I simply added validation
        # to each method, but otherwise, nothing changed. In principle, if users find a bug, it is
        # likely that the bug exists in this method.
        dists = list(trans._search_space.values())
        if any(not isinstance(d, (IntDistribution, FloatDistribution)) for d in dists):
            raise ValueError("search_space cannot include categorical parameters.")
        if any(d.log for d in dists if isinstance(d, (FloatDistribution, IntDistribution))):
            src_url = "https://hub.optuna.org/samplers/user_prior_cmaes/"
            raise ValueError(
                "search_space for user_prior cannot include log scale. "
                f"Please use the workaround described in {src_url}."
            )

        dim = len(self._param_names)
        raw_bounds = trans._raw_bounds
        domain_sizes = raw_bounds[:, 1] - raw_bounds[:, 0]
        is_single = domain_sizes == 0.0
        # Zero-width domains cannot be used as a divisor. Use 1.0 for them so that neither the
        # mean nor the covariance becomes inf/nan.
        scales = np.where(is_single, 1.0, domain_sizes)

        # Rescale the mean into the [0, 1] scale. No clipping is applied, so a mean outside the
        # bounds stays outside [0, 1]. Zero-width domains are placed at the centre.
        mu0 = np.where(is_single, 0.5, (self._mu0 - raw_bounds[:, 0]) / scales)

        # We also need to transform the covariance matrix accordingly to adapt to the [0, 1] scale.
        cov0 = self._cov0 / (scales * scales[:, np.newaxis])

        # Make the determinant of cov0 1 so that it agrees with the CMA-ES convention.
        # Round-off can make the determinant of a (nearly) singular matrix slightly negative,
        # which would make math.pow raise, so floor it at zero first.
        det = max(np.linalg.det(cov0), 0.0)
        sigma0 = math.pow(det, 0.5 / dim)
        # Avoid ZeroDivisionError in cmaes.
        sigma0 = max(sigma0, 1e-10)
        cov0 /= sigma0**2

        return mu0, sigma0, cov0

    def _init_optimizer(
        self,
        trans: _SearchSpaceTransform,
        direction: StudyDirection,
        population_size: int | None = None,
        randomize_start_point: bool = False,
    ) -> CmaClass:
        """Build the ``cmaes`` optimizer initialized with the user prior.

        Returns ``cmaes.CMAwM`` when ``with_margin`` is enabled and ``cmaes.CMA`` otherwise.
        ``direction`` and ``randomize_start_point`` are not used in this implementation
        (presumably kept to match the parent signature).
        """
        n_dimension = len(trans.bounds)
        mu0, sigma0, cov0 = self._calculate_initial_params(trans)
        optimizer_seed = self._cma_rng.rng.randint(1, 2**31 - 2)

        if not self._with_margin:
            return cmaes.CMA(
                mean=mu0,
                sigma=sigma0,
                cov=cov0,
                bounds=trans.bounds,
                seed=optimizer_seed,
                n_max_resampling=10 * n_dimension,
                population_size=population_size,
                lr_adapt=self._lr_adapt,
            )

        steps = np.empty(len(trans._search_space), dtype=float)
        for i, dist in enumerate(trans._search_space.values()):
            assert isinstance(dist, (IntDistribution, FloatDistribution))
            if dist.step is None or dist.log:
                # Set step 0.0 for continuous search space.
                steps[i] = 0.0
            elif dist.low == dist.high:
                steps[i] = 1.0
            else:
                # Express the step relative to the width of the domain.
                steps[i] = dist.step / (dist.high - dist.low)

        return cmaes.CMAwM(
            mean=mu0,
            sigma=sigma0,
            bounds=trans.bounds,
            steps=steps,
            cov=cov0,
            seed=optimizer_seed,
            n_max_resampling=10 * n_dimension,
            population_size=population_size,
        )

0 commit comments

Comments
 (0)