Skip to content
87 changes: 71 additions & 16 deletions package/samplers/value_at_risk/sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ class RobustGPSampler(BaseSampler):
The input noise standard deviations for each parameter. For example, when
`{"x": 0.1, "y": 0.2}` is given, the sampler assumes that the input noise of `x` and
`y` follows `N(0, 0.1**2)` and `N(0, 0.2**2)`, respectively.
const_noisy_param_names:
The list of parameters determined externally rather than being decision variables.
For these parameters, `suggest_float` samples random values instead of searching for
values that optimize the objective function.
"""

def __init__(
Expand All @@ -114,6 +118,7 @@ def __init__(
warn_independent_sampling: bool = True,
uniform_input_noise_rads: dict[str, float] | None = None,
normal_input_noise_stdevs: dict[str, float] | None = None,
const_noisy_param_names: list[str] | None = None,
) -> None:
if uniform_input_noise_rads is None and normal_input_noise_stdevs is None:
raise ValueError(
Expand All @@ -125,8 +130,25 @@ def __init__(
"Only one of `uniform_input_noise_rads` and `normal_input_noise_stdevs` "
"can be specified."
)
if const_noisy_param_names is not None:
if uniform_input_noise_rads is not None and len(
const_noisy_param_names & uniform_input_noise_rads.keys()
):
raise ValueError(
"noisy parameters can be specified only in one of "
"`const_noisy_param_names` and `uniform_input_noise_rads`."
)
if normal_input_noise_stdevs is not None and len(
const_noisy_param_names & normal_input_noise_stdevs.keys()
):
raise ValueError(
"noisy parameters can be specified only in one of "
"`const_noisy_param_names` and `normal_input_noise_stdevs`."
)

self._uniform_input_noise_rads = uniform_input_noise_rads
self._normal_input_noise_stdevs = normal_input_noise_stdevs
self._const_noisy_param_names = const_noisy_param_names or []
self._rng = LazyRandomState(seed)
self._independent_sampler = independent_sampler or optuna.samplers.RandomSampler(seed=seed)
self._intersection_search_space = optuna.search_space.IntersectionSearchSpace()
Expand Down Expand Up @@ -183,17 +205,14 @@ def infer_relative_search_space(

return search_space

def _optimize_acqf(
self, acqf: acqf_module.BaseAcquisitionFunc, best_params: np.ndarray | None
) -> np.ndarray:
def _optimize_acqf(self, acqf: acqf_module.BaseAcquisitionFunc) -> np.ndarray:
# Advanced users can override this method to change the optimization algorithm.
# However, we do not make any effort to keep backward compatibility between versions.
# Particularly, we may remove this function in future refactoring.
assert best_params is None or len(best_params.shape) == 2
normalized_params, _acqf_val = optim_mixed.optimize_acqf_mixed(
# We assume acqf_module.BaseAcquisitionFunc is compatible with optuna._gp.acqf.BaseAcquisitionFunc
cast(optuna._gp.acqf.BaseAcquisitionFunc, acqf),
warmstart_normalized_params_array=best_params,
warmstart_normalized_params_array=None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This modification likely serves to prevent the optimizer from getting stuck in local optima. The change essentially sets the initial values for the iterative optimization to random values without altering the acquisition function itself, which is the target of the optimization. However, based on my own verification, even with simple objective functions, the current GPSampler's acquisition function becomes extremely jagged and ill-suited to gradient-based optimization. Therefore, rather than randomly shifting the initial values, wouldn't it be better to smooth the acquisition function itself to make it more amenable to optimization?

Specifically, consider setting ConstrainedLogValueAtRisk's stabilizing_noise to a larger value than currently used (e.g., 1e-5). Note that this value requires adjustment based on the specific application.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change was made by @nabenabe0928 -san in 6dae507 .

@nabenabe0928 -san removed best_params from CARBO for that reason (#301), but in this PR, it might be simply because None was passed as best_params at every call site of _optimize_acqf.

How about merging this PR as is, and re-adding best_params in a different PR if necessary?

Anyway, the fact that samples are overly concentrated in specific areas is a big issue, so I'd like to try modifying the stabilizing_noise parameter.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about merging this PR as is, and re-adding best_params in a different PR if necessary?

Sounds good! Sorry for the confusion. I misunderstood the motivation of setting best_params=None.

n_preliminary_samples=self._n_preliminary_samples,
n_local_search=self._n_local_search,
tol=self._tol,
Expand Down Expand Up @@ -241,6 +260,14 @@ def _get_constraints_acqf_args(
self._constraints_gprs_cache_list = constraints_gprs
return constraints_gprs, constraints_threshold_list

def _get_internal_search_space_with_fixed_params(
    self, search_space: dict[str, BaseDistribution]
) -> gp_search_space.SearchSpace:
    """Build the internal search space with constant noisy parameters pinned.

    Every parameter of ``search_space`` whose name is listed in
    ``self._const_noisy_param_names`` is replaced by the single-point
    distribution ``IntDistribution(0, 0)``. All other parameters keep their
    original distribution, and the parameter order of ``search_space`` is
    preserved.

    Args:
        search_space:
            The search space keyed by parameter name. It is not modified.

    Returns:
        A ``gp_search_space.SearchSpace`` built from the adjusted distributions.
    """
    const_names = set(self._const_noisy_param_names)
    # Only parameters that already exist in ``search_space`` are replaced. Names
    # missing from it are ignored so that no extra dimension is appended, which
    # matches how the constant-noise indices are derived from ``search_space``.
    search_space_with_fixed_params: dict[str, BaseDistribution] = {
        param_name: (
            optuna.distributions.IntDistribution(0, 0) if param_name in const_names else dist
        )
        for param_name, dist in search_space.items()
    }
    return gp_search_space.SearchSpace(search_space_with_fixed_params)

def _get_value_at_risk(
self,
gpr: gp.GPRegressor,
Expand Down Expand Up @@ -280,23 +307,34 @@ def _get_scaled_input_noise_params(
return scaled_input_noise_params

noise_kwargs: _NoiseKWArgs = {}
const_noise_param_inds = [
i
for i, param_name in enumerate(search_space)
if param_name in self._const_noisy_param_names
]
if self._uniform_input_noise_rads is not None:
scaled_input_noise_params = _get_scaled_input_noise_params(
self._uniform_input_noise_rads, "uniform_input_noise_rads"
)
scaled_input_noise_params[const_noise_param_inds] = 0.5
noise_kwargs["uniform_input_noise_rads"] = scaled_input_noise_params
elif self._normal_input_noise_stdevs is not None:
scaled_input_noise_params = _get_scaled_input_noise_params(
self._normal_input_noise_stdevs, "normal_input_noise_stdevs"
)
# NOTE(nabenabe): \pm 2 sigma will cover the domain.
scaled_input_noise_params[const_noise_param_inds] = 0.25
noise_kwargs["normal_input_noise_stdevs"] = scaled_input_noise_params
else:
assert False, "Should not reach here."

search_space_with_fixed_params = self._get_internal_search_space_with_fixed_params(
search_space
)
if constraints_gpr_list is None or constraints_threshold_list is None:
return acqf_module.ValueAtRisk(
gpr=gpr,
search_space=internal_search_space,
search_space=search_space_with_fixed_params,
confidence_level=self._objective_confidence_level,
n_input_noise_samples=self._n_input_noise_samples,
n_qmc_samples=self._n_qmc_samples,
Expand All @@ -307,7 +345,7 @@ def _get_scaled_input_noise_params(
else:
return acqf_module.ConstrainedLogValueAtRisk(
gpr=gpr,
search_space=internal_search_space,
search_space=search_space_with_fixed_params,
constraints_gpr_list=constraints_gpr_list,
constraints_threshold_list=constraints_threshold_list,
objective_confidence_level=self._objective_confidence_level,
Expand Down Expand Up @@ -379,19 +417,15 @@ def _get_gpr_list(
self._gprs_cache_list = gprs_list
return gprs_list

def sample_relative(
self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution]
def _optimize_params(
self, study: Study, trials: list[FrozenTrial], search_space: dict[str, BaseDistribution]
) -> dict[str, Any]:
if search_space == {}:
return {}

self._verify_search_space(search_space)
trials = study._get_trials(deepcopy=False, states=(TrialState.COMPLETE,), use_cache=True)
if len(trials) < self._n_startup_trials:
return {}

gprs_list = self._get_gpr_list(study, search_space)
best_params: np.ndarray | None
acqf: acqf_module.BaseAcquisitionFunc
assert len(gprs_list) == 1
internal_search_space = gp_search_space.SearchSpace(search_space)
Expand All @@ -403,7 +437,6 @@ def sample_relative(
search_space,
acqf_type="mean",
)
best_params = None
else:
constraint_vals, _ = _get_constraint_vals_and_feasibility(study, trials)
constr_gpr_list, constr_threshold_list = self._get_constraints_acqf_args(
Expand All @@ -420,11 +453,27 @@ def sample_relative(
constraints_gpr_list=constr_gpr_list,
constraints_threshold_list=constr_threshold_list,
)
best_params = None

normalized_param = self._optimize_acqf(acqf, best_params)
normalized_param = self._optimize_acqf(acqf)
return internal_search_space.get_unnormalized_param(normalized_param)

def sample_relative(
    self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution]
) -> dict[str, Any]:
    """Suggest parameter values within the relative search space.

    Returns an empty dict while fewer than ``self._n_startup_trials`` trials are
    complete. Otherwise the parameters come from ``self._optimize_params``, and
    every constant noisy parameter present in ``search_space`` is overwritten
    with a value drawn uniformly from ``[dist.low, dist.high]``.

    Args:
        study:
            The study whose completed trials are used.
        trial:
            The trial being sampled. It is not read by this method.
        search_space:
            The relative search space keyed by parameter name.

    Returns:
        A dict mapping parameter names to the suggested values.

    Raises:
        ValueError:
            If a constant noisy parameter in ``search_space`` does not have a
            ``FloatDistribution``.
    """
    trials = study._get_trials(deepcopy=False, states=(TrialState.COMPLETE,), use_cache=True)
    if len(trials) < self._n_startup_trials:
        return {}

    params = self._optimize_params(study, trials, search_space)

    # Perturb constant noisy parameter uniformly
    for name in self._const_noisy_param_names:
        dist = search_space.get(name)
        if dist is None:
            # The parameter is not part of the relative search space (for
            # example, the search space is empty), so there is nothing to
            # overwrite here. Indexing ``search_space[name]`` would raise KeyError.
            continue
        if not isinstance(dist, optuna.distributions.FloatDistribution):
            # An explicit error instead of ``assert``, which is stripped under ``-O``.
            raise ValueError(
                f"The constant noisy parameter `{name}` must be suggested with "
                f"`suggest_float`, but got {type(dist).__name__}."
            )
        params[name] = self._rng.rng.uniform(dist.low, dist.high)

    return params

def get_robust_trial(self, study: Study) -> FrozenTrial:
states = (TrialState.COMPLETE,)
trials = study._get_trials(deepcopy=False, states=states, use_cache=True)
Expand Down Expand Up @@ -457,6 +506,12 @@ def get_robust_trial(self, study: Study) -> FrozenTrial:
best_idx = np.argmax(acqf.eval_acqf_no_grad(X_train)).item()
return trials[best_idx]

def get_robust_params(self, study: Study) -> dict[str, Any]:
    """Return the parameters obtained by ``self._optimize_params`` for ``study``.

    The relative search space is inferred from the first completed trial, and
    the optimization uses all completed trials of ``study``.

    Args:
        study:
            The study whose completed trials are used.

    Returns:
        A dict mapping parameter names to values, as returned by
        ``self._optimize_params``.

    Raises:
        ValueError:
            If ``study`` has no completed trial.
    """
    states = (TrialState.COMPLETE,)
    trials = study._get_trials(deepcopy=False, states=states, use_cache=True)
    if not trials:
        # Without a completed trial, ``trials[0]`` would fail with an opaque IndexError.
        raise ValueError("No trials are completed yet.")
    search_space = self.infer_relative_search_space(study, trials[0])
    return self._optimize_params(study, trials, search_space)

def sample_independent(
self,
study: Study,
Expand Down