Skip to content

Commit 009c86f

Browse files
committed
Change how noisy constant values are handled
- Allow specifying nominal values of noisy constants explicitly instead of using the center value of the interval. - Sample noisy constant values first and then optimize the other parameters' values based on those values, instead of optimizing the other parameters assuming the noisy constants' nominal values and then changing the noisy constants' values afterwards. I believe this is desirable because the sampler can explore desirable parameter values under different noisy constant values.
1 parent 6818488 commit 009c86f

File tree

2 files changed

+91
-9
lines changed

2 files changed

+91
-9
lines changed

package/samplers/value_at_risk/_gp/acqf.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ def __init__(
118118
threshold_list: list[float],
119119
n_input_noise_samples: int,
120120
qmc_seed: int | None,
121+
fixed_indices: torch.Tensor,
122+
fixed_values: torch.Tensor,
121123
uniform_input_noise_rads: torch.Tensor | None = None,
122124
normal_input_noise_stdevs: torch.Tensor | None = None,
123125
stabilizing_noise: float = 1e-12,
@@ -133,12 +135,22 @@ def __init__(
133135
)
134136
self._stabilizing_noise = stabilizing_noise
135137
self._confidence_level = confidence_level
138+
self._fixed_indices = fixed_indices
139+
self._fixed_values = fixed_values
136140
super().__init__(
137141
length_scales=np.mean([gpr.length_scales for gpr in gpr_list], axis=0),
138142
search_space=search_space,
139143
)
140144

141145
def eval_acqf(self, x: torch.Tensor) -> torch.Tensor:
146+
# The search space of constant noisy parameters is internally replaced with IntDistribution(0, 0),
147+
# so that their normalized values passed as x are always 0.5. However, values passed to
148+
# const_noisy_param_values argument of RobustGPSampler._get_value_at_risk may be normalized to
149+
# different values under the original search space used by GPRegressor. So we carry around the
150+
# normalized version of const_noisy_param_values explicitly and use them instead.
151+
x = x.clone()
152+
x[:, self._fixed_indices] = self._fixed_values
153+
142154
x_noisy = x.unsqueeze(-2) + self._input_noise
143155
log_feas_probs = torch.zeros(x_noisy.shape[:-1], dtype=torch.float64)
144156
for gpr, threshold in zip(self._gpr_list, self._threshold_list):
@@ -168,6 +180,8 @@ def __init__(
168180
n_qmc_samples: int,
169181
qmc_seed: int | None,
170182
acqf_type: str,
183+
fixed_indices: torch.Tensor,
184+
fixed_values: torch.Tensor,
171185
uniform_input_noise_rads: torch.Tensor | None = None,
172186
normal_input_noise_stdevs: torch.Tensor | None = None,
173187
) -> None:
@@ -187,6 +201,8 @@ def __init__(
187201
seed=rng.random_integers(0, 2**31 - 1, size=1).item(),
188202
)
189203
self._acqf_type = acqf_type
204+
self._fixed_indices = fixed_indices
205+
self._fixed_values = fixed_values
190206
super().__init__(length_scales=gpr.length_scales, search_space=search_space)
191207

192208
def _value_at_risk(self, x: torch.Tensor) -> torch.Tensor:
@@ -206,6 +222,15 @@ def eval_acqf(self, x: torch.Tensor) -> torch.Tensor:
206222
4. Then compute (mc_value_at_risk - f0).clamp_min(0).mean()
207223
Appendix B.2 of https://www.robots.ox.ac.uk/~mosb/public/pdf/136/full_thesis.pdf
208224
"""
225+
226+
# The search space of constant noisy parameters is internally replaced with IntDistribution(0, 0),
227+
# so that their normalized values passed as x are always 0.5. However, values passed to
228+
# const_noisy_param_values argument of RobustGPSampler._get_value_at_risk may be normalized to
229+
# different values under the original search space used by GPRegressor. So we carry around the
230+
# normalized version of const_noisy_param_values explicitly and use them instead.
231+
x = x.clone()
232+
x[:, self._fixed_indices] = self._fixed_values
233+
209234
if self._acqf_type == "mean":
210235
return self._value_at_risk(x).mean(dim=-1)
211236
elif self._acqf_type == "nei":
@@ -227,6 +252,8 @@ def __init__(
227252
n_qmc_samples: int,
228253
qmc_seed: int | None,
229254
acqf_type: str,
255+
fixed_indices: torch.Tensor,
256+
fixed_values: torch.Tensor,
230257
uniform_input_noise_rads: torch.Tensor | None = None,
231258
normal_input_noise_stdevs: torch.Tensor | None = None,
232259
stabilizing_noise: float = 1e-12,
@@ -241,6 +268,8 @@ def __init__(
241268
acqf_type=acqf_type,
242269
uniform_input_noise_rads=uniform_input_noise_rads,
243270
normal_input_noise_stdevs=normal_input_noise_stdevs,
271+
fixed_indices=fixed_indices,
272+
fixed_values=fixed_values,
244273
)
245274
self._log_prob_at_risk = LogCumulativeProbabilityAtRisk(
246275
gpr_list=constraints_gpr_list,
@@ -252,6 +281,8 @@ def __init__(
252281
uniform_input_noise_rads=uniform_input_noise_rads,
253282
normal_input_noise_stdevs=normal_input_noise_stdevs,
254283
stabilizing_noise=stabilizing_noise,
284+
fixed_indices=fixed_indices,
285+
fixed_values=fixed_values,
255286
)
256287
assert torch.allclose(
257288
self._log_prob_at_risk._input_noise, self._value_at_risk._input_noise

package/samplers/value_at_risk/sampler.py

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ def __init__(
120120
uniform_input_noise_rads: dict[str, float] | None = None,
121121
normal_input_noise_stdevs: dict[str, float] | None = None,
122122
const_noisy_param_names: list[str] | None = None,
123+
const_noisy_param_nominal_values: dict[str, float] | None = None,
123124
) -> None:
124125
if uniform_input_noise_rads is None and normal_input_noise_stdevs is None:
125126
raise ValueError(
@@ -150,6 +151,7 @@ def __init__(
150151
self._uniform_input_noise_rads = uniform_input_noise_rads
151152
self._normal_input_noise_stdevs = normal_input_noise_stdevs
152153
self._const_noisy_param_names = const_noisy_param_names or []
154+
self._const_noisy_param_nominal_values = const_noisy_param_nominal_values or {}
153155
self._rng = LazyRandomState(seed)
154156
self._independent_sampler = independent_sampler or optuna.samplers.RandomSampler(seed=seed)
155157
self._intersection_search_space = optuna.search_space.IntersectionSearchSpace()
@@ -262,7 +264,8 @@ def _get_constraints_acqf_args(
262264
return constraints_gprs, constraints_threshold_list
263265

264266
def _get_internal_search_space_with_fixed_params(
265-
self, search_space: dict[str, BaseDistribution]
267+
self,
268+
search_space: dict[str, BaseDistribution],
266269
) -> gp_search_space.SearchSpace:
267270
search_space_with_fixed_params = search_space.copy()
268271
for param_name in self._const_noisy_param_names:
@@ -275,6 +278,7 @@ def _get_value_at_risk(
275278
internal_search_space: gp_search_space.SearchSpace,
276279
search_space: dict[str, BaseDistribution],
277280
acqf_type: str,
281+
const_noisy_param_values: dict[str, float],
278282
constraints_gpr_list: list[gp.GPRegressor] | None = None,
279283
constraints_threshold_list: list[float] | None = None,
280284
) -> acqf_module.ValueAtRisk | acqf_module.ConstrainedLogValueAtRisk:
@@ -313,17 +317,37 @@ def _get_scaled_input_noise_params(
313317
for i, param_name in enumerate(search_space)
314318
if param_name in self._const_noisy_param_names
315319
]
320+
321+
def normalize(dist: BaseDistribution, x: float) -> float:
322+
assert isinstance(
323+
dist,
324+
(optuna.distributions.IntDistribution, optuna.distributions.FloatDistribution),
325+
)
326+
return (x - dist.low) / (dist.high - dist.low)
327+
328+
const_noisy_param_normalized_values = [
329+
normalize(dist, const_noisy_param_values[param_name])
330+
if param_name in const_noisy_param_values
331+
else 0.5
332+
for i, (param_name, dist) in enumerate(search_space.items())
333+
if param_name in self._const_noisy_param_names
334+
]
335+
316336
if self._uniform_input_noise_rads is not None:
317337
scaled_input_noise_params = _get_scaled_input_noise_params(
318338
self._uniform_input_noise_rads, "uniform_input_noise_rads"
319339
)
340+
# FIXME(sakai): If the fixed value is not at the center of the range,
341+
# \pm 0.5 may not cover the domain.
320342
scaled_input_noise_params[const_noise_param_inds] = 0.5
321343
noise_kwargs["uniform_input_noise_rads"] = scaled_input_noise_params
322344
elif self._normal_input_noise_stdevs is not None:
323345
scaled_input_noise_params = _get_scaled_input_noise_params(
324346
self._normal_input_noise_stdevs, "normal_input_noise_stdevs"
325347
)
326348
# NOTE(nabenabe): \pm 2 sigma will cover the domain.
349+
# FIXME(sakai): If the fixed value is not at the center of the range,
350+
# \pm 2 sigma may not cover the domain.
327351
scaled_input_noise_params[const_noise_param_inds] = 0.25
328352
noise_kwargs["normal_input_noise_stdevs"] = scaled_input_noise_params
329353
else:
@@ -341,6 +365,10 @@ def _get_scaled_input_noise_params(
341365
n_qmc_samples=self._n_qmc_samples,
342366
qmc_seed=self._rng.rng.randint(1 << 30),
343367
acqf_type=acqf_type,
368+
fixed_indices=torch.tensor(const_noise_param_inds, dtype=torch.int64),
369+
fixed_values=torch.tensor(
370+
const_noisy_param_normalized_values, dtype=torch.float64
371+
),
344372
**noise_kwargs,
345373
)
346374
else:
@@ -355,6 +383,10 @@ def _get_scaled_input_noise_params(
355383
n_qmc_samples=self._n_qmc_samples,
356384
qmc_seed=self._rng.rng.randint(1 << 30),
357385
acqf_type=acqf_type,
386+
fixed_indices=torch.tensor(const_noise_param_inds, dtype=torch.int64),
387+
fixed_values=torch.tensor(
388+
const_noisy_param_normalized_values, dtype=torch.float64
389+
),
358390
**noise_kwargs,
359391
)
360392

@@ -419,7 +451,11 @@ def _get_gpr_list(
419451
return gprs_list
420452

421453
def _optimize_params(
422-
self, study: Study, trials: list[FrozenTrial], search_space: dict[str, BaseDistribution]
454+
self,
455+
study: Study,
456+
trials: list[FrozenTrial],
457+
search_space: dict[str, BaseDistribution],
458+
const_noisy_param_values: dict[str, float],
423459
) -> dict[str, Any]:
424460
if search_space == {}:
425461
return {}
@@ -437,6 +473,7 @@ def _optimize_params(
437473
internal_search_space,
438474
search_space,
439475
acqf_type="mean",
476+
const_noisy_param_values=const_noisy_param_values,
440477
)
441478
else:
442479
constraint_vals, _ = _get_constraint_vals_and_feasibility(study, trials)
@@ -453,10 +490,18 @@ def _optimize_params(
453490
acqf_type="mean",
454491
constraints_gpr_list=constr_gpr_list,
455492
constraints_threshold_list=constr_threshold_list,
493+
const_noisy_param_values=const_noisy_param_values,
456494
)
457495

458496
normalized_param = self._optimize_acqf(acqf)
459-
return internal_search_space.get_unnormalized_param(normalized_param)
497+
# The normalized values of constant noise parameters are fixed at 0.5 during search
498+
# regardless of their original values given as const_noisy_param_values, so
499+
# `internal_search_space.get_unnormalized_param` cannot decode them correctly.
500+
# Therefore, we overwrite those values with their original values.
501+
return (
502+
internal_search_space.get_unnormalized_param(normalized_param)
503+
| const_noisy_param_values
504+
)
460505

461506
def sample_relative(
462507
self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution]
@@ -465,15 +510,14 @@ def sample_relative(
465510
if len(trials) < self._n_startup_trials:
466511
return {}
467512

468-
params = self._optimize_params(study, trials, search_space)
469-
470513
# Perturb constant noisy parameter uniformly
514+
const_noisy_param_values = {}
471515
for name in self._const_noisy_param_names:
472516
dist = search_space[name]
473517
assert isinstance(dist, optuna.distributions.FloatDistribution)
474-
params[name] = self._rng.rng.uniform(dist.low, dist.high)
518+
const_noisy_param_values[name] = self._rng.rng.uniform(dist.low, dist.high)
475519

476-
return params
520+
return self._optimize_params(study, trials, search_space, const_noisy_param_values)
477521

478522
def get_robust_trial(self, study: Study) -> FrozenTrial:
479523
states = (TrialState.COMPLETE,)
@@ -485,7 +529,11 @@ def get_robust_trial(self, study: Study) -> FrozenTrial:
485529
acqf: acqf_module.BaseAcquisitionFunc
486530
if self._constraints_func is None:
487531
acqf = self._get_value_at_risk(
488-
gpr, internal_search_space, search_space, acqf_type="mean"
532+
gpr,
533+
internal_search_space,
534+
search_space,
535+
acqf_type="mean",
536+
const_noisy_param_values=self._const_noisy_param_nominal_values,
489537
)
490538
else:
491539
constraint_vals, _ = _get_constraint_vals_and_feasibility(study, trials)
@@ -502,6 +550,7 @@ def get_robust_trial(self, study: Study) -> FrozenTrial:
502550
acqf_type="mean",
503551
constraints_gpr_list=constr_gpr_list,
504552
constraints_threshold_list=constr_threshold_list,
553+
const_noisy_param_values=self._const_noisy_param_nominal_values,
505554
)
506555

507556
best_idx = np.argmax(acqf.eval_acqf_no_grad(X_train)).item()
@@ -511,7 +560,9 @@ def get_robust_params(self, study: Study) -> dict[str, Any]:
511560
states = (TrialState.COMPLETE,)
512561
trials = study._get_trials(deepcopy=False, states=states, use_cache=True)
513562
search_space = self.infer_relative_search_space(study, trials[0])
514-
return self._optimize_params(study, trials, search_space)
563+
return self._optimize_params(
564+
study, trials, search_space, self._const_noisy_param_nominal_values
565+
)
515566

516567
def sample_independent(
517568
self,

0 commit comments

Comments
 (0)