
Commit 6b8930f

francesding (Frances Ding) authored
Allow LaMBO2 to optionally accept a config object at initialization (#76)
* Update LaMBO2 to accept a constructed config at initialization, and a logger.

  Previously, LaMBO2 only accepted a path to a config file at initialization and ran Hydra initialization to compile this config. To allow users to run Hydra initialization themselves (and potentially include other config parameters besides those related to LaMBO2, such as black-box parameters), this updates the initialization to also optionally accept an already-compiled config. It also adds an optional logger to track metrics, and exposes the fft expansion factor as a config parameter, which used to be hard-coded to 2.

* Make x0 and y0 consistent for the black box.
* Update config with the new field.
* Remove extra prints.
* Ruff formatting.

Co-authored-by: Frances Ding <dingf7@gene.com>
1 parent e4161b7 commit 6b8930f
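For context, here is a minimal sketch of the new initialization path this commit enables: composing the Hydra config yourself and handing the compiled object to the solver. The import path, the class name LaMBO2, the config directory, and the my_black_box / x0 / my_logger objects are assumptions for illustration, not taken from the diff.

```python
import hydra
from omegaconf import OmegaConf

# Assumed import path; adjust to wherever the solver class actually lives.
from poli_baselines.solvers.bayesian_optimization.lambo2.solver import LaMBO2

# Compose the config yourself, e.g. to merge in black-box parameters
# alongside the LaMBO2 ones, then pass the compiled config directly.
with hydra.initialize_config_dir(config_dir="/abs/path/to/hydra_configs"):
    cfg = hydra.compose(config_name="generic_training")
OmegaConf.set_struct(cfg, False)

solver = LaMBO2(
    black_box=my_black_box,  # placeholder AbstractBlackBox instance
    x0=x0,                   # placeholder initial sequences
    config=cfg,              # new: a pre-compiled config is now accepted
    logger=my_logger,        # new: optional metrics logger
)
```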

File tree

2 files changed (+29, -10 lines)


src/poli_baselines/solvers/bayesian_optimization/lambo2/hydra_configs/generic_training.yaml

Lines changed: 1 addition & 0 deletions
@@ -24,6 +24,7 @@ max_sequence_length: 256
 num_samples: ${batch_size}
 allow_length_change: false
 accelerator: cpu
+fft_expansion_factor: 2

 trainer:
   _target_: lightning.Trainer
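Since fft_expansion_factor is now an ordinary config field, it can also be changed through the existing config-path initialization via Hydra overrides. A hedged sketch, reusing the assumed LaMBO2 import and placeholder objects from the example above:

```python
# Widen the candidate pool from the default factor of 2 to 4,
# without editing generic_training.yaml.
solver = LaMBO2(
    black_box=my_black_box,
    x0=x0,
    config_name="generic_training",
    overrides=["fft_expansion_factor=4"],
)
```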

src/poli_baselines/solvers/bayesian_optimization/lambo2/solver.py

Lines changed: 28 additions & 10 deletions
@@ -101,23 +101,28 @@ def __init__(
         black_box: AbstractBlackBox,
         x0: np.ndarray | None = None,
         y0: np.ndarray | None = None,
+        config: OmegaConf | None = None,
         config_dir: Path | str | None = None,
         config_name: str = "generic_training",
         overrides: list[str] | None = None,
         seed: int | None = None,
         max_epochs_for_retraining: int = 1,
         restrict_candidate_points_to: np.ndarray | None = None,
+        logger=None,
     ):
         super().__init__(black_box=black_box, x0=x0, y0=y0)
         self.experiment_id = f"{uuid4()}"[:8]
         self.max_epochs_for_retraining = max_epochs_for_retraining
         self.restrict_candidate_points_to = restrict_candidate_points_to

-        if config_dir is None:
-            config_dir = DEFAULT_CONFIG_DIR
-        with hydra.initialize_config_dir(config_dir=str(config_dir)):
-            cfg = hydra.compose(config_name=config_name, overrides=overrides)
-        OmegaConf.set_struct(cfg, False)
+        if config is None:
+            if config_dir is None:
+                config_dir = DEFAULT_CONFIG_DIR
+            with hydra.initialize_config_dir(config_dir=str(config_dir)):
+                cfg = hydra.compose(config_name=config_name, overrides=overrides)
+            OmegaConf.set_struct(cfg, False)
+        else:
+            cfg = config

         # Setting the random seed
         # We are ignoring the seed in the original config file.
@@ -129,6 +134,7 @@ def __init__(

         self.cfg = cfg
         print(OmegaConf.to_yaml(cfg))
+        self.logger = logger

         if x0 is None:
             raise ValueError(
@@ -145,10 +151,12 @@ def __init__(

         tokenizable_x0 = np.array([" ".join(x_i) for x_i in x0])

+        x0_for_black_box = np.array([seq.replace(" ", "") for seq in tokenizable_x0])
+
         if y0 is None:
-            y0 = self.black_box(x0)
+            y0 = self.black_box(x0_for_black_box)
         elif y0.shape[0] < x0.shape[0]:
-            y0 = np.vstack([y0, self.black_box(x0[original_size:])])
+            y0 = np.vstack([y0, self.black_box(x0_for_black_box[original_size:])])

         self.history_for_training = {
             "x": [tokenizable_x0],
@@ -322,8 +330,16 @@ def get_candidate_points_from_history(self) -> np.ndarray:
         x = np.concatenate(self.history_for_training["x"], axis=0)
         y = np.concatenate(self.history_for_training["y"], axis=0)
         sorted_y0_idxs = np.argsort(y.flatten())[::-1]
-        candidate_points = x[sorted_y0_idxs[: min(len(x), 2 * self.cfg.num_samples)]]
-        candidate_scores = y[sorted_y0_idxs[: min(len(x), 2 * self.cfg.num_samples)]]
+        candidate_points = x[
+            sorted_y0_idxs[
+                : min(len(x), self.cfg.fft_expansion_factor * self.cfg.num_samples)
+            ]
+        ]
+        candidate_scores = y[
+            sorted_y0_idxs[
+                : min(len(x), self.cfg.fft_expansion_factor * self.cfg.num_samples)
+            ]
+        ]

         indices = farthest_first_traversal(
             library=candidate_points,
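The selection above keeps the same top-scoring pooling as before, only with the hard-coded factor of 2 replaced by cfg.fft_expansion_factor; the pool is then thinned to a diverse subset by farthest_first_traversal. A standalone sketch of just the pooling step, with toy data standing in for the config and the training history:

```python
import numpy as np

x = np.array(["A A A", "B B B", "C C C", "D D D", "E E E"])
y = np.array([[0.1], [0.9], [0.4], [0.7], [0.2]])

fft_expansion_factor = 2  # toy values; the solver reads these from its config
num_samples = 1

sorted_idxs = np.argsort(y.flatten())[::-1]                 # best-scoring first
pool_size = min(len(x), fft_expansion_factor * num_samples)
candidate_points = x[sorted_idxs[:pool_size]]               # ['B B B', 'D D D']
candidate_scores = y[sorted_idxs[:pool_size]]               # [[0.9], [0.7]]
```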
@@ -388,7 +404,9 @@ def step(self) -> tuple[np.ndarray, np.ndarray]:
         # Compute proposals using the optimizer
         for _ in range(self.cfg.num_steps):
             # Take a step on the optimizer, diffusing towards promising sequences.
-            optimizer.step()
+            metrics = optimizer.step()
+            if self.logger:
+                self.logger.log_metrics(metrics)

         # Get the most promising sequences from the optimizer
         best_solutions = optimizer.get_best_solutions()
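Because the solver only calls self.logger.log_metrics(metrics) here, any object exposing a log_metrics method should work as the logger, e.g. a Lightning logger, or a minimal stand-in like the sketch below. The assumption that metrics is a flat dict of scalars is mine; the diff does not say what optimizer.step() returns.

```python
class PrintLogger:
    """Minimal stand-in logger: prints whatever the optimizer reports each step."""

    def log_metrics(self, metrics: dict) -> None:
        for name, value in metrics.items():
            print(f"{name}: {value}")

# Passed at construction time, e.g. LaMBO2(..., logger=PrintLogger()).
```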

0 commit comments
