Skip to content

Commit 19805d3

Browse files
committed
added HyperOpt dev dependency
1 parent 2301475 commit 19805d3

File tree

14 files changed

+377
-201
lines changed

14 files changed

+377
-201
lines changed

bore/engine.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
# `plugins.hpbandster` and place it here.
1010

1111

12-
class Ledger:
12+
class Record:
1313

1414
def __init__(self):
1515
self.features = []
@@ -47,13 +47,13 @@ def to_dataframe(self):
4747
loss=self.targets)
4848
return frame
4949

50-
51-
def is_duplicate(x, xs, rtol=1e-5, atol=1e-8):
52-
# Clever ways of doing this would involve data structs. like KD-trees
53-
# or locality sensitive hashing (LSH), but these are premature
54-
# optimizations at this point, especially since the `any` below does lazy
55-
# evaluation, i.e. is early stopped as soon as anything returns `True`.
56-
return any(np.allclose(x_prev, x, rtol=rtol, atol=atol) for x_prev in xs)
50+
def is_duplicate(self, x, rtol=1e-5, atol=1e-8):
51+
# Clever ways of doing this would involve data structs. like KD-trees
52+
# or locality sensitive hashing (LSH), but these are premature
53+
# optimizations at this point, especially since the `any` below does lazy
54+
# evaluation, i.e. is early stopped as soon as anything returns `True`.
55+
return any(np.allclose(x_prev, x, rtol=rtol, atol=atol)
56+
for x_prev in self.features)
5757

5858

5959
minimize_multi_start = multi_start(minimizer_fn=minimize)

bore/plotting.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,6 @@
44
import matplotlib.pyplot as plt
55

66

7-
def plot_image_grid(ax, images, shape, nrows=20, ncols=None, cmap=None):
8-
9-
if ncols is None:
10-
ncols = nrows
11-
12-
grid = images[:nrows*ncols].reshape(nrows, ncols, *shape).squeeze()
13-
14-
return ax.imshow(np.vstack(np.dstack(grid)), cmap=cmap)
15-
16-
177
def fill_between_stddev(X_pred, mean_pred, stddev_pred, n=1, ax=None, *args,
188
**kwargs):
199

bore/plugins/hpbandster.py

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from tensorflow.keras.losses import BinaryCrossentropy
55
import scipy.stats as sps
66

7-
from ..engine import Ledger, minimize_multi_start, is_duplicate
7+
from ..engine import Record, minimize_multi_start
88
from ..types import DenseConfigurationSpace, DenseConfiguration
99
from ..models import DenseSequential
1010
from ..decorators import unbatch, value_and_gradient, numpy_io
@@ -20,19 +20,21 @@ def __init__(self, config_space, eta=3, min_budget=0.01, max_budget=1,
2020
num_restarts=10, batch_size=64, num_steps_per_iter=1000,
2121
optimizer="adam", num_layers=2, num_units=32,
2222
activation="relu", normalize=True, method="L-BFGS-B",
23-
max_iter=100, ftol=1e-2, distortion=1e-3, seed=None, **kwargs):
23+
max_iter=100, ftol=1e-2, distortion=None, seed=None, **kwargs):
2424

2525
if gamma is None:
2626
gamma = 1/eta
2727

2828
cg = RatioEstimator(config_space=config_space, gamma=gamma,
29-
num_random_init=num_random_init, random_rate=random_rate,
30-
num_restarts=num_restarts, batch_size=batch_size,
29+
num_random_init=num_random_init,
30+
random_rate=random_rate, num_restarts=num_restarts,
31+
batch_size=batch_size,
3132
num_steps_per_iter=num_steps_per_iter,
3233
optimizer=optimizer, num_layers=num_layers,
3334
num_units=num_units, activation=activation,
3435
normalize=normalize, method=method,
35-
max_iter=max_iter, ftol=ftol, distortion=distortion, seed=seed)
36+
max_iter=max_iter, ftol=ftol,
37+
distortion=distortion, seed=seed)
3638
# (LT): Note this is using the *grandparent* class initializer to
3739
# replace the config_generator!
3840
super(HyperBand, self).__init__(config_generator=cg, **kwargs)
@@ -71,7 +73,7 @@ def __init__(self, config_space, gamma=1/3, num_random_init=10,
7173
random_rate=0.25, num_restarts=3, batch_size=64,
7274
num_steps_per_iter=1000, optimizer="adam", num_layers=2,
7375
num_units=32, activation="relu", normalize=True,
74-
method="L-BFGS-B", max_iter=100, ftol=1e-2, distortion=1e-3,
76+
method="L-BFGS-B", max_iter=100, ftol=1e-2, distortion=None,
7577
seed=None, **kwargs):
7678

7779
super(RatioEstimator, self).__init__(**kwargs)
@@ -106,7 +108,7 @@ def __init__(self, config_space, gamma=1/3, num_random_init=10,
106108
self.batch_size = batch_size
107109
self.num_steps_per_iter = num_steps_per_iter
108110

109-
self.ledger = Ledger()
111+
self.record = Record()
110112

111113
self.seed = seed
112114
self.random_state = np.random.RandomState(seed)
@@ -151,9 +153,9 @@ def loss(x):
151153

152154
def _update_model(self):
153155

154-
X, z = self.ledger.load_classification_data(self.gamma)
156+
X, z = self.record.load_classification_data(self.gamma)
155157

156-
dataset_size = self.ledger.size()
158+
dataset_size = self.record.size()
157159
steps_per_epoch = self._get_steps_per_epoch(dataset_size)
158160
num_epochs = self.num_steps_per_iter // steps_per_epoch
159161

@@ -189,8 +191,10 @@ def _get_maximum(self):
189191
f" ({res.message})")
190192

191193
# TODO(LT): Create Enum type for these status codes
192-
if (res.status == 0 or res.status == 9) and \
193-
not is_duplicate(res.x, self.ledger.features):
194+
# status == 1 signifies maximum iteration reached, which we don't
195+
# want to treat as a failure condition.
196+
if (res.success or res.status == 1) and \
197+
not self.record.is_duplicate(res.x):
194198
# if (res_best is not None) *implies* (res.fun < res_best.fun)
195199
# (i.e. material implication) is logically equivalent to below
196200
if res_best is None or res.fun < res_best.fun:
@@ -200,20 +204,20 @@ def _get_maximum(self):
200204

201205
def get_config(self, budget):
202206

203-
dataset_size = self.ledger.size()
207+
dataset_size = self.record.size()
204208

205209
config_random = self.config_space.sample_configuration()
206210
config_random_dict = config_random.get_dictionary()
207211

208212
if dataset_size < self.num_random_init:
209213
self.logger.debug(f"Completed {dataset_size}/{self.num_random_init}"
210-
" initial runs. Returning random candidate...")
214+
" initial runs. Suggesting random candidate...")
211215
return (config_random_dict, {})
212216

213217
if self.random_state.binomial(p=self.random_rate, n=1):
214218
self.logger.info("[Glob. maximum: skipped "
215219
f"(prob={self.random_rate:.2f})] "
216-
"Returning random candidate ...")
220+
"Suggesting random candidate ...")
217221
return (config_random_dict, {})
218222

219223
# Update model
@@ -227,11 +231,9 @@ def get_config(self, budget):
227231
self.logger.warn("[Glob. maximum: not found!] Either optimization "
228232
f"failed in all {self.num_restarts} starts, or "
229233
"all maxima found have been evaluated previously!"
230-
" Returning random candidate...")
234+
" Suggesting random candidate...")
231235
return (config_random_dict, {})
232236

233-
self.logger.info(f"[Glob. maximum: value={-opt.fun:.3f}, x={opt.x}")
234-
235237
loc = opt.x
236238

237239
if self.distortion is None:
@@ -246,6 +248,9 @@ def get_config(self, budget):
246248

247249
config_opt_arr = dist.rvs(random_state=self.random_state)
248250

251+
self.logger.info(f"[Glob. maximum: value={-opt.fun:.3f} x={loc}] "
252+
f"Suggesting x={config_opt_arr}")
253+
249254
config_opt_dict = self._dict_from_array(config_opt_arr)
250255

251256
return (config_opt_dict, {})
@@ -262,4 +267,4 @@ def new_result(self, job, update_model=True):
262267

263268
loss = job.result["loss"]
264269

265-
self.ledger.append(x=config_arr, y=loss, b=budget)
270+
self.record.append(x=config_arr, y=loss, b=budget)

bore/utils.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +0,0 @@
1-
import pandas as pd
2-
3-
4-
def dataframe_from_result(results):
5-
6-
rows = []
7-
8-
for task, config_id in enumerate(results.data):
9-
10-
d = results.data[config_id]
11-
bracket, _, _ = config_id
12-
13-
for epoch in d.results:
14-
15-
row = dict(task=task,
16-
bracket=bracket,
17-
epoch=int(epoch),
18-
loss=d.results[epoch]["loss"],
19-
info=d.results[epoch]["info"],
20-
submitted=d.time_stamps[epoch]["submitted"],
21-
started=d.time_stamps[epoch]["started"],
22-
finished=d.time_stamps[epoch]["finished"])
23-
row.update(d.config)
24-
rows.append(row)
25-
26-
return pd.DataFrame(rows)

requirements.txt

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,7 @@
1-
tensorflow-probability==0.11.0
21
tqdm>=4.48.2
3-
# numpy>=1.17.3
42
scipy>=1.4.1
53
scikit-learn>=0.23.2
64
pandas>=1.1.1
75
seaborn>=0.11.0
86
tables>=3.6.1
97
statsmodels>=0.12.0
10-
11-
GPy==1.9.9
12-
gpyopt==1.2.6

requirements_dev.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,7 @@ nbsphinx
1616
sphinx_bootstrap_theme
1717
sphinx-gallery>=0.7.0
1818
pillow
19+
20+
GPy==1.9.9
21+
gpyopt==1.2.6
22+
hyperopt

scripts/plotting/plot_result.py

Lines changed: 17 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import sys
22
import click
3+
import yaml
34

45
import pandas as pd
56

@@ -10,28 +11,27 @@
1011
from utils import (GOLDEN_RATIO, WIDTH, size, load_frame, extract_series,
1112
merge_stack_series, get_error_mins, sanitize, get_ci)
1213

13-
OUTPUT_DIR = "figures/"
14-
1514

1615
@click.command()
1716
@click.argument("benchmark_name")
18-
@click.argument("input_dir", default="results",
17+
@click.argument("input_dir", default="results/",
18+
type=click.Path(file_okay=False, dir_okay=True))
19+
@click.argument("output_dir", default="figures/",
1920
type=click.Path(file_okay=False, dir_okay=True))
2021
@click.option('--num-runs', '-n', default=20)
2122
@click.option('--methods', '-m', multiple=True)
2223
@click.option('--ci')
23-
@click.option('--duration-key', default="info")
24+
@click.option('--duration-key', default=None)
2425
@click.option('--context', default="paper")
2526
@click.option('--style', default="ticks")
2627
@click.option('--palette', default="muted")
2728
@click.option('--width', '-w', type=float, default=WIDTH)
2829
@click.option('--aspect', '-a', type=float, default=GOLDEN_RATIO)
2930
@click.option('--extension', '-e', multiple=True, default=["png"])
30-
@click.option("--output-dir", default=OUTPUT_DIR,
31-
type=click.Path(file_okay=False, dir_okay=True),
32-
help="Output directory.")
33-
def main(benchmark_name, input_dir, num_runs, methods, ci, duration_key,
34-
context, style, palette, width, aspect, extension, output_dir):
31+
@click.option("--config-file", type=click.File('r'))
32+
def main(benchmark_name, input_dir, output_dir, num_runs, methods, ci,
33+
duration_key, context, style, palette, width, aspect, extension,
34+
config_file):
3535

3636
figsize = size(width, aspect)
3737
height = width / aspect
@@ -48,19 +48,8 @@ def main(benchmark_name, input_dir, num_runs, methods, ci, duration_key,
4848
output_path = Path(output_dir).joinpath(benchmark_name)
4949
output_path.mkdir(parents=True, exist_ok=True)
5050

51-
METHOD_PRETTY_NAMES = {
52-
"random": "Random",
53-
"tpe": "TPE",
54-
"bore": "BORE",
55-
"bore-steps-50": "BORE (50 steps)",
56-
"bore-steps": "BORE (250 steps)",
57-
"boredom": "BOREDOM",
58-
"boredom-real": "BORE",
59-
"bore-sigmoid-elu-ftol-1e-2-gamma-0.33333333333333333333": "BORE",
60-
"boredom-real": "BORE",
61-
"bore-logit-elu-ftol-1e-2-gamma-0.33333333333333333333": "BORE",
62-
"bore-sigmoid-elu-ftol-1e-9-random-0.1": "BORE"
63-
}
51+
config = yaml.safe_load(config_file) if config_file else {}
52+
method_names_mapping = config.get("names", {})
6453

6554
loss_min = get_error_mins(benchmark_name, input_dir,
6655
data_dir="datasets/fcnet_tabular_benchmarks")
@@ -78,28 +67,25 @@ def main(benchmark_name, input_dir, num_runs, methods, ci, duration_key,
7867
frame = load_frame(path, run, loss_min=loss_min,
7968
duration_key=duration_key)
8069
frames.append(frame.assign(method=method))
81-
8270
series[run] = extract_series(frame, index="elapsed", column="regret")
8371

8472
frame_merged = merge_stack_series(series, y_key="regret")
8573
frames_merged.append(frame_merged.assign(method=method))
8674

8775
data = pd.concat(frames, axis="index", ignore_index=True, sort=True)
88-
data = sanitize(data, mapping=METHOD_PRETTY_NAMES)
76+
data = sanitize(data, mapping=method_names_mapping)
8977

9078
data_merged = pd.concat(frames_merged, axis="index", ignore_index=True, sort=True)
91-
data_merged = sanitize(data_merged, mapping=METHOD_PRETTY_NAMES)
92-
93-
print(data_merged)
79+
data_merged = sanitize(data_merged, mapping=method_names_mapping)
9480

95-
hue_order = style_order = list(map(METHOD_PRETTY_NAMES.get, methods))
81+
hue_order = style_order = list(map(method_names_mapping.get, methods))
9682

9783
fig, ax = plt.subplots()
9884
sns.despine(fig=fig, ax=ax, top=True)
9985

10086
sns.lineplot(x="evaluation", y="regret",
10187
hue="method", # hue_order=hue_order,
102-
style="method", # style_order=style_order,
88+
# style="method", style_order=style_order,
10389
# units="run", estimator=None,
10490
ci=get_ci(ci), err_kws=dict(edgecolor='none'),
10591
data=data, ax=ax)
@@ -120,7 +106,7 @@ def main(benchmark_name, input_dir, num_runs, methods, ci, duration_key,
120106

121107
sns.lineplot(x="elapsed", y="regret",
122108
hue="method", # hue_order=hue_order,
123-
style="method", # style_order=style_order,
109+
# style="method", style_order=style_order,
124110
# units="run", estimator=None,
125111
ci=get_ci(ci), err_kws=dict(edgecolor='none'),
126112
data=data_merged, ax=ax)
@@ -155,7 +141,7 @@ def main(benchmark_name, input_dir, num_runs, methods, ci, duration_key,
155141
# # for ext in extension:
156142
# # g.savefig(output_path.joinpath(f"error_vs_iterations_{context}_{suffix}.{ext}"))
157143

158-
# return 0
144+
return 0
159145

160146

161147
if __name__ == "__main__":

0 commit comments

Comments
 (0)