Skip to content

Commit

Permalink
added HyperOpt dev dependency
Browse files Browse the repository at this point in the history
  • Loading branch information
ltiao committed Sep 26, 2020
1 parent 2301475 commit 19805d3
Show file tree
Hide file tree
Showing 14 changed files with 377 additions and 201 deletions.
16 changes: 8 additions & 8 deletions bore/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# `plugins.hpbandster` and place it here.


class Ledger:
class Record:

def __init__(self):
self.features = []
Expand Down Expand Up @@ -47,13 +47,13 @@ def to_dataframe(self):
loss=self.targets)
return frame


def is_duplicate(x, xs, rtol=1e-5, atol=1e-8):
    """Return True if `x` is numerically close to any element of `xs`.

    Each element of `xs` is compared against `x` with `np.allclose` using
    the given relative (`rtol`) and absolute (`atol`) tolerances.
    """
    # A plain linear scan is intentional. Spatial indexes (KD-trees) or
    # locality sensitive hashing (LSH) would be premature optimizations
    # here, and the scan stops at the first match anyway.
    for x_prev in xs:
        if np.allclose(x_prev, x, rtol=rtol, atol=atol):
            return True
    return False
def is_duplicate(self, x, rtol=1e-5, atol=1e-8):
    """Return True if `x` is numerically close to a recorded feature.

    Every entry of `self.features` is compared against `x` with
    `np.allclose` using the given relative (`rtol`) and absolute (`atol`)
    tolerances.
    """
    # A plain linear scan is intentional. Spatial indexes (KD-trees) or
    # locality sensitive hashing (LSH) would be premature optimizations
    # here, and the scan stops at the first match anyway.
    for x_prev in self.features:
        if np.allclose(x_prev, x, rtol=rtol, atol=atol):
            return True
    return False


minimize_multi_start = multi_start(minimizer_fn=minimize)
10 changes: 0 additions & 10 deletions bore/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,6 @@
import matplotlib.pyplot as plt


def plot_image_grid(ax, images, shape, nrows=20, ncols=None, cmap=None):
    """Tile the leading images into one mosaic and draw it on `ax`.

    The first `nrows * ncols` entries of `images` are reshaped to
    `(nrows, ncols, *shape)`, singleton axes are squeezed out, and the
    tiles are stitched together with `np.dstack` followed by `np.vstack`.
    When `ncols` is None it defaults to `nrows`.

    Returns whatever `ax.imshow` returns for the stitched array.
    """
    # Fall back to a square grid when the column count is not given.
    num_cols = nrows if ncols is None else ncols

    num_tiles = nrows * num_cols
    tiles = images[:num_tiles].reshape(nrows, num_cols, *shape)
    grid = tiles.squeeze()

    mosaic = np.vstack(np.dstack(grid))
    return ax.imshow(mosaic, cmap=cmap)


def fill_between_stddev(X_pred, mean_pred, stddev_pred, n=1, ax=None, *args,
**kwargs):

Expand Down
41 changes: 23 additions & 18 deletions bore/plugins/hpbandster.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from tensorflow.keras.losses import BinaryCrossentropy
import scipy.stats as sps

from ..engine import Ledger, minimize_multi_start, is_duplicate
from ..engine import Record, minimize_multi_start
from ..types import DenseConfigurationSpace, DenseConfiguration
from ..models import DenseSequential
from ..decorators import unbatch, value_and_gradient, numpy_io
Expand All @@ -20,19 +20,21 @@ def __init__(self, config_space, eta=3, min_budget=0.01, max_budget=1,
num_restarts=10, batch_size=64, num_steps_per_iter=1000,
optimizer="adam", num_layers=2, num_units=32,
activation="relu", normalize=True, method="L-BFGS-B",
max_iter=100, ftol=1e-2, distortion=1e-3, seed=None, **kwargs):
max_iter=100, ftol=1e-2, distortion=None, seed=None, **kwargs):

if gamma is None:
gamma = 1/eta

cg = RatioEstimator(config_space=config_space, gamma=gamma,
num_random_init=num_random_init, random_rate=random_rate,
num_restarts=num_restarts, batch_size=batch_size,
num_random_init=num_random_init,
random_rate=random_rate, num_restarts=num_restarts,
batch_size=batch_size,
num_steps_per_iter=num_steps_per_iter,
optimizer=optimizer, num_layers=num_layers,
num_units=num_units, activation=activation,
normalize=normalize, method=method,
max_iter=max_iter, ftol=ftol, distortion=distortion, seed=seed)
max_iter=max_iter, ftol=ftol,
distortion=distortion, seed=seed)
# (LT): Note this is using the *grandparent* class initializer to
# replace the config_generator!
super(HyperBand, self).__init__(config_generator=cg, **kwargs)
Expand Down Expand Up @@ -71,7 +73,7 @@ def __init__(self, config_space, gamma=1/3, num_random_init=10,
random_rate=0.25, num_restarts=3, batch_size=64,
num_steps_per_iter=1000, optimizer="adam", num_layers=2,
num_units=32, activation="relu", normalize=True,
method="L-BFGS-B", max_iter=100, ftol=1e-2, distortion=1e-3,
method="L-BFGS-B", max_iter=100, ftol=1e-2, distortion=None,
seed=None, **kwargs):

super(RatioEstimator, self).__init__(**kwargs)
Expand Down Expand Up @@ -106,7 +108,7 @@ def __init__(self, config_space, gamma=1/3, num_random_init=10,
self.batch_size = batch_size
self.num_steps_per_iter = num_steps_per_iter

self.ledger = Ledger()
self.record = Record()

self.seed = seed
self.random_state = np.random.RandomState(seed)
Expand Down Expand Up @@ -151,9 +153,9 @@ def loss(x):

def _update_model(self):

X, z = self.ledger.load_classification_data(self.gamma)
X, z = self.record.load_classification_data(self.gamma)

dataset_size = self.ledger.size()
dataset_size = self.record.size()
steps_per_epoch = self._get_steps_per_epoch(dataset_size)
num_epochs = self.num_steps_per_iter // steps_per_epoch

Expand Down Expand Up @@ -189,8 +191,10 @@ def _get_maximum(self):
f" ({res.message})")

# TODO(LT): Create Enum type for these status codes
if (res.status == 0 or res.status == 9) and \
not is_duplicate(res.x, self.ledger.features):
# status == 1 signifies maximum iteration reached, which we don't
# want to treat as a failure condition.
if (res.success or res.status == 1) and \
not self.record.is_duplicate(res.x):
# if (res_best is not None) *implies* (res.fun < res_best.fun)
# (i.e. material implication) is logically equivalent to below
if res_best is None or res.fun < res_best.fun:
Expand All @@ -200,20 +204,20 @@ def _get_maximum(self):

def get_config(self, budget):

dataset_size = self.ledger.size()
dataset_size = self.record.size()

config_random = self.config_space.sample_configuration()
config_random_dict = config_random.get_dictionary()

if dataset_size < self.num_random_init:
self.logger.debug(f"Completed {dataset_size}/{self.num_random_init}"
" initial runs. Returning random candidate...")
" initial runs. Suggesting random candidate...")
return (config_random_dict, {})

if self.random_state.binomial(p=self.random_rate, n=1):
self.logger.info("[Glob. maximum: skipped "
f"(prob={self.random_rate:.2f})] "
"Returning random candidate ...")
"Suggesting random candidate ...")
return (config_random_dict, {})

# Update model
Expand All @@ -227,11 +231,9 @@ def get_config(self, budget):
self.logger.warn("[Glob. maximum: not found!] Either optimization "
f"failed in all {self.num_restarts} starts, or "
"all maxima found have been evaluated previously!"
" Returning random candidate...")
" Suggesting random candidate...")
return (config_random_dict, {})

self.logger.info(f"[Glob. maximum: value={-opt.fun:.3f}, x={opt.x}")

loc = opt.x

if self.distortion is None:
Expand All @@ -246,6 +248,9 @@ def get_config(self, budget):

config_opt_arr = dist.rvs(random_state=self.random_state)

self.logger.info(f"[Glob. maximum: value={-opt.fun:.3f} x={loc}] "
f"Suggesting x={config_opt_arr}")

config_opt_dict = self._dict_from_array(config_opt_arr)

return (config_opt_dict, {})
Expand All @@ -262,4 +267,4 @@ def new_result(self, job, update_model=True):

loss = job.result["loss"]

self.ledger.append(x=config_arr, y=loss, b=budget)
self.record.append(x=config_arr, y=loss, b=budget)
26 changes: 0 additions & 26 deletions bore/utils.py
Original file line number Diff line number Diff line change
@@ -1,26 +0,0 @@
import pandas as pd


def dataframe_from_result(results):
    """Flatten `results.data` into a DataFrame with one row per epoch.

    `results.data` is iterated by configuration id; each id is unpacked as
    a 3-tuple whose first element is stored as `bracket`, and the
    enumeration index is stored as `task`. For every epoch key of an
    entry's `results`, a row is built from the loss, info and the
    submitted/started/finished time stamps, then extended with the
    entry's `config` mapping.
    """
    records = []

    for task, config_id in enumerate(results.data):

        datum = results.data[config_id]
        # Only the first component of the id is used.
        bracket, _, _ = config_id

        for epoch in datum.results:

            record = dict(task=task, bracket=bracket, epoch=int(epoch))

            outcome = datum.results[epoch]
            record.update(loss=outcome["loss"], info=outcome["info"])

            stamps = datum.time_stamps[epoch]
            record.update(submitted=stamps["submitted"],
                          started=stamps["started"],
                          finished=stamps["finished"])

            # Configuration values become additional columns.
            record.update(datum.config)
            records.append(record)

    return pd.DataFrame(records)
5 changes: 0 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
tensorflow-probability==0.11.0
tqdm>=4.48.2
# numpy>=1.17.3
scipy>=1.4.1
scikit-learn>=0.23.2
pandas>=1.1.1
seaborn>=0.11.0
tables>=3.6.1
statsmodels>=0.12.0

GPy==1.9.9
gpyopt==1.2.6
4 changes: 4 additions & 0 deletions requirements_dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,7 @@ nbsphinx
sphinx_bootstrap_theme
sphinx-gallery>=0.7.0
pillow

GPy==1.9.9
gpyopt==1.2.6
hyperopt
48 changes: 17 additions & 31 deletions scripts/plotting/plot_result.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import sys
import click
import yaml

import pandas as pd

Expand All @@ -10,28 +11,27 @@
from utils import (GOLDEN_RATIO, WIDTH, size, load_frame, extract_series,
merge_stack_series, get_error_mins, sanitize, get_ci)

OUTPUT_DIR = "figures/"


@click.command()
@click.argument("benchmark_name")
@click.argument("input_dir", default="results",
@click.argument("input_dir", default="results/",
type=click.Path(file_okay=False, dir_okay=True))
@click.argument("output_dir", default="figures/",
type=click.Path(file_okay=False, dir_okay=True))
@click.option('--num-runs', '-n', default=20)
@click.option('--methods', '-m', multiple=True)
@click.option('--ci')
@click.option('--duration-key', default="info")
@click.option('--duration-key', default=None)
@click.option('--context', default="paper")
@click.option('--style', default="ticks")
@click.option('--palette', default="muted")
@click.option('--width', '-w', type=float, default=WIDTH)
@click.option('--aspect', '-a', type=float, default=GOLDEN_RATIO)
@click.option('--extension', '-e', multiple=True, default=["png"])
@click.option("--output-dir", default=OUTPUT_DIR,
type=click.Path(file_okay=False, dir_okay=True),
help="Output directory.")
def main(benchmark_name, input_dir, num_runs, methods, ci, duration_key,
context, style, palette, width, aspect, extension, output_dir):
@click.option("--config-file", type=click.File('r'))
def main(benchmark_name, input_dir, output_dir, num_runs, methods, ci,
duration_key, context, style, palette, width, aspect, extension,
config_file):

figsize = size(width, aspect)
height = width / aspect
Expand All @@ -48,19 +48,8 @@ def main(benchmark_name, input_dir, num_runs, methods, ci, duration_key,
output_path = Path(output_dir).joinpath(benchmark_name)
output_path.mkdir(parents=True, exist_ok=True)

METHOD_PRETTY_NAMES = {
"random": "Random",
"tpe": "TPE",
"bore": "BORE",
"bore-steps-50": "BORE (50 steps)",
"bore-steps": "BORE (250 steps)",
"boredom": "BOREDOM",
"boredom-real": "BORE",
"bore-sigmoid-elu-ftol-1e-2-gamma-0.33333333333333333333": "BORE",
"boredom-real": "BORE",
"bore-logit-elu-ftol-1e-2-gamma-0.33333333333333333333": "BORE",
"bore-sigmoid-elu-ftol-1e-9-random-0.1": "BORE"
}
config = yaml.safe_load(config_file) if config_file else {}
method_names_mapping = config.get("names", {})

loss_min = get_error_mins(benchmark_name, input_dir,
data_dir="datasets/fcnet_tabular_benchmarks")
Expand All @@ -78,28 +67,25 @@ def main(benchmark_name, input_dir, num_runs, methods, ci, duration_key,
frame = load_frame(path, run, loss_min=loss_min,
duration_key=duration_key)
frames.append(frame.assign(method=method))

series[run] = extract_series(frame, index="elapsed", column="regret")

frame_merged = merge_stack_series(series, y_key="regret")
frames_merged.append(frame_merged.assign(method=method))

data = pd.concat(frames, axis="index", ignore_index=True, sort=True)
data = sanitize(data, mapping=METHOD_PRETTY_NAMES)
data = sanitize(data, mapping=method_names_mapping)

data_merged = pd.concat(frames_merged, axis="index", ignore_index=True, sort=True)
data_merged = sanitize(data_merged, mapping=METHOD_PRETTY_NAMES)

print(data_merged)
data_merged = sanitize(data_merged, mapping=method_names_mapping)

hue_order = style_order = list(map(METHOD_PRETTY_NAMES.get, methods))
hue_order = style_order = list(map(method_names_mapping.get, methods))

fig, ax = plt.subplots()
sns.despine(fig=fig, ax=ax, top=True)

sns.lineplot(x="evaluation", y="regret",
hue="method", # hue_order=hue_order,
style="method", # style_order=style_order,
# style="method", style_order=style_order,
# units="run", estimator=None,
ci=get_ci(ci), err_kws=dict(edgecolor='none'),
data=data, ax=ax)
Expand All @@ -120,7 +106,7 @@ def main(benchmark_name, input_dir, num_runs, methods, ci, duration_key,

sns.lineplot(x="elapsed", y="regret",
hue="method", # hue_order=hue_order,
style="method", # style_order=style_order,
# style="method", style_order=style_order,
# units="run", estimator=None,
ci=get_ci(ci), err_kws=dict(edgecolor='none'),
data=data_merged, ax=ax)
Expand Down Expand Up @@ -155,7 +141,7 @@ def main(benchmark_name, input_dir, num_runs, methods, ci, duration_key,
# # for ext in extension:
# # g.savefig(output_path.joinpath(f"error_vs_iterations_{context}_{suffix}.{ext}"))

# return 0
return 0


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 19805d3

Please sign in to comment.