ltiao
diff --git a/‎bore/benchmarks.py‎
Lines changed: 22 additions & 2 deletions b/‎bore/benchmarks.py‎
Lines changed: 22 additions & 2 deletions
diff --git a/‎bore/engine.py‎
Lines changed: 77 additions & 56 deletions b/‎bore/engine.py‎
Lines changed: 77 additions & 56 deletions
diff --git a/‎bore/losses.py‎
Lines changed: 0 additions & 5 deletions b/‎bore/losses.py‎
Lines changed: 0 additions & 5 deletions
diff --git a/‎bore/utils.py‎
Lines changed: 13 additions & 10 deletions b/‎bore/utils.py‎
Lines changed: 13 additions & 10 deletions
diff --git a/‎scripts/plotting/plot_result.py‎
Lines changed: 18 additions & 9 deletions b/‎scripts/plotting/plot_result.py‎
Lines changed: 18 additions & 9 deletions
@@ -8,6 +8,14 @@
                                 FCNetParkinsonsTelemonitoringBenchmark)
 
 
+def goldstein_price(x, y):
+    """Evaluate the Goldstein-Price test function at ``(x, y)``.
+
+    The function is the product of two polynomial factors and attains its
+    global minimum ``f(0, -1) = 3``.
+    """
+    a = 1 + (x + y + 1)**2 * (19 - 14*x + 3*x**2 - 14*y + 6*x*y + 3*y**2)
+    # The second factor carries `+ 48*y - 36*x*y`; with those two signs
+    # flipped the function no longer evaluates to 3 at (0, -1).
+    b = 30 + (2*x - 3*y)**2 * (18 - 32*x + 12*x**2 + 48*y - 36*x*y + 27*y**2)
+
+    return a * b
+
+
 def branin(x, y, a=1.0, b=5.1/(4*np.pi**2), c=5.0/np.pi, r=6.0, s=10.0,
            t=1.0/(8*np.pi)):
+    """Evaluate the Branin test function at ``(x, y)``.
+
+    Computes ``a*(y - b*x**2 + c*x - r)**2 + s*(1 - t)*cos(x) + s`` using the
+    coefficients supplied through the keyword arguments.
+    """
     return a*(y - b * x**2 + c*x - r)**2 + s*(1 - t)*np.cos(x) + s
@@ -32,9 +40,7 @@ def hartmann(x, alpha, A, P):
 class BraninWorker(Worker):
 
     def compute(self, config, budget, **kwargs):
-
         y = branin(**config)
-
         return dict(loss=y, info=None)
 
     @staticmethod
@@ -45,6 +51,20 @@ def get_config_space():
         return cs
 
 
+class GoldsteinPriceWorker(Worker):
+    """Worker that reports the Goldstein-Price function value as the loss."""
+
+    def compute(self, config, budget, **kwargs):
+        """Evaluate ``goldstein_price`` at ``config`` and return it as ``loss``.
+
+        ``budget`` and any extra keyword arguments are accepted but unused.
+        """
+        y = goldstein_price(**config)
+        return dict(loss=y, info=None)
+
+    @staticmethod
+    def get_config_space():
+        """Build the search space with ``x`` and ``y`` uniform on [-2, 2]."""
+        cs = CS.ConfigurationSpace()
+        # The Goldstein-Price function is defined on [-2, 2]^2; the unit
+        # square would exclude its global minimiser at (0, -1).
+        cs.add_hyperparameter(CS.UniformFloatHyperparameter("x", lower=-2, upper=2))
+        cs.add_hyperparameter(CS.UniformFloatHyperparameter("y", lower=-2, upper=2))
+        return cs
+
+
 class BoreholeWorker(Worker):
 
     def compute(self, config, budget, **kwargs):
 
@@ -1,14 +1,13 @@
 import numpy as np
 import tensorflow as tf
 
-# from tensorflow.keras.initializers import GlorotUniform
-from tensorflow.keras.regularizers import l2
-
 from scipy.optimize import minimize
 
+from tensorflow.keras.losses import BinaryCrossentropy
+# from tensorflow.keras.initializers import GlorotUniform
+
 from .types import DenseConfigurationSpace, DenseConfiguration
 from .models import DenseSequential
-from .losses import binary_crossentropy_from_logits
 from .decorators import unbatch, value_and_gradient, numpy_io
 from .optimizers import multi_start
 
@@ -31,7 +30,8 @@ def __init__(self, config_space, eta=3, min_budget=0.01, max_budget=1,
                  gamma=None, num_random_init=10, random_rate=0.25,
                  num_restarts=10, batch_size=64, num_steps_per_iter=1000,
                  optimizer="adam", num_layers=2, num_units=32,
-                 activation="relu", seed=None, **kwargs):
+                 activation="relu", normalize=True, method="L-BFGS-B",
+                 max_iter=100, ftol=1e-2, seed=None, **kwargs):
 
         if gamma is None:
             gamma = 1/eta
@@ -41,7 +41,9 @@ def __init__(self, config_space, eta=3, min_budget=0.01, max_budget=1,
                  random_rate=random_rate, num_restarts=num_restarts,
                  batch_size=batch_size, num_steps_per_iter=num_steps_per_iter,
                  optimizer=optimizer, num_layers=num_layers,
-                 num_units=num_units, activation=activation, seed=seed)
+                 num_units=num_units, activation=activation,
+                 normalize=normalize, method=method, max_iter=max_iter,
+                 ftol=ftol, seed=seed)
         # (LT): Note this is using the *grandparent* class initializer to
         # replace the config_generator!
         super(HyperBand, self).__init__(config_generator=cg, **kwargs)
@@ -76,20 +78,32 @@ class DRE(base_config_generator):
     class to implement random sampling from a ConfigSpace
     """
     def __init__(self, config_space, gamma=1/3, num_random_init=10,
-                 random_rate=0.25, num_restarts=10, batch_size=64,
+                 random_rate=0.25, num_restarts=3, batch_size=64,
                  num_steps_per_iter=1000, optimizer="adam", num_layers=2,
-                 num_units=32, activation="relu", seed=None, **kwargs):
+                 num_units=32, activation="relu", normalize=True,
+                 method="L-BFGS-B", max_iter=100, ftol=1e-2, seed=None,
+                 **kwargs):
 
         super(DRE, self).__init__(**kwargs)
 
+        assert 0. <= gamma < 1., "`gamma` must be in [0, 1)"
+        assert 0. <= random_rate < 1., "`random_rate` must be in [0, 1)"
+        assert num_random_init > 0
+        assert num_restarts > 0
+
         self.config_space = DenseConfigurationSpace(config_space, seed=seed)
+        self.bounds = self.config_space.get_bounds()
+
+        self.logit = self._build_compile_network(num_layers, num_units,
+                                                 activation, optimizer)
+        self.loss = self._build_loss(self.logit, normalize=normalize)
+        self.minimizer = self._build_minimizer(num_restarts=num_restarts,
+                                               method=method, ftol=ftol,
+                                               max_iter=max_iter)
 
         self.gamma = gamma
         self.num_random_init = num_random_init
-
-        assert 0. <= random_rate <= 1., "random rate must be in [0, 1]"
         self.random_rate = random_rate
-
         self.num_restarts = num_restarts
 
         self.batch_size = batch_size
@@ -98,44 +112,60 @@ def __init__(self, config_space, gamma=1/3, num_random_init=10,
         self.config_arrs = []
         self.losses = []
 
-        l2_factor = 1e-4
-
-        self._init_model(num_layers, num_units, activation, optimizer, l2_factor)
-
         self.seed = seed
         self.random_state = np.random.RandomState(seed)
 
-    def _init_model(self, num_layers, num_units, activation, optimizer, l2_factor):
+    @staticmethod
+    def _build_compile_network(num_layers, num_units, activation, optimizer):
+        """Create and compile the single-output ``DenseSequential`` network.
+
+        ``activation`` is forwarded to ``DenseSequential`` through
+        ``layer_kws``. The network is compiled with ``optimizer``, a binary
+        cross-entropy loss computed from logits, and an accuracy metric.
+        """
+        layer_kws = dict(activation=activation)
+        classifier = DenseSequential(output_dim=1, num_layers=num_layers,
+                                     num_units=num_units, layer_kws=layer_kws)
+        bce_from_logits = BinaryCrossentropy(from_logits=True)
+        classifier.compile(optimizer=optimizer, loss=bce_from_logits,
+                           metrics=["accuracy"])
+        return classifier
+
+    @staticmethod
+    def _build_loss(logit, normalize=True):
+        """Build the objective that is handed to the minimizer.
+
+        The returned callable computes ``-activation(logit(x))``, where
+        ``activation`` is ``tf.sigmoid`` when ``normalize`` is true and
+        ``tf.identity`` otherwise. It is wrapped by the ``unbatch``,
+        ``value_and_gradient`` and ``numpy_io`` decorators, in that order.
+        """
+
+        if normalize:
+            activation = tf.sigmoid
+        else:
+            activation = tf.identity
+
+        @numpy_io
+        @value_and_gradient
+        @unbatch
+        def loss(x):
+            # Negated so that minimizing this maximizes the network output.
+            return - activation(logit(x))
 
-        self.model = DenseSequential(output_dim=1,
-                                     num_layers=num_layers,
-                                     num_units=num_units,
-                                     layer_kws=dict(activation=activation,
-                                                    kernel_regularizer=l2(l2_factor))) # TODO(LT): make this an argument
-        self.model.compile(optimizer=optimizer, metrics=["accuracy"],
-                           loss=binary_crossentropy_from_logits)
+        return loss
 
     @staticmethod
-    def make_minimizer(num_restarts, method="L-BFGS-B", max_iter=10000,
-                       tol=1e-8):
+    def _build_minimizer(num_restarts, method="L-BFGS-B", max_iter=100,
+                         ftol=1e-2):
+        """Build a multi-start minimizer around ``scipy.optimize.minimize``.
+
+        ``num_restarts`` is handed to the ``multi_start`` decorator, ``method``
+        selects the SciPy solver, and ``max_iter``/``ftol`` are forwarded as
+        the solver's ``maxiter``/``ftol`` options. Because ``jac=True`` is
+        set, the objective must return ``(value, gradient)``.
+        """
+        # NOTE(review): ``ftol`` is a solver-specific option; confirm it is
+        # accepted when ``method`` is changed from the default.
 
         @multi_start(num_restarts=num_restarts)
         def multi_start_minimizer(fn, x0, bounds):
             return minimize(fn, x0=x0, method=method, jac=True, bounds=bounds,
-                            tol=tol, options=dict(maxiter=max_iter))
+                            options=dict(maxiter=max_iter, ftol=ftol))
 
         return multi_start_minimizer
 
-    def make_minimizee(self):
-
-        @numpy_io
-        @value_and_gradient
-        @unbatch
-        def func(x):
+    def _load_data(self):
+        """Stack the recorded configurations and losses into arrays.
+
+        Returns the pair ``(np.vstack(self.config_arrs),
+        np.hstack(self.losses))``.
+        """
+        inputs = np.vstack(self.config_arrs)
+        targets = np.hstack(self.losses)
+        return inputs, targets
 
-            return - tf.sigmoid(self.model(x))
+    def _load_labels(self, y):
+        """Flag the entries of ``y`` lying strictly below its gamma-quantile.
+
+        The threshold is ``np.quantile(y, q=self.gamma)``; the result is the
+        elementwise ``y < threshold`` comparison.
+        """
+        threshold = np.quantile(y, q=self.gamma)
+        is_below = np.less(y, threshold)
+        return is_below
 
-        return func
+    def _get_steps_per_epoch(self, dataset_size):
+        """Return ``dataset_size / self.batch_size`` rounded up, as an int."""
+        num_batches = np.ceil(np.true_divide(dataset_size, self.batch_size))
+        return int(num_batches)
 
     def get_config(self, budget):
 
@@ -155,20 +185,15 @@ def get_config(self, budget):
                              "Returning random candidate ...")
             return (config_random_dict, {})
 
-        # Model fitting
-        X = np.vstack(self.config_arrs)
-        y = np.hstack(self.losses)
-
-        y_threshold = np.quantile(y, q=self.gamma)
-        z = np.less_equal(y, y_threshold)
+        X, y = self._load_data()
+        z = self._load_labels(y)
 
-        steps_per_epoch = int(np.ceil(np.true_divide(dataset_size,
-                                                     self.batch_size)))
+        steps_per_epoch = self._get_steps_per_epoch(dataset_size)
         num_epochs = self.num_steps_per_iter // steps_per_epoch
 
-        self.model.fit(X, z, epochs=num_epochs, batch_size=self.batch_size,
+        self.logit.fit(X, z, epochs=num_epochs, batch_size=self.batch_size,
                        verbose=False)  # TODO(LT): Make this an argument
-        loss, accuracy = self.model.evaluate(X, z, verbose=False)
+        loss, accuracy = self.logit.evaluate(X, z, verbose=False)
 
         self.logger.info(f"[Model fit: loss={loss:.3f}, "
                          f"accuracy={accuracy:.3f}] "
@@ -177,36 +202,32 @@ def get_config(self, budget):
                          f"steps per epoch: {steps_per_epoch}, "
                          f"num steps per iter: {self.num_steps_per_iter}, "
                          f"num epochs: {num_epochs}")
-        self.logger.debug(X)
-        self.logger.debug(y)
 
         # Maximize acquisition function
-
-        # TODO(LT): The following three assignments can all be done at
-        #   initialization time
-        minimize = self.make_minimizer(num_restarts=self.num_restarts)
-        func = self.make_minimizee()
-        bounds = self.config_space.get_bounds()
-
         self.logger.debug("Beginning multi-start maximization with "
                           f"{self.num_restarts} starts...")
 
-        results = minimize(func, bounds, random_state=self.random_state)
+        results = self.minimizer(self.loss, self.bounds,
+                                 random_state=self.random_state)
 
         res_best = None
         for i, res in enumerate(results):
             self.logger.debug(f"[Maximum {i+1:02d}/{self.num_restarts:02d}: "
                               f"logit={-res.fun:.3f}] success: {res.success}, "
                               f"iterations: {res.nit:02d}, status: {res.status}"
-                              f" ({res.message.decode('utf-8')})")
+                              f" ({res.message})")
 
-            if res.success and not is_duplicate(res.x, self.config_arrs):
+            # TODO(LT): Create Enum type for these status codes
+            if (res.status == 0 or res.status == 9) and \
+                    not is_duplicate(res.x, self.config_arrs):
                 # if (res_best is not None) *implies* (res.fun < res_best.fun)
                 # (i.e. material implication) is logically equivalent to below
                 if res_best is None or res.fun < res_best.fun:
                     res_best = res
 
         if res_best is None:
+            # TODO(LT): It's actually important to report which one of these
+            # occurred...
             self.logger.warn("[Glob. maximum: not found!] Either optimization "
                              f"failed in all {self.num_restarts} starts, or "
                              "all maxima found have been evaluated previously!"
 
@@ -4,24 +4,27 @@
 from pathlib import Path
 
 
-def dataframe_from_result(result):
+def dataframe_from_result(results):
+    """Flatten ``results.data`` into a DataFrame with one row per entry.
+
+    For every configuration id in ``results.data`` and every key of that
+    entry's ``results`` mapping, a row is emitted holding the running task
+    index, the bracket (first element of the configuration id), the key cast
+    to ``int`` as ``epoch``, the reported ``loss`` and ``info``, the
+    ``submitted``/``started``/``finished`` time stamps, and the entries of
+    the configuration itself.
+    """
 
     rows = []
 
-    for task, config_id in enumerate(result.data):
+    for task, config_id in enumerate(results.data):
 
-        d = result.data[config_id]
+        d = results.data[config_id]
         bracket, _, _ = config_id
 
         for epoch in d.results:
 
-            rows.append(dict(task=task,
-                             bracket=bracket,
-                             epoch=int(epoch),
-                             error=d.results[epoch]["loss"],
-                             cost=d.results[epoch]["info"],
-                             submitted=d.time_stamps[epoch]["submitted"],
-                             runtime=d.time_stamps[epoch]["finished"]))
+            row = dict(task=task,
+                       bracket=bracket,
+                       epoch=int(epoch),
+                       loss=d.results[epoch]["loss"],
+                       info=d.results[epoch]["info"],
+                       submitted=d.time_stamps[epoch]["submitted"],
+                       started=d.time_stamps[epoch]["started"],
+                       finished=d.time_stamps[epoch]["finished"])
+            # NOTE(review): a configuration key that matches a column name
+            # above would overwrite that column here; confirm none do.
+            row.update(d.config)
+            rows.append(row)
 
     return pd.DataFrame(rows)
 
 
@@ -221,8 +221,9 @@ def main(benchmark_name, input_dir, methods, ci, context, style, palette,
         "random": "Random Search",
         "tpe": "TPE",
         "bore": "BORE",
-        "bore-0.15": r"BORE $\gamma=0.15$",
-        "bore-sigmoid": r"BORE (with sigmoid)"
+        "bore-elu-normalize-ftol": "BORE (elu)",
+        "bore-relu-normalize-ftol": "BORE (relu)",
+        "bore-relu-normalize-high-ftol": "BORE (relu high ftol)"
     }
 
     num_runs = 20
@@ -238,16 +239,23 @@ def main(benchmark_name, input_dir, methods, ci, context, style, palette,
         for run in range(num_runs):
 
             path = input_path.joinpath(benchmark_name, method, f"{run:03d}.csv")
-            frame = pd.read_csv(path, index_col=0).assign(run=run)
+            frame = pd.read_csv(path, index_col=0).assign(run=run, method=method)
 
-            best = frame.error.cummin()
-            elapsed = frame.cost.cumsum()
+            try:
+                loss = frame["loss"]
+                cost = frame["info"]
+            except KeyError:
+                loss = frame["error"]
+                cost = frame["cost"]
+
+            best = loss.cummin()
+            elapsed = cost.cumsum()
             frame = frame.assign(best=best, elapsed=elapsed)
 
             if error_min is not None:
-                regret = (error_min - frame.error).abs()
+                regret = (error_min - loss).abs()
                 regret_best = regret.cummin()
-                frame = frame.assign(regret=regret, regret_best=regret_best, method=method)
+                frame = frame.assign(regret=regret, regret_best=regret_best)
 
             frames.append(frame)
 
@@ -273,11 +281,12 @@ def main(benchmark_name, input_dir, methods, ci, context, style, palette,
                  hue="method", hue_order=hue_order,
                  style="method", style_order=style_order,
                  # units="run", estimator=None,
-                 # ci=ci,
+                 # ci=None,
                  err_kws=dict(edgecolor='none'),
                  data=data, ax=ax)
 
-    ax.set_xlabel("wall-clock time elapsed (s)")
+    # ax.set_xlabel("wall-clock time elapsed (s)")
+    ax.set_xlabel("iteration")
     ax.set_ylabel("incumbent regret")
 
     ax.set_xscale("log")