11import numpy as np
22import tensorflow as tf
33
4- # from tensorflow.keras.initializers import GlorotUniform
5- from tensorflow .keras .regularizers import l2
6-
74from scipy .optimize import minimize
85
6+ from tensorflow .keras .losses import BinaryCrossentropy
7+ # from tensorflow.keras.initializers import GlorotUniform
8+
99from .types import DenseConfigurationSpace , DenseConfiguration
1010from .models import DenseSequential
11- from .losses import binary_crossentropy_from_logits
1211from .decorators import unbatch , value_and_gradient , numpy_io
1312from .optimizers import multi_start
1413
@@ -31,7 +30,8 @@ def __init__(self, config_space, eta=3, min_budget=0.01, max_budget=1,
3130 gamma = None , num_random_init = 10 , random_rate = 0.25 ,
3231 num_restarts = 10 , batch_size = 64 , num_steps_per_iter = 1000 ,
3332 optimizer = "adam" , num_layers = 2 , num_units = 32 ,
34- activation = "relu" , seed = None , ** kwargs ):
33+ activation = "relu" , normalize = True , method = "L-BFGS-B" ,
34+ max_iter = 100 , ftol = 1e-2 , seed = None , ** kwargs ):
3535
3636 if gamma is None :
3737 gamma = 1 / eta
@@ -41,7 +41,9 @@ def __init__(self, config_space, eta=3, min_budget=0.01, max_budget=1,
4141 random_rate = random_rate , num_restarts = num_restarts ,
4242 batch_size = batch_size , num_steps_per_iter = num_steps_per_iter ,
4343 optimizer = optimizer , num_layers = num_layers ,
44- num_units = num_units , activation = activation , seed = seed )
44+ num_units = num_units , activation = activation ,
45+ normalize = normalize , method = method , max_iter = max_iter ,
46+ ftol = ftol , seed = seed )
4547 # (LT): Note this is using the *grandparent* class initializer to
4648 # replace the config_generator!
4749 super (HyperBand , self ).__init__ (config_generator = cg , ** kwargs )
@@ -76,20 +78,32 @@ class DRE(base_config_generator):
7678 class to implement random sampling from a ConfigSpace
7779 """
7880 def __init__ (self , config_space , gamma = 1 / 3 , num_random_init = 10 ,
79- random_rate = 0.25 , num_restarts = 10 , batch_size = 64 ,
81+ random_rate = 0.25 , num_restarts = 3 , batch_size = 64 ,
8082 num_steps_per_iter = 1000 , optimizer = "adam" , num_layers = 2 ,
81- num_units = 32 , activation = "relu" , seed = None , ** kwargs ):
83+ num_units = 32 , activation = "relu" , normalize = True ,
84+ method = "L-BFGS-B" , max_iter = 100 , ftol = 1e-2 , seed = None ,
85+ ** kwargs ):
8286
8387 super (DRE , self ).__init__ (** kwargs )
8488
89+ assert 0. <= gamma < 1. , "`gamma` must be in [0, 1)"
90+ assert 0. <= random_rate < 1. , "`random_rate` must be in [0, 1)"
91+ assert num_random_init > 0
92+ assert num_restarts > 0
93+
8594 self .config_space = DenseConfigurationSpace (config_space , seed = seed )
95+ self .bounds = self .config_space .get_bounds ()
96+
97+ self .logit = self ._build_compile_network (num_layers , num_units ,
98+ activation , optimizer )
99+ self .loss = self ._build_loss (self .logit , normalize = normalize )
100+ self .minimizer = self ._build_minimizer (num_restarts = num_restarts ,
101+ method = method , ftol = ftol ,
102+ max_iter = max_iter )
86103
87104 self .gamma = gamma
88105 self .num_random_init = num_random_init
89-
90- assert 0. <= random_rate <= 1. , "random rate must be in [0, 1]"
91106 self .random_rate = random_rate
92-
93107 self .num_restarts = num_restarts
94108
95109 self .batch_size = batch_size
@@ -98,44 +112,60 @@ def __init__(self, config_space, gamma=1/3, num_random_init=10,
98112 self .config_arrs = []
99113 self .losses = []
100114
101- l2_factor = 1e-4
102-
103- self ._init_model (num_layers , num_units , activation , optimizer , l2_factor )
104-
105115 self .seed = seed
106116 self .random_state = np .random .RandomState (seed )
107117
108- def _init_model (self , num_layers , num_units , activation , optimizer , l2_factor ):
@staticmethod
def _build_compile_network(num_layers, num_units, activation, optimizer):
    """Build and compile the network that is fitted as a binary classifier.

    Parameters
    ----------
    num_layers, num_units
        Forwarded unchanged to ``DenseSequential``.
    activation
        Handed to ``DenseSequential`` through ``layer_kws``.
    optimizer
        Handed to ``network.compile``.

    Returns
    -------
    The compiled ``DenseSequential`` instance, built with ``output_dim=1``.
    """
    network = DenseSequential(output_dim=1,
                              num_layers=num_layers,
                              num_units=num_units,
                              layer_kws=dict(activation=activation))
    # The loss is created with `from_logits=True`: the single network output
    # is treated as a raw logit, not as a probability.
    network.compile(optimizer=optimizer, metrics=["accuracy"],
                    loss=BinaryCrossentropy(from_logits=True))
    return network
128+
@staticmethod
def _build_loss(logit, normalize=True):
    """Build the objective handed to the multi-start minimizer.

    Parameters
    ----------
    logit
        Callable applied to the input ``x`` (the compiled network).
    normalize
        If true, the output of ``logit`` is passed through ``tf.sigmoid``;
        otherwise it is used as-is (``tf.identity``).

    Returns
    -------
    The decorated function computing ``-activation(logit(x))``. The sign is
    flipped so that *minimizing* the returned function maximizes the
    (optionally squashed) network output.
    """
    activation = tf.sigmoid if normalize else tf.identity

    # NOTE(review): `numpy_io`, `value_and_gradient` and `unbatch` are
    # project decorators not visible here; presumably they adapt the
    # function to the `(value, gradient)` NumPy interface that
    # `scipy.optimize.minimize(..., jac=True)` expects -- confirm against
    # `.decorators`.
    @numpy_io
    @value_and_gradient
    @unbatch
    def loss(x):
        return -activation(logit(x))

    return loss
117144
@staticmethod
def _build_minimizer(num_restarts, method="L-BFGS-B", max_iter=100,
                     ftol=1e-2):
    """Build a multi-start wrapper around ``scipy.optimize.minimize``.

    Parameters
    ----------
    num_restarts
        Forwarded to the ``multi_start`` decorator.
    method
        Solver name passed to ``minimize``.
    max_iter
        Passed to the solver as the ``maxiter`` option.
    ftol
        Passed to the solver as the ``ftol`` option.

    Returns
    -------
    The decorated ``multi_start_minimizer`` function.
    """
    # NOTE(review): `multi_start` is a project decorator not visible here;
    # presumably it supplies the starting points `x0` and collects one
    # result per start -- confirm against `.optimizers`.
    @multi_start(num_restarts=num_restarts)
    def multi_start_minimizer(fn, x0, bounds):
        # `jac=True`: `fn` must return the objective value together with
        # its gradient.
        return minimize(fn, x0=x0, method=method, jac=True, bounds=bounds,
                        options=dict(maxiter=max_iter, ftol=ftol))

    return multi_start_minimizer
128155
129- def make_minimizee (self ):
130-
131- @numpy_io
132- @value_and_gradient
133- @unbatch
134- def func (x ):
def _load_data(self):
    """Assemble the recorded observations into arrays.

    Returns
    -------
    (X, y)
        ``X`` is ``self.config_arrs`` stacked row-wise with ``np.vstack``;
        ``y`` is ``self.losses`` concatenated with ``np.hstack``.
    """
    X = np.vstack(self.config_arrs)
    y = np.hstack(self.losses)
    return X, y
135160
136- return - tf .sigmoid (self .model (x ))
def _load_labels(self, y):
    """Turn observed values into binary labels.

    The threshold ``tau`` is the ``self.gamma``-quantile of ``y``. An entry
    is labelled ``True`` when it lies strictly below ``tau``, so entries
    equal to the threshold are labelled ``False``.

    Returns
    -------
    Boolean array, elementwise ``y < tau``.
    """
    tau = np.quantile(y, q=self.gamma)
    return np.less(y, tau)
137164
138- return func
def _get_steps_per_epoch(self, dataset_size):
    """Return the number of batches needed to cover ``dataset_size`` items.

    Computed as ``ceil(dataset_size / self.batch_size)`` and returned as a
    plain ``int``; a trailing partial batch counts as a full step.
    """
    steps_per_epoch = int(np.ceil(np.true_divide(dataset_size,
                                                 self.batch_size)))
    return steps_per_epoch
139169
140170 def get_config (self , budget ):
141171
@@ -155,20 +185,15 @@ def get_config(self, budget):
155185 "Returning random candidate ..." )
156186 return (config_random_dict , {})
157187
158- # Model fitting
159- X = np .vstack (self .config_arrs )
160- y = np .hstack (self .losses )
161-
162- y_threshold = np .quantile (y , q = self .gamma )
163- z = np .less_equal (y , y_threshold )
188+ X , y = self ._load_data ()
189+ z = self ._load_labels (y )
164190
165- steps_per_epoch = int (np .ceil (np .true_divide (dataset_size ,
166- self .batch_size )))
191+ steps_per_epoch = self ._get_steps_per_epoch (dataset_size )
167192 num_epochs = self .num_steps_per_iter // steps_per_epoch
168193
169- self .model .fit (X , z , epochs = num_epochs , batch_size = self .batch_size ,
194+ self .logit .fit (X , z , epochs = num_epochs , batch_size = self .batch_size ,
170195 verbose = False ) # TODO(LT): Make this an argument
171- loss , accuracy = self .model .evaluate (X , z , verbose = False )
196+ loss , accuracy = self .logit .evaluate (X , z , verbose = False )
172197
173198 self .logger .info (f"[Model fit: loss={ loss :.3f} , "
174199 f"accuracy={ accuracy :.3f} ] "
@@ -177,36 +202,32 @@ def get_config(self, budget):
177202 f"steps per epoch: { steps_per_epoch } , "
178203 f"num steps per iter: { self .num_steps_per_iter } , "
179204 f"num epochs: { num_epochs } " )
180- self .logger .debug (X )
181- self .logger .debug (y )
182205
183206 # Maximize acquisition function
184-
185- # TODO(LT): The following three assignments can all be done at
186- # initialization time
187- minimize = self .make_minimizer (num_restarts = self .num_restarts )
188- func = self .make_minimizee ()
189- bounds = self .config_space .get_bounds ()
190-
191207 self .logger .debug ("Beginning multi-start maximization with "
192208 f"{ self .num_restarts } starts..." )
193209
194- results = minimize (func , bounds , random_state = self .random_state )
210+ results = self .minimizer (self .loss , self .bounds ,
211+ random_state = self .random_state )
195212
196213 res_best = None
197214 for i , res in enumerate (results ):
198215 self .logger .debug (f"[Maximum { i + 1 :02d} /{ self .num_restarts :02d} : "
199216 f"logit={ - res .fun :.3f} ] success: { res .success } , "
200217 f"iterations: { res .nit :02d} , status: { res .status } "
201- f" ({ res .message . decode ( 'utf-8' ) } )" )
218+ f" ({ res .message } )" )
202219
203- if res .success and not is_duplicate (res .x , self .config_arrs ):
220+ # TODO(LT): Create Enum type for these status codes
221+ if (res .status == 0 or res .status == 9 ) and \
222+ not is_duplicate (res .x , self .config_arrs ):
204223 # if (res_best is not None) *implies* (res.fun < res_best.fun)
205224 # (i.e. material implication) is logically equivalent to below
206225 if res_best is None or res .fun < res_best .fun :
207226 res_best = res
208227
209228 if res_best is None :
229+ # TODO(LT): It's actually important to report which one of these
230+ # occurred...
210231 self .logger .warn ("[Glob. maximum: not found!] Either optimization "
211232 f"failed in all { self .num_restarts } starts, or "
212233 "all maxima found have been evaluated previously!"
0 commit comments