
Commit 32b8076

Author: Jordan Stomps
Commit message: adding an EarlyStopper class for managing that functionality
Parent: 5e050cf

File tree

4 files changed: +87 −18 lines


models/SSML/ShadowCNN.py
Lines changed: 18 additions & 1 deletion

@@ -13,7 +13,7 @@
 import shadow.utils
 from shadow.utils import set_seed
 # diagnostics
-from scripts.utils import run_hyperopt
+from scripts.utils import EarlyStopper, run_hyperopt
 import joblib

@@ -322,6 +322,9 @@ def train(self, trainx, trainy, Ux, testx=None, testy=None):
         # labels for unlabeled data are always "-1"
         xEnt = torch.nn.CrossEntropyLoss(ignore_index=-1)

+        # generate early-stopping watchdog
+        # TODO: allow a user of ShadowCNN to specify EarlyStopper's params
+        stopper = EarlyStopper(patience=3, min_delta=0)
         n_epochs = 100
         self.eaat.to(self.device)
         losscurve = []

@@ -345,6 +348,20 @@ def train(self, trainx, trainy, Ux, testx=None, testy=None):
                 pred, acc = self.predict(testx, testy)
                 evalcurve.append(acc)

+                self.eaat.train()
+                # test for early stopping
+                x_val = torch.FloatTensor(
+                    testx.copy()[:, ::self.params['binning']])
+                x_val = x_val.reshape((x_val.shape[0],
+                                       1,
+                                       x_val.shape[1])).to(self.device)
+                y_val = torch.LongTensor(testy).to(self.device)
+                out = self.eaat(x_val)
+                val_loss = xEnt(out, y_val) + \
+                    self.eaat.get_technique_cost(x_val)
+                if stopper.early_stop(val_loss):
+                    break
+
         # optionally return the training accuracy if test data was provided
         return losscurve, evalcurve
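For anyone reading the new validation block: the spectra are first rebinned with a stride slice and then given a singleton channel axis, presumably because the model's 1-d convolution layers take input of shape (batch, channels, length). A standalone sketch of that reshape, with made-up sizes (32 spectra, 1000 bins, a binning of 2; none of these values come from this commit):

import torch

x = torch.randn(32, 1000)                   # 32 spectra, 1000 energy bins (invented sizes)
x = x[:, ::2]                               # stride slice, as with params['binning'] = 2
x = x.reshape((x.shape[0], 1, x.shape[1]))  # add the channel axis Conv1d expects
print(x.shape)                              # torch.Size([32, 1, 500])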

models/SSML/ShadowNN.py
Lines changed: 27 additions & 15 deletions

@@ -9,7 +9,7 @@
 import shadow.utils
 from shadow.utils import set_seed
 # diagnostics
-from scripts.utils import run_hyperopt
+from scripts.utils import EarlyStopper, run_hyperopt
 import joblib

@@ -199,12 +199,15 @@ def train(self, trainx, trainy, Ux, testx=None, testy=None):
         n_epochs = 100
         xt = torch.Tensor(xtens).to(self.device)
         yt = torch.LongTensor(ytens).to(self.device)
+        # generate early-stopping watchdog
+        # TODO: allow a user of ShadowNN to specify EarlyStopper's params
+        stopper = EarlyStopper(patience=3, min_delta=0)
         # saves history for max accuracy
         acc_history = []
-        # set the model into training mode
-        # NOTE: change this to .eval() mode for testing and back again
-        self.eaat.train()
         for epoch in range(n_epochs):
+            # set the model into training mode
+            # NOTE: change this to .eval() mode for testing and back again
+            self.eaat.train()
             # Forward/backward pass for training semi-supervised model
             out = self.eaat(xt)
             # supervised + unsupervised loss

@@ -214,20 +217,26 @@ def train(self, trainx, trainy, Ux, testx=None, testy=None):
             self.eaat_opt.step()

             if testx is not None and testy is not None:
+                x_val = torch.FloatTensor(
+                    testx.copy()
+                )[:, ::self.params['binning']].to(self.device)
+                y_val = torch.LongTensor(testy.copy()).to(self.device)
+
                 self.eaat.eval()
-                eaat_pred = torch.max(self.eaat(
-                    torch.FloatTensor(
-                        testx.copy()[:,
-                                     ::self.params[
-                                         'binning']
-                                     ]
-                    )
-                ), 1)[-1]
+                eaat_pred = torch.max(self.eaat(x_val), 1)[-1]
                 acc = shadow.losses.accuracy(eaat_pred,
-                                             torch.LongTensor(testy.copy())
+                                             y_val
                                             ).data.item()
                 acc_history.append(acc)

+                self.eaat.train()
+                # test for early stopping
+                out = self.eaat(x_val)
+                val_loss = self.xEnt(out, y_val) + \
+                    self.eaat.get_technique_cost(x_val)
+                if stopper.early_stop(val_loss):
+                    break
+
         # optionally return the training accuracy if test data was provided
         return acc_history

@@ -245,15 +254,18 @@ def predict(self, testx, testy=None):
         eaat_pred = torch.max(self.eaat(
             torch.FloatTensor(
                 testx.copy()[:, ::self.params['binning']]
-            )
+            ).to(self.device)
         ), 1)[-1]

         acc = None
         if testy is not None:
             acc = shadow.losses.accuracy(eaat_pred,
-                                         torch.LongTensor(testy.copy())
+                                         torch.LongTensor(
+                                             testy.copy()).to(self.device)
                                         ).data.item()

+        # return tensor to cpu if on gpu and convert to numpy for return
+        eaat_pred = eaat_pred.cpu().numpy()
         return eaat_pred, acc

     def save(self, filename):
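The predict() change above is the standard PyTorch device round-trip: inputs must be moved to the model's device before the forward pass, and the resulting prediction tensor must come back to the CPU before .numpy(), which raises on CUDA tensors. A minimal sketch with a placeholder model, not code from this repository:

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = torch.nn.Linear(1000, 2).to(device)   # placeholder standing in for self.eaat
x = torch.randn(8, 1000).to(device)         # inputs on the same device as the model

pred = torch.max(net(x), 1)[-1]             # predicted class indices, still on `device`
pred = pred.cpu().numpy()                   # back to host memory for NumPy consumers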

scripts/utils.py
Lines changed: 41 additions & 0 deletions

@@ -11,6 +11,47 @@
 from sklearn.decomposition import PCA


+class EarlyStopper:
+    '''
+    Early stopping mechanism for neural networks.
+    Code adapted from user "isle_of_gods" from StackOverflow:
+    https://stackoverflow.com/questions/71998978/early-stopping-in-pytorch
+    Use this class to break a training loop when validation loss stops improving.
+    Inputs:
+    patience: integer; forces a stop if validation loss has not improved
+        for some time
+    min_delta: "fudge value" for how much loss to tolerate before stopping
+    '''
+
+    def __init__(self, patience=1, min_delta=0):
+        self.patience = patience
+        self.min_delta = min_delta
+        self.counter = 0
+        self.min_validation_loss = np.inf
+
+    def early_stop(self, validation_loss):
+        '''
+        Tests for the early stopping condition: whether the validation loss
+        has not improved for a certain period of time (patience).
+        Inputs:
+        validation_loss: typically a float value for the loss function of
+            a neural network training loop
+        '''
+
+        if validation_loss < self.min_validation_loss:
+            # keep track of the smallest validation loss;
+            # if it has been beaten, restart patience
+            self.min_validation_loss = validation_loss
+            self.counter = 0
+        elif validation_loss > (self.min_validation_loss + self.min_delta):
+            # count epochs where validation loss has failed to improve
+            # by a tolerable amount
+            self.counter += 1
+            if self.counter >= self.patience:
+                return True
+        return False
+
+
 def run_hyperopt(space, model, data_dict, max_evals=50, verbose=True):
     '''
     Runs hyperparameter optimization on a model given a parameter space.
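The new class can be exercised in isolation. With patience=2 and min_delta=0.05, the counter only advances on epochs whose loss exceeds the best loss seen so far by more than the tolerance, and the stop triggers once the counter reaches the patience threshold (a quick sketch using invented loss values):

from scripts.utils import EarlyStopper

stopper = EarlyStopper(patience=2, min_delta=0.05)
for epoch, loss in enumerate([1.0, 0.8, 0.9, 0.87]):
    if stopper.early_stop(loss):
        # best loss so far is 0.8; both 0.9 and 0.87 exceed 0.8 + 0.05,
        # so the counter hits 2 here and the loop breaks
        print(f'stopping at epoch {epoch}, loss {loss}')
        break

Note that a loss between the best value and best + min_delta neither resets nor advances the counter.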

tests/test_models.py
Lines changed: 1 addition & 2 deletions

@@ -337,8 +337,7 @@ def test_ShadowNN():
     # rather than decimals
     # uninteresting test if Shadow predicts all one class
     # TODO: make the default params test meaningful
-    # NOTE: .numpy() needed because model.predict() returns a tensor
-    assert np.count_nonzero(pred.numpy() == y_test) > 0
+    assert np.count_nonzero(pred == y_test) > 0

     # testing hyperopt optimize methods
     space = {'hidden_layer': 10,
