
Commit a612545

simplify user interface to compare

1 parent 8a5be57

File tree: 4 files changed, +68 -53 lines

README.md

Lines changed: 25 additions & 22 deletions
@@ -1247,33 +1247,36 @@ samples are drawn from the ground-truth data set during training. A value of
 -1 results in a different order for each fold and run; any other number
 results in a unique order specific to that number across folds and runs.
 
-To perform a simple grid search for the optimal value for a particular
+To perform a simple grid search for the optimal value of a particular
 hyperparameter, first choose how many folds you want to partition your
-ground-truth data into using `k-fold`. Then set the hyperparameter of interest
-to the first value you want to try and choose a name for the `Logs Folder` such
-that its prefix will be shared across all of the hyperparameter values you plan
-to validate. Suffix any additional hyperparameters of interest using
-underscores. (For example, to search mini-batch and keep track of kernel size
-and feature maps, use "mb-64_ks129_fm64".) If your model is small, use
-`models_per_job` in "configuration.py" to train multiple folds on a GPU.
-Click the `X-Validate` button and then `DoIt!`. One classifier will be trained
-for each fold, using it as the validation set and the remaining folds for
-training. Separate files and subdirectories are created in the `Logs Folder`
-that are suffixed by the fold number and the letter "k". Plot overlayed
-training curves with the `Accuracy` button, as before. Repeat the above
-procedure for each of remaining hyperparameter values you want to try (e.g.
-"mb-128_ks129_fm64", "mb-256_ks129_fm64", etc.). Then use the `Compare` button
-to create a figure of the cross-validation data over the hyperparameter values,
-specifying the prefix that the logs folders have in common ("mb" in this case).
-Output are three files:
-
-* "[suffix]-compare-confusion-matrices.pdf" contains the summed confusion matrix
+ground-truth data into using `k-fold`. More folds permit characterizing the
+variance better, but take longer to train and also result in fewer annotations
+to measure the accuracy. Ensure that you have at least 10 annotations for each
+label in the validation set if using many folds. Then set the hyperparameter
+of interest to the first value you want to try and use the name of the
+hyperparameter and its value as the `Logs Folder` (e.g. "mb64" for a
+mini-batch size of 64). Suffix any additional hyperparameters of interest
+using underscores (e.g. "mb64_ks129_fm64" for a kernel size of 129 and 64
+feature maps). If your model is small, use `models_per_job` in
+"configuration.py" to train multiple folds on a GPU. Click the `X-Validate`
+button and then `DoIt!`. One classifier will be trained for each fold, using
+it as the validation set and the remaining folds for training. Separate files
+and subdirectories are created in the `Logs Folder` that are suffixed by the
+fold number and the letter "k". Plot training curves with the `Accuracy`
+button, as before. Repeat the above procedure for each of the remaining
+hyperparameter values you want to try (e.g. "mb128_ks129_fm64",
+"mb256_ks129_fm64", etc.). Then use the `Compare` button to create a figure of
+the cross-validation data over the hyperparameter values, specifying for the
+`Logs Folder` the independent variable (e.g. "mb") suffixed with the fixed
+hyperparameters of interest (e.g. "mb_ks129_fm64"). Three files are output:
+
+* "[prefix]-compare-confusion-matrices.pdf" contains the summed confusion matrix
 for each of the values tested.
 
-* "[suffix]-compare-overall-params-speed.pdf" plots the accuracy, number of
+* "[prefix]-compare-overall-params-speed.pdf" plots the accuracy, number of
 trainable parameters, and training time for each model.
 
-* "[suffix]-compare-precision-recall.pdf" shows the final error rates for each
+* "[prefix]-compare-precision-recall.pdf" shows the final error rates for each
 model and wanted word.
 
 Training multiple models like this with the same hyperparameters is not
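
The folder-naming convention above is what `Compare` now keys on. Below is a minimal Python sketch, not part of this commit and using hypothetical values, of how the Logs Folder names for a mini-batch sweep and the corresponding `Compare` entry are composed:

fixed = ["ks129", "fm64"]        # hyperparameters held constant across the sweep
sweep = [64, 128, 256]           # mini-batch sizes to cross-validate

# one Logs Folder per value of the independent variable, fixed values suffixed with underscores
logs_folders = ["_".join(["mb%d" % mb] + fixed) for mb in sweep]
print(logs_folders)              # ['mb64_ks129_fm64', 'mb128_ks129_fm64', 'mb256_ks129_fm64']

# for Compare, the Logs Folder names the independent variable without a value
print("_".join(["mb"] + fixed))  # mb_ks129_fm64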

src/compare

Lines changed: 41 additions & 29 deletions
@@ -3,7 +3,7 @@
 # plot accuracy across hyperparameter values
 
 # e.g. compare <logdirs-prefix>
-# --logdirs_prefix=trained- \
+# --logdirs_filter=trained- \
 # --loss=exclusive \
 # --overlapped_prefix=not_
 
@@ -33,8 +33,9 @@ def main():
     for key in sorted(flags.keys()):
         print('%s = %s' % (key, flags[key]))
 
-    logdirs_prefix = FLAGS.logdirs_prefix
-    basename, dirname = os.path.split(logdirs_prefix)
+    logdirs_filter = FLAGS.logdirs_filter
+    logdirs_dirname, logdirs_basename = os.path.split(logdirs_filter)
+    indepvar, *filters = logdirs_basename.split('_')
 
     same_time=False
     outlier_criteria=50
@@ -51,20 +52,24 @@ def main():
     nlayers={}
     hyperparameters={}
 
-    logdirs = list(filter(lambda x: x.startswith(dirname+'-') and \
-                          os.path.isdir(os.path.join(basename,x)), os.listdir(basename)))
+    def filter_logdirs(logdir):
+        params = logdir.split('_')
+        return all([f in params for f in filters]) and \
+               any([p.startswith(indepvar) for p in params])
+
+    logdirs = list(filter(filter_logdirs, os.listdir(logdirs_dirname)))
 
     for logdir in logdirs:
         print(logdir)
-        hyperparameters[logdir] = set(logdir.split('-')[-1].split('_'))
+        hyperparameters[logdir] = set(logdir.split('_'))
         _, _, train_time[logdir], _, \
         _, _, validation_precision[logdir], validation_recall[logdir], \
         validation_time[logdir], validation_step[logdir], \
         _, _, _, _, \
         labels_touse[logdir], _, \
         nparameters_total[logdir], nparameters_finallayer[logdir], \
         batch_size[logdir], nlayers[logdir] = \
-                read_logs(os.path.join(basename,logdir))
+                read_logs(os.path.join(logdirs_dirname,logdir))
         if len(set([tuple(x) for x in labels_touse[logdir].values()]))>1:
             print('WARNING: not all labels_touse are the same')
         if len(set(nparameters_total[logdir].values()))>1:
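
A standalone sketch, not from the repository, of what the new `filter_logdirs` keeps, using hypothetical directory names: a log directory is selected only if it contains every fixed hyperparameter token and at least one token starting with the independent variable.

indepvar, *filters = "mb_ks129_fm64".split('_')   # parsed from the Logs Folder basename

def filter_logdirs(logdir):
    params = logdir.split('_')
    return all([f in params for f in filters]) and \
           any([p.startswith(indepvar) for p in params])

candidates = ["mb64_ks129_fm64", "mb128_ks129_fm64",   # kept: ks129, fm64, and an mb* token
              "mb64_ks65_fm64",                        # dropped: wrong kernel size
              "lr0.001_ks129_fm64"]                    # dropped: no mb* token
print([d for d in candidates if filter_logdirs(d)])    # ['mb64_ks129_fm64', 'mb128_ks129_fm64']
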
@@ -89,15 +94,20 @@ def main():
 
     commonparameters = reduce(lambda x,y: x&y, hyperparameters.values())
     differentparameters = {x:','.join(natsorted(list(hyperparameters[x]-commonparameters))) \
-                           for x in natsorted(logdirs)}
+                           for x in logdirs}
+
 
+    def sortby_indepvar(logdir):
+        params = logdir.split('_')
+        iindepvar = next(i for i,x in enumerate(params) if x.startswith(indepvar))
+        return str(params[iindepvar]) + str(params[:iindepvar]) + str(params[iindepvar+1:])
 
     fig = plt.figure(figsize=(8,10*2/3))
 
     ax = fig.add_subplot(2,2,1)
 
     precisions_mean, recalls_mean = [], []
-    for (ilogdir,logdir) in enumerate(natsorted(logdirs)):
+    for (ilogdir,logdir) in enumerate(natsorted(logdirs, key=sortby_indepvar)):
         color = cm.viridis(ilogdir/max(1,len(validation_recall)-1))
         precisions_all, recalls_all = [], []
         for model in validation_recall[logdir].keys():
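
Another standalone sketch with hypothetical names, showing what the `sortby_indepvar` key buys: it puts the independent variable's token at the front of the comparison string, so `natsorted` orders the log directories by that value regardless of where the token sits in the name.

from natsort import natsorted

indepvar = "mb"                  # parsed from the Logs Folder in the real script

def sortby_indepvar(logdir):
    params = logdir.split('_')
    iindepvar = next(i for i,x in enumerate(params) if x.startswith(indepvar))
    return str(params[iindepvar]) + str(params[:iindepvar]) + str(params[iindepvar+1:])

logdirs = ["mb256_ks129_fm64", "mb64_ks129_fm64", "mb128_ks129_fm64"]
print(natsorted(logdirs, key=sortby_indepvar))
# ['mb64_ks129_fm64', 'mb128_ks129_fm64', 'mb256_ks129_fm64']
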
@@ -111,7 +121,7 @@ def main():
 
     ax = fig.add_subplot(2,2,2)
     bottom=100
-    for (iexpt,expt) in enumerate(natsorted(validation_recall.keys())):
+    for (iexpt,expt) in enumerate(natsorted(validation_recall.keys(), key=sortby_indepvar)):
         color = cm.viridis(iexpt/max(1,len(validation_recall)-1))
         validation_recall_average = np.zeros(len(next(iter(validation_recall[expt].values()))))
         for model in validation_time[expt].keys():
@@ -127,45 +137,45 @@ def main():
     ax.set_ylim(bottom=bottom-5, top=100)
     ax.set_xlabel('Training time (min)')
     ax.set_ylabel('Overall validation recall')
-    ax.legend(loc='lower right', title=dirname, ncol=2 if "Annotations" in dirname else 1)
+    ax.legend(loc='lower right', ncol=2 if "Annotations" in logdirs_basename else 1)
 
     ax = fig.add_subplot(2,2,3)
-    ldata = natsorted(nparameters_total.keys())
+    ldata = natsorted(nparameters_total.keys(), key=sortby_indepvar)
     xdata = range(len(ldata))
     ydata = [next(iter(nparameters_total[x].values())) - \
              next(iter(nparameters_finallayer[x].values())) for x in ldata]
     ydata2 = [next(iter(nparameters_finallayer[x].values())) for x in ldata]
     bar1 = ax.bar(xdata,ydata,color='k')
     bar2 = ax.bar(xdata,ydata2,bottom=ydata,color='gray')
     ax.legend((bar2,bar1), ('last','rest'))
-    ax.set_xlabel(dirname)
+    ax.set_xlabel(logdirs_basename)
     ax.set_ylabel('Trainable parameters')
     ax.set_xticks(xdata)
     ax.set_xticklabels([differentparameters[x] for x in ldata], rotation=40, ha='right')
 
     ax = fig.add_subplot(2,2,4)
-    data = {k:list([np.median(np.diff(x)) for x in train_time[k].values()]) for k in train_time}
+    data = {k:list([np.median(np.diff(x)) for x in train_time[k].values()])
+            for k in sorted(train_time.keys(), key=sortby_indepvar)}
     ldata = jitter_plot(ax, data)
     ax.set_ylabel('time / step (ms)')
-    ax.set_xlabel(dirname)
+    ax.set_xlabel(logdirs_basename)
     ax.set_xticks(range(len(ldata)))
     ax.set_xticklabels([differentparameters[x] for x in ldata], rotation=40, ha='right')
 
-    fig.suptitle(','.join(list(commonparameters)))
-
-    fig.tight_layout(rect=[0, 0.03, 1, 0.95])
-    plt.savefig(logdirs_prefix+'-compare-overall-params-speed.pdf')
+    fig.suptitle(','.join(list(commonparameters)), fontsize='xx-large')
+    fig.tight_layout(rect=[0, 0.03, 1, 0.97])
+    plt.savefig(logdirs_filter+'-compare-overall-params-speed.pdf')
     plt.close()
 
 
     recall_confusion_matrices={}
     precision_confusion_matrices={}
     labels=None
 
-    for ilogdir,logdir in enumerate(natsorted(logdirs)):
+    for ilogdir,logdir in enumerate(natsorted(logdirs, key=sortby_indepvar)):
         kind = next(iter(validation_time[logdir].keys())).split('_')[0]
         confusion_matrices, theselabels = \
-            parse_confusion_matrices(os.path.join(basename,logdir), kind, \
+            parse_confusion_matrices(os.path.join(logdirs_dirname,logdir), kind, \
                                      idx_time=idx_time[logdir] if same_time else None)
 
         recall_confusion_matrices[logdir]={}
@@ -215,7 +225,7 @@ def main():
                              summed2_confusion_matrix,
                              precision_summed_matrix, recall_summed_matrix,
                              len(labels)<10,
-                             logdir+"\n",
+                             differentparameters[logdir]+"\n",
                              labels if FLAGS.loss=='exclusive' else
                              ["song", FLAGS.overlapped_prefix+"song"],
                              precision_summed, recall_summed)
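
The panel titles change from the full log-directory name to `differentparameters[logdir]`, with the shared hyperparameters factored out into the figure's suptitle. A sketch with hypothetical folder names of what those two lookups contain:

from functools import reduce
from natsort import natsorted

hyperparameters = {"mb64_ks129_fm64":  {"mb64",  "ks129", "fm64"},
                   "mb128_ks129_fm64": {"mb128", "ks129", "fm64"}}

commonparameters = reduce(lambda x,y: x&y, hyperparameters.values())
differentparameters = {x: ','.join(natsorted(list(hyperparameters[x]-commonparameters)))
                       for x in hyperparameters}

print(sorted(commonparameters))   # ['fm64', 'ks129'], joined into fig.suptitle
print(differentparameters)        # {'mb64_ks129_fm64': 'mb64', 'mb128_ks129_fm64': 'mb128'}
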
@@ -229,12 +239,13 @@ def main():
                              summed_confusion_matrix[ilabel], \
                              precision_summed_matrix, recall_summed_matrix, \
                              len(labels)<10,
-                             logdir+"\n",
+                             differentparameters[logdir]+"\n",
                              [labels[ilabel], FLAGS.overlapped_prefix+labels[ilabel]],
                              precision_summed, recall_summed)
 
-    fig.tight_layout()
-    plt.savefig(logdirs_prefix+'-compare-confusion-matrices.pdf')
+    fig.suptitle(','.join(list(commonparameters)), fontsize='xx-large')
+    fig.tight_layout(rect=[0, 0.03, 1, 0.97])
+    plt.savefig(logdirs_filter+'-compare-confusion-matrices.pdf')
     plt.close()
 
 
@@ -245,7 +256,7 @@ def main():
     for (ilabel,label) in enumerate(labels):
         ax = fig.add_subplot(nrows, ncols, ilabel+1)
         precisions_mean, recalls_mean = [], []
-        for (ilogdir,logdir) in enumerate(natsorted(logdirs)):
+        for (ilogdir,logdir) in enumerate(natsorted(logdirs, key=sortby_indepvar)):
             color = cm.viridis(ilogdir/max(1,len(validation_recall)-1))
             precisions_all, recalls_all = [], []
             for (imodel,model) in enumerate(recall_confusion_matrices[logdir].keys()):
@@ -261,14 +272,15 @@ def main():
                      'o', markeredgecolor='k', color=color)
         label_precisions_recall(ax, recalls_mean, precisions_mean, label+"\n")
 
-    fig.tight_layout()
-    plt.savefig(logdirs_prefix+'-compare-PR-classes.pdf')
+    fig.suptitle(','.join(list(commonparameters)), fontsize='xx-large')
+    fig.tight_layout(rect=[0, 0.03, 1, 0.97])
+    plt.savefig(logdirs_filter+'-compare-PR-classes.pdf')
     plt.close()
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        '--logdirs_prefix',
+        '--logdirs_filter',
         type=str,
         default='/tmp/speech_commands_train',
         help='Common prefix of the directories of logs and checkpoints')

src/gui/controller.py

Lines changed: 1 addition & 1 deletion
@@ -1962,7 +1962,7 @@ def compare_succeeded(logdirprefix, reftime):
 async def compare_actuate():
     currtime = time.time()
     logfile = V.logs_folder.value+'-compare.log'
-    args = ["--logdirs_prefix="+V.logs_folder.value, \
+    args = ["--logdirs_filter="+V.logs_folder.value, \
             "--loss="+V.loss.value, \
             "--overlapped_prefix="+M.overlapped_prefix]
     jobid = generic_actuate("compare", logfile,

src/jitter.py

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ def jitter_plot(ax, data, orientation='vertical', reverse=False, \
         if len(d)<3:
             continue
         y = np.nanmean(d)
-        h = np.nanstdev(d)
+        h = np.nanstd(d)
         if orientation=='vertical':
             aveboxes.append(Rectangle((x-0.25,y-h),0.5,2*h))
             ax.plot([x-0.25,x+0.25],[y,y],'w-')
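
The rename here avoids an AttributeError: NumPy has no `nanstdev`, and the NaN-aware standard deviation is `np.nanstd`. A minimal illustration with made-up numbers:

import numpy as np

d = np.array([1.0, 2.0, np.nan, 4.0])
y = np.nanmean(d)   # ~2.33; the NaN is ignored
h = np.nanstd(d)    # ~1.25; np.std(d) would propagate the NaN instead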
