Commit d98e3d2

Add further details blog post.
1 parent 34b171c commit d98e3d2

File tree: 6 files changed, +70 −12 lines

ICLRblogpost/README.md

+50-1
@@ -1,3 +1,52 @@
 # On the computation of the Fisher Information in continual learning (2025, ICLR Blogpost)
 
-... to be added ...
+The code in this repository is used for the experiments reported in the
+[ICLR 2025 blog post "On the computation of the Fisher Information in continual learning"](https://arxiv.org/abs/2502.11756).
+
+This blog post compares the performance of Elastic Weight Consolidation (EWC) with various ways of computing the diagonal elements of the Fisher Information matrix.
+The following options are considered:
+- **EXACT**
+The elements of the Fisher Information are computed exactly. All training samples are used.
+To use this option: `./main.py --ewc --fisher-labels='all'`
+
+- **EXACT ($n$=500)**
+The elements of the Fisher Information are computed exactly. Only 500 training samples are used.
+To use this option: `./main.py --ewc --fisher-labels='all' --fisher-n=500`
+
+- **SAMPLE**
+The elements of the Fisher Information are estimated using a single Monte Carlo sample. All training samples are used.
+To use this option: `./main.py --ewc --fisher-labels='sample'`
+
+- **EMPIRICAL**
+The empirical Fisher Information is used. All training samples are used.
+To use this option: `./main.py --ewc --fisher-labels='true'`
+
+- **BATCHED ($b$=128)**
+The empirical Fisher Information is approximated using mini-batches (see the blog post for details).
+To use this option: `./main.py --ewc --fisher-labels='true' --fisher-batch=128`
+
+
+To run the experiments from the blog post, the following commands can be used:
+
+```bash
+python3 ICLRblogpost/compare_FI.py --seed=1 --n-seeds=30 --experiment=splitMNIST --scenario=task
+python3 ICLRblogpost/compare_FI.py --seed=1 --n-seeds=30 --experiment=CIFAR10 --scenario=task --reducedResNet --iters=2000 --lr=0.001
+```
+
+
+### Citation
+If this is useful, please consider citing the blog post:
+```
+@inproceedings{vandeven2025fisher,
+  title={On the computation of the {F}isher {I}nformation in continual learning},
+  author={van de Ven, Gido M},
+  booktitle={ICLR Blogposts 2025},
+  year={2025},
+  date={April 28, 2025}
+}
+```
+
+
+### Acknowledgments
+This project has been supported by a senior postdoctoral fellowship from the
+Research Foundation -- Flanders (FWO) under grant number 1266823N.
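Editor's note: the three ways of treating the labels in the options above (full expectation for EXACT, a single Monte Carlo draw for SAMPLE, the observed label for EMPIRICAL) can be illustrated for a toy linear softmax model. This is a minimal NumPy sketch, not the repository's PyTorch implementation; all names here (`fisher_diag`, `grad_logp`) are hypothetical, and only the `labels` values mirror the `--fisher-labels` flag.

```python
import numpy as np

rng = np.random.default_rng(0)

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

def grad_logp(W, x, c):
    # d log p(c|x) / dW for a linear softmax model with logits z = W.T @ x
    p = softmax(W.T @ x)
    onehot = np.zeros_like(p)
    onehot[c] = 1.0
    return np.outer(x, onehot - p)

def fisher_diag(W, data, labels="all"):
    """Diagonal Fisher estimate, averaged over (x, y) pairs in `data`.
    labels='all'    -> EXACT: full expectation over the model's classes
    labels='sample' -> SAMPLE: one Monte Carlo draw y ~ p(y|x)
    labels='true'   -> EMPIRICAL: squared gradient at the observed label
    """
    F = np.zeros_like(W)
    for x, y in data:
        p = softmax(W.T @ x)
        if labels == "all":
            for c in range(len(p)):
                F += p[c] * grad_logp(W, x, c) ** 2
        elif labels == "sample":
            F += grad_logp(W, x, rng.choice(len(p), p=p)) ** 2
        else:  # labels == "true"
            F += grad_logp(W, x, y) ** 2
    return F / len(data)
```

Restricting the loop to the first 500 pairs of `data` would correspond to the EXACT ($n$=500) option; the BATCHED variant instead squares gradients summed over mini-batches, which is not shown here.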

ICLRblogpost/compare_FI.py

+3-4
@@ -10,7 +10,6 @@
 from params.param_values import check_for_errors,set_default_values
 from params import options
 from visual import visual_plt as my_plt
-import torch
 
 
 ## Parameter-values to compare
@@ -33,8 +32,9 @@ def handle_inputs():
     parser.add_argument('--n-seeds', type=int, default=1, help='how often to repeat?')
     # Add options specific for EWC
     param_reg = parser.add_argument_group('Parameter Regularization')
-    param_reg.add_argument('--online', action='store_true', help='use Online EWC rather than Offline EWC')
-    param_reg.add_argument("--fisher-n-all", type=float, default=500, help="how many samples to approximate FI in 'ALL-n=X'")
+    param_reg.add_argument('--offline', action='store_true', help='use Offline EWC rather than Online EWC')
+    param_reg.add_argument("--fisher-n-all", type=float, default=500, metavar='N',
+                           help="how many samples to approximate FI in 'ALL-n=X'")
     # Parse, process (i.e., set defaults for unselected options) and check chosen options
     args = parser.parse_args()
     args.log_per_context = True
@@ -108,7 +108,6 @@ def collect_all(method_dict, seed_list, args, name=None):
     # -set EWC-specific arguments
     args.weight_penalty = True
     args.importance_weighting = 'fisher'
-    args.offline = False if args.online else True
 
     ## EWC, "sample"
     SAMPLE = {}
README.md

+1-1
@@ -48,7 +48,7 @@ see the folder [NeurIPS-tutorial](NeurIPStutorial).
 
 
 ## ICLR blog post "On the computation of the Fisher Information in continual learning"
-This code repository is also used for the
+This repository is also used for the
 [ICLR 2025 blog post "On the computation of the Fisher Information in continual learning"](https://arxiv.org/abs/2502.11756).
 For details and instructions on how to re-run the experiments reported in this blog post,
 see the folder [ICLR-blogpost](ICLRblogpost).

all_results.sh

+2-2
@@ -5,8 +5,8 @@
 
 ########### ICLR 2025 Blogpost ###########
 
-python3 ICLRblogpost/compare_FI.py --seed=1 --n-seeds=30 --experiment=splitMNIST --scenario=task --online
-python3 ICLRblogpost/compare_FI.py --seed=1 --n-seeds=30 --experiment=CIFAR10 --scenario=task --contexts=5 --conv-type=resNet --fc-layers=1 --iters=2000 --reducing-layers=3 --depth=5 --global-pooling --channels=20 --lr=0.001 --online
+python3 ICLRblogpost/compare_FI.py --seed=1 --n-seeds=30 --experiment=splitMNIST --scenario=task
+python3 ICLRblogpost/compare_FI.py --seed=1 --n-seeds=30 --experiment=CIFAR10 --scenario=task --reducedResNet --iters=2000 --lr=0.001
 
 
 
params/options.py

+6-3
@@ -95,17 +95,20 @@ def add_problem_options(parser, pretrain=False, no_boundaries=False, **kwargs):
 
 def add_model_options(parser, pretrain=False, compare_replay=False, **kwargs):
     model = parser.add_argument_group('Parameters Main Model')
+    # 'Convenience-commands' that select the defaults for specific architectures
+    model.add_argument('--reducedResNet', action='store_true', help="select defaults for 'Reduced ResNet-18' (e.g., as in Hess et al, 2023)")
     # -convolutional layers
     model.add_argument('--conv-type', type=str, default="standard", choices=["standard", "resNet"])
     model.add_argument('--n-blocks', type=int, default=2, help="# blocks per conv-layer (only for 'resNet')")
     model.add_argument('--depth', type=int, default=None, help="# of convolutional layers (0 = only fc-layers)")
-    model.add_argument('--reducing-layers', type=int, dest='rl', help="# of layers with stride (=image-size halved)")
-    model.add_argument('--channels', type=int, default=16, help="# of channels 1st conv-layer (doubled every 'rl')")
+    model.add_argument('--reducing-layers', type=int, dest='rl', default=None,
+                       help="# of layers with stride (=image-size halved)")
+    model.add_argument('--channels', type=int, default=None, help="# of channels 1st conv-layer (doubled every 'rl')")
     model.add_argument('--conv-bn', type=str, default="yes", help="use batch-norm in the conv-layers (yes|no)")
     model.add_argument('--conv-nl', type=str, default="relu", choices=["relu", "leakyrelu"])
     model.add_argument('--global-pooling', action='store_true', dest='gp', help="ave global pool after conv-layers")
     # -fully connected layers
-    model.add_argument('--fc-layers', type=int, default=3, dest='fc_lay', help="# of fully-connected layers")
+    model.add_argument('--fc-layers', type=int, default=None, dest='fc_lay', help="# of fully-connected layers")
     model.add_argument('--fc-units', type=int, metavar="N", help="# of units in hidden fc-layers")
     model.add_argument('--fc-drop', type=float, default=0., help="dropout probability for fc-units")
     model.add_argument('--fc-bn', type=str, default="no", help="use batch-norm in the fc-layers (no|yes)")

params/param_values.py

+8-1
@@ -49,7 +49,14 @@ def set_method_options(args, **kwargs):
 def set_default_values(args, also_hyper_params=True, single_context=False, no_boundaries=False):
     # -set default-values for certain arguments based on chosen experiment
     args.normalize = args.normalize if args.experiment in ('CIFAR10', 'CIFAR100') else False
-    args.depth = (5 if args.experiment in ('CIFAR10', 'CIFAR100') else 0) if args.depth is None else args.depth
+    args.depth = (
+        5 if (args.experiment in ('CIFAR10', 'CIFAR100')) or checkattr(args, 'reducedResNet') else 0
+    ) if args.depth is None else args.depth
+    args.fc_lay = (1 if checkattr(args, 'reducedResNet') else 3) if args.fc_lay is None else args.fc_lay
+    args.channels = (20 if checkattr(args, 'reducedResNet') else 16) if args.channels is None else args.channels
+    args.rl = 3 if checkattr(args, 'reducedResNet') and (args.rl is None) else args.rl
+    args.gp = True if checkattr(args, 'reducedResNet') else args.gp
+    args.conv_type = 'resNet' if checkattr(args, 'reducedResNet') else args.conv_type
     if not single_context:
         args.contexts = (
             5 if args.experiment in ('splitMNIST', 'CIFAR10') else 10
