
Commit 0fccd75

Add new benchmark (#115)
1 parent 509dd9a commit 0fccd75

6 files changed (+67, -87 lines)

Makefile

Lines changed: 2 additions & 36 deletions
@@ -67,15 +67,7 @@ extract-all-features-with-resnet12:
 
 benchmark-mini-imagenet:
 	for n_shot in 1 5; do \
-		for method in bd_cspn prototypical_networks simple_shot ; do \
-			python -m scripts.benchmark_methods \
-				$${method} \
-				data/features/mini_imagenet/test/feat_resnet12_mini_imagenet.parquet.gzip \
-				--n-shot=$${n_shot} \
-				--device=${DEVICE} \
-				--num-workers=${NUM_WORKERS}; \
-		done; \
-		for method in tim; do \
+		for method in bd_cspn prototypical_networks simple_shot tim finetune laplacian_shot pt_map transductive_finetuning; do \
 			python -m scripts.benchmark_methods \
 				$${method} \
 				data/features/mini_imagenet/test/feat_resnet12_mini_imagenet.parquet.gzip \
@@ -84,15 +76,6 @@ benchmark-mini-imagenet:
 				--device=${DEVICE} \
 				--num-workers=${NUM_WORKERS}; \
 		done; \
-		for method in finetune laplacian_shot; do \
-			python -m scripts.benchmark_methods \
-				$${method} \
-				data/features/mini_imagenet/test/feat_resnet12_mini_imagenet.parquet.gzip \
-				--config=$${n_shot}_shot \
-				--n-shot=$${n_shot} \
-				--device=${DEVICE} \
-				--num-workers=${NUM_WORKERS}; \
-		done; \
 		python -m scripts.benchmark_methods \
 			feat \
 			data/features/mini_imagenet/test/feat_resnet12_mini_imagenet.parquet.gzip \
@@ -104,15 +87,7 @@ benchmark-mini-imagenet:
 
 benchmark-tiered-imagenet:
 	for n_shot in 1 5; do \
-		for method in bd_cspn prototypical_networks simple_shot ; do \
-			python -m scripts.benchmark_methods \
-				$${method} \
-				data/features/tiered_imagenet/test/feat_resnet12_tiered_imagenet.parquet.gzip \
-				--n-shot=$${n_shot} \
-				--device=${DEVICE} \
-				--num-workers=${NUM_WORKERS}; \
-		done; \
-		for method in tim; do \
+		for method in bd_cspn prototypical_networks simple_shot tim finetune laplacian_shot pt_map transductive_finetuning; do \
 			python -m scripts.benchmark_methods \
 				$${method} \
 				data/features/tiered_imagenet/test/feat_resnet12_tiered_imagenet.parquet.gzip \
@@ -121,15 +96,6 @@ benchmark-tiered-imagenet:
 				--device=${DEVICE} \
 				--num-workers=${NUM_WORKERS}; \
 		done; \
-		for method in finetune laplacian_shot; do \
-			python -m scripts.benchmark_methods \
-				$${method} \
-				data/features/tiered_imagenet/test/feat_resnet12_tiered_imagenet.parquet.gzip \
-				--config=$${n_shot}_shot \
-				--n-shot=$${n_shot} \
-				--device=${DEVICE} \
-				--num-workers=${NUM_WORKERS}; \
-		done; \
 		python -m scripts.benchmark_methods \
 			feat \
 			data/features/tiered_imagenet/test/feat_resnet12_tiered_imagenet.parquet.gzip \
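The consolidated loop runs every method through the same `scripts.benchmark_methods` entry point and drops the per-method `--config=$${n_shot}_shot` flag, now that each method carries a single `default` config. For orientation, a rough Python equivalent of one sweep of the `benchmark-mini-imagenet` target is sketched below (illustrative only: the device and worker count are placeholders for the Makefile's `${DEVICE}` and `${NUM_WORKERS}` variables, and the script is assumed to be run from the repository root with features already extracted).

```python
# Sketch of one sweep of the benchmark-mini-imagenet target.
import subprocess
import sys

METHODS = [
    "bd_cspn", "prototypical_networks", "simple_shot", "tim",
    "finetune", "laplacian_shot", "pt_map", "transductive_finetuning",
]
FEATURES = "data/features/mini_imagenet/test/feat_resnet12_mini_imagenet.parquet.gzip"

for n_shot in (1, 5):
    for method in METHODS:
        subprocess.run(
            [
                sys.executable, "-m", "scripts.benchmark_methods",
                method, FEATURES,
                f"--n-shot={n_shot}",
                "--device=cuda",      # placeholder for ${DEVICE}
                "--num-workers=12",   # placeholder for ${NUM_WORKERS}
            ],
            check=True,
        )
```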

README.md

Lines changed: 17 additions & 24 deletions
@@ -167,11 +167,14 @@ We used EasyFSL to benchmark a dozen methods.
 Inference times are computed over 1000 tasks using pre-extracted features. They are only indicative.
 Note that the inference time for fine-tuning methods highly depends on the number of fine-tuning steps.
 
-All methods hyper-parameters are defined in [this JSON file](scripts/backbones_configs.json).
+All methods' hyperparameters are defined in [this JSON file](scripts/backbones_configs.json).
 They were selected on miniImageNet validation set.
-The procedure can be reproduced from [this other project](https://github.com/ebennequin/few-shot-open-set).
+The procedure can be reproduced with `make hyperparameter-search`.
 We decided to use miniImageNet's hyperparameters for all benchmarks in order to highlight the adaptability of
 the different methods.
+Note that all methods use L2 normalization of features, except for FEAT, as it harms its performance.
+
+There are no results for Matching and Relation Networks, as the trained weights for their additional modules are unavailable.
 
 ### miniImageNet & tieredImageNet
 
@@ -180,36 +183,26 @@ provided by the authors from [FEAT](https://github.com/Sha-Lab/FEAT)
 (download: [miniImageNet](https://drive.google.com/file/d/1ixqw1l9XVxl3lh1m5VXkctw6JssahGbQ/view),
 [tieredImageNet](https://drive.google.com/file/d/1M93jdOjAn8IihICPKJg8Mb4B-eYDSZfE/view)).
 
+Best inductive and best transductive results for each column are shown in bold.
+
 | Method | Ind / Trans | *mini*Imagenet<br/>1-shot | *mini*Imagenet<br/>5-shot | *tiered*Imagenet<br/>1-shot | *tiered*Imagenet<br/>5-shot | Time |
 |---------------------------------------------------------------------------|--------------|---------------------------|---------------------------|-----------------------------|-----------------------------|---------|
-| **[ProtoNet](easyfsl/methods/prototypical_networks.py)** | Inductive | 61.5 | 79.3 | 56.4 | 76.5 | 10s |
-| **[SimpleShot](easyfsl/methods/simple_shot.py)** | Inductive | 65.5 | 80.3 | 60.2 | 77.3 | 9s |
+| **[ProtoNet](easyfsl/methods/prototypical_networks.py)** | Inductive | 63.6 | 80.4 | 60.2 | 77.4 | 6s |
+| **[SimpleShot](easyfsl/methods/simple_shot.py)** | Inductive | 63.6 | **80.5** | 60.2 | 77.4 | 6s |
 | **[MatchingNet](easyfsl/methods/matching_networks.py)** | Inductive | - | - | - | - | - |
 | **[RelationNet](easyfsl/methods/relation_networks.py)** | Inductive | - | - | - | - | - |
-| **[Finetune](easyfsl/methods/finetune.py)** | Inductive | 63.4 | 80.38 | 60.1 | 77.4 | 3mn03s |
-| **[FEAT](easyfsl/methods/feat.py)** | Inductive | 64.7 | 80.13 | 61.3 | 76.2 | 3s |
-| **[BD-CSPN](easyfsl/methods/bd_cspn.py)** | Transductive | 71.7 | 82.17 | 66.1 | 79.1 | 10s |
-| **[LaplacianShot](easyfsl/methods/laplacian_shot.py)** | Transductive | 69.6 | 81.9 | 66.0 | 78.9 | 12s |
-| **[PT-MAP](easyfsl/methods/pt_map.py)** | Transductive | - | - | - | - | 22mn50s |
-| **[TIM](easyfsl/methods/tim.py)** | Transductive | 64.0 | 80.4 | 60.4 | 77.4 | 2mn48s |
-| **[Transductive Finetuning](easyfsl/methods/transductive_finetuning.py)** | Transductive | - | - | - | - | - |
+| **[Finetune](easyfsl/methods/finetune.py)** | Inductive | 63.3 | **80.5** | 59.8 | **77.5** | 1mn33s |
+| **[FEAT](easyfsl/methods/feat.py)** | Inductive | **64.7** | 80.1 | **61.3** | 76.2 | 3s |
+| **[BD-CSPN](easyfsl/methods/bd_cspn.py)** | Transductive | 69.8 | 82.2 | 66.3 | 79.1 | 7s |
+| **[LaplacianShot](easyfsl/methods/laplacian_shot.py)** | Transductive | 69.8 | 82.3 | 66.2 | 79.2 | 9s |
+| **[PT-MAP](easyfsl/methods/pt_map.py)** | Transductive | **76.1** | **84.2** | **71.7** | **80.7** | 39mn40s |
+| **[TIM](easyfsl/methods/tim.py)** | Transductive | 74.3 | **84.2** | 70.7 | **80.7** | 3mn05s |
+| **[Transductive Finetuning](easyfsl/methods/transductive_finetuning.py)** | Transductive | 63.0 | 80.6 | 59.1 | 77.5 | 30s |
 
 To reproduce:
 
 1. Download the [*mini*ImageNet](https://drive.google.com/file/d/1ixqw1l9XVxl3lh1m5VXkctw6JssahGbQ/view)
 and [tieredImageNet](https://drive.google.com/file/d/1M93jdOjAn8IihICPKJg8Mb4B-eYDSZfE/view) weights for ResNet12
 and save them under `data/models/feat_resnet12_mini_imagenet.pth` (resp. `tiered`).
 2. Extract all embeddings from the test sets of all datasets with `make extract-all-features-with-resnet12`.
-3. Run the evaluation script with `make benchmark-mini-imagenet`.
-
-
-### Next steps (01/06/2023):
-
-1. Complete benchmark with Matching and Relation Nets, PT-MAP, Transductive Finetuning (est. July 23)
-2. Add explicit hyperparameter selection (est. July 23)
-3. Add feature normalization as it's been proven to have a huge impact on the results (est. July 23)
-4. Add cross-domain benchmarks (CUB, Fungi) and using other backbones (est. September 23)
-
-
-
-
+3. Run the evaluation scripts with `make benchmark-mini-imagenet` (resp. `tiered`).
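The `feature_normalization: 2` setting that appears throughout the new configs corresponds to the L2 normalization mentioned in the README note above. As a quick standalone PyTorch illustration (not EasyFSL-specific code):

```python
import torch
import torch.nn.functional as F

features = torch.randn(16, 640)  # e.g. a batch of ResNet12 embeddings

# L2 normalization ("feature_normalization": 2) projects each feature
# vector onto the unit sphere before classification.
normalized = F.normalize(features, p=2, dim=-1)

assert torch.allclose(normalized.norm(p=2, dim=-1), torch.ones(16), atol=1e-5)
```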

easyfsl/methods/feat.py

Lines changed: 2 additions & 1 deletion
@@ -66,6 +66,7 @@ def from_resnet12_checkpoint(
         device: str = "cpu",
         feature_dimension: int = 640,
         use_backbone: bool = True,
+        **kwargs,
     ):
         """
         Load a FEAT model from a checkpoint of a resnet12 model as provided by the authors.
@@ -115,4 +116,4 @@ def from_resnet12_checkpoint(
                 f"Missing keys for attention module: {attention_missing_keys}"
             )
 
-        return cls(backbone, attention_module=attention_module).to(device)
+        return cls(backbone, attention_module=attention_module, **kwargs).to(device)
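The new `**kwargs` are forwarded to the FEAT constructor, so callers can pass method-level options (such as the `feature_normalization` value used in the updated configs) when loading from a checkpoint. A hedged usage sketch, assuming the first positional argument is the checkpoint path and that the constructor accepts `feature_normalization`:

```python
import torch
from easyfsl.methods import FEAT

# Hypothetical usage: extra keyword arguments now reach FEAT.__init__.
model = FEAT.from_resnet12_checkpoint(
    "data/models/feat_resnet12_mini_imagenet.pth",  # path used in the README instructions
    device="cuda" if torch.cuda.is_available() else "cpu",
    feature_dimension=640,
    use_backbone=True,
    feature_normalization=2,  # assumed to be a valid FEAT constructor argument
)
```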

easyfsl/tests/utils_test.py

Lines changed: 1 addition & 1 deletion
@@ -317,4 +317,4 @@ class TestComputeAverageFeaturesFromImages:
     @pytest.mark.parametrize("dataloader, expected_average", cases_grid)
     def test_returns_expected_average(dataloader, expected_average):
         output_tensor = compute_average_features_from_images(dataloader, nn.Identity())
-        torch.testing.assert_allclose(output_tensor, expected_average)
+        torch.testing.assert_close(output_tensor, expected_average)
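`torch.testing.assert_allclose` is deprecated in recent PyTorch releases in favour of `torch.testing.assert_close`, which the test now uses. A standalone illustration of the replacement API:

```python
import torch

actual = torch.tensor([1.0, 2.0, 3.0])
expected = torch.tensor([1.0, 2.0, 3.0000001])

# Raises an AssertionError with a detailed report if the tensors differ
# beyond the default dtype-dependent tolerances.
torch.testing.assert_close(actual, expected)
```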

scripts/grid_search.json

Lines changed: 8 additions & 4 deletions
@@ -1,5 +1,6 @@
 {
   "tim": {
+    "feature_normalization": [2],
     "fine_tuning_steps": [50, 100, 200, 300],
     "fine_tuning_lr": [0.001, 0.0001],
     "cross_entropy_weight": [1.0],
@@ -8,22 +9,25 @@
     "temperature": [1, 5, 10]
   },
   "finetune": {
+    "feature_normalization": [2],
     "fine_tuning_steps": [50, 100, 200, 300],
     "fine_tuning_lr": [0.001, 0.0001],
     "temperature": [1, 5, 10]
   },
   "pt_map": {
+    "feature_normalization": [2],
     "fine_tuning_steps": [10, 20, 30],
     "fine_tuning_lr": [0.2, 0.3, 0.5],
     "lambda_regularization": [10, 20, 30]
   },
   "laplacian_shot": {
-    "inference_steps": [10, 20, 30],
-    "knn": [1, 3, 5, 7],
-    "lambda_regularization": [0.1, 0.3, 0.5, 0.7, 0.8],
-    "temperature": [1, 5, 10]
+    "feature_normalization": [2],
+    "inference_steps": [10, 20, 30],
+    "knn": [1, 3, 5, 7],
+    "lambda_regularization": [0.1, 0.3, 0.5, 0.7, 0.8]
   },
   "transductive_finetuning": {
+    "feature_normalization": [2],
     "fine_tuning_steps": [10, 25, 40],
     "fine_tuning_lr": [0.001, 0.0001, 0.00001],
     "temperature": [1, 5, 10]

scripts/methods_configs.json

Lines changed: 37 additions & 21 deletions
@@ -2,11 +2,12 @@
   "tim": {
     "default": {
       "fine_tuning_steps": 50,
-      "fine_tuning_lr": 0.0001,
+      "fine_tuning_lr": 0.001,
       "cross_entropy_weight": 1.0,
       "marginal_entropy_weight": 1.0,
-      "conditional_entropy_weight": 1.0,
-      "temperature": 10.0
+      "conditional_entropy_weight": 0.5,
+      "temperature": 10.0,
+      "feature_normalization": 2
     }
   },
   "feat": {
@@ -20,36 +21,51 @@
     }
   },
   "finetune": {
-    "1_shot": {
-      "fine_tuning_steps": 200,
-      "fine_tuning_lr": 0.0001,
-      "temperature": 1.0
-    },
-    "5_shot": {
-      "fine_tuning_steps": 100,
+    "default": {
+      "fine_tuning_steps": 50,
       "fine_tuning_lr": 0.0001,
-      "temperature": 10.0
+      "temperature": 10.0,
+      "feature_normalization": 2
     }
   },
   "pt_map": {
     "default": {
-      "fine_tuning_steps": 10,
-      "fine_tuning_lr": 0.2,
+      "fine_tuning_steps": 30,
+      "fine_tuning_lr": 0.3,
       "lambda_regularization": 10,
       "power_factor": 0.5,
       "feature_normalization": 2
     }
   },
+  "transductive_finetuning": {
+    "default": {
+      "fine_tuning_steps": 10,
+      "fine_tuning_lr": 0.0001,
+      "temperature": 10.0,
+      "feature_normalization": 2
+    }
+  },
   "laplacian_shot": {
-    "1_shot": {
-      "inference_steps": 20,
-      "knn": 3,
-      "lambda_regularization": 0.7
-    },
-    "5_shot": {
-      "inference_steps": 20,
+    "default": {
+      "inference_steps": 10,
       "knn": 3,
-      "lambda_regularization": 0.1
+      "lambda_regularization": 0.1,
+      "feature_normalization": 2
+    }
+  },
+  "bd_cspn": {
+    "default": {
+      "feature_normalization": 2
+    }
+  },
+  "prototypical_networks": {
+    "default": {
+      "feature_normalization": 2
+    }
+  },
+  "simple_shot": {
+    "default": {
+      "feature_normalization": 2
     }
   }
 }
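Every method now exposes a single `default` entry (Finetune and LaplacianShot previously had separate `1_shot`/`5_shot` entries), which is why the Makefile no longer passes `--config=$${n_shot}_shot`. A hedged sketch of how a benchmark script could resolve one of these configs (illustrative helper, not the actual `scripts.benchmark_methods` code):

```python
import json
from pathlib import Path


def load_method_config(method_name: str, config_name: str = "default") -> dict:
    """Return the hyperparameter dict for one method from methods_configs.json."""
    all_configs = json.loads(Path("scripts/methods_configs.json").read_text())
    return all_configs[method_name][config_name]


# Example: the updated TIM defaults, including the new feature_normalization key.
tim_kwargs = load_method_config("tim")
print(tim_kwargs["feature_normalization"])  # 2
```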
