6 changes: 4 additions & 2 deletions README.md

@@ -18,10 +18,12 @@
 [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/prompt-pre-training-with-twenty-thousand/open-vocabulary-semantic-segmentation-on-5)](https://paperswithcode.com/sota/open-vocabulary-semantic-segmentation-on-5?p=prompt-pre-training-with-twenty-thousand) [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/prompt-pre-training-with-twenty-thousand/open-vocabulary-object-detection-on-lvis-v1-0)](https://paperswithcode.com/sota/open-vocabulary-object-detection-on-lvis-v1-0?p=prompt-pre-training-with-twenty-thousand)
 
 # :rocket: News
+* **(Sep 22, 2023)**
+  * Our paper has been accepted by NeurIPS 2023.
 * **(Jul 11, 2023)**
-  * Inference demo for object detection in Jupyter.
+  * Inference demo for object detection in [Jupyter](third_party/Detic/demo/demo.ipynb).
 * **(May 31, 2023)**
-  * Inference demo for image classification in Google Colab.
+  * Inference demo for image classification in [Google Colab](https://colab.research.google.com/drive/1OEFw1GfKXogx8mdFS2pClLjPK3aPEZyY?usp=sharing).
 * **(Mar 22, 2023)**
   * Codes for prompt pretraining (POMP) on ImageNet-21K, cross-dataset and cross-task evaluation.
   * Checkpoints of pre-trained POMP prompts, segmentation backbones, and detection backbones.
4 changes: 2 additions & 2 deletions configs/trainers/POMP/vit_b32_ep20_randaug2.yaml

@@ -1,9 +1,10 @@
 DATALOADER:
   TRAIN_X:
     BATCH_SIZE: 32
-    # SAMPLER: "DistributedSampler"
+    SAMPLER: "DistributedSampler"
   TEST:
     BATCH_SIZE: 100
+    SAMPLER: "SequentialDistributedSampler"
   NUM_WORKERS: 8
   K_TRANSFORMS: 4
 
@@ -29,7 +30,6 @@ TRAIN:
 
 TEST:
   NO_TEST: True
-  PER_CLASS_RESULT: True
 
 MODEL:
   BACKBONE:
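For context on the sampler change above: `DistributedSampler` shards the training set so that each GPU rank sees a disjoint subset, while the test-side `SequentialDistributedSampler` (presumably a custom sampler in this code base) keeps evaluation order deterministic so per-rank predictions can be gathered back in sequence. Below is a minimal sketch of the training-side wiring, assuming standard `torch.utils.data` primitives; the real hookup lives in the Dassl/POMP trainer and may differ.

```python
# Illustrative sketch only -- assumes torch.distributed is already initialized
# (e.g. launched via torchrun); not the repository's actual trainer code.
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.distributed import DistributedSampler

dataset = TensorDataset(torch.randn(1024, 3, 224, 224))  # stand-in dataset
sampler = DistributedSampler(dataset)  # each rank draws a disjoint shard
loader = DataLoader(dataset, batch_size=32, sampler=sampler, num_workers=8)

for epoch in range(20):
    sampler.set_epoch(epoch)  # re-seeds the shuffle so shards differ per epoch
    for (images,) in loader:
        pass  # forward/backward step goes here
```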
36 changes: 36 additions & 0 deletions configs/trainers/POMP/vit_l14_ep20_randaug2.yaml

@@ -0,0 +1,36 @@
+DATALOADER:
+  TRAIN_X:
+    BATCH_SIZE: 32
+    SAMPLER: "DistributedSampler"
+  TEST:
+    BATCH_SIZE: 100
+    SAMPLER: "SequentialDistributedSampler"
+  NUM_WORKERS: 8
+  K_TRANSFORMS: 4
+
+INPUT:
+  SIZE: (224, 224)
+  INTERPOLATION: "bicubic"
+  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
+  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
+  TRANSFORMS: ["random_resized_crop", "random_flip", "randaugment2", "normalize"]
+
+OPTIM:
+  NAME: "sgd"
+  LR: 0.002
+  MAX_EPOCH: 20
+  LR_SCHEDULER: "cosine"
+  WARMUP_EPOCH: 1
+  WARMUP_TYPE: "constant"
+  WARMUP_CONS_LR: 1e-5
+
+TRAIN:
+  CHECKPOINT_FREQ: 1
+  PRINT_FREQ: 100
+
+TEST:
+  NO_TEST: True
+
+MODEL:
+  BACKBONE:
+    NAME: "ViT-L/14"
5 changes: 2 additions & 3 deletions docs/DATASETS.md

@@ -50,16 +50,15 @@
 ### ImageNet-21K
 - Create a folder named `imagenet21k_resized/` under `$DATA`.
 - Download the `imagenet21k_miil_tree.pth` to `$DATA/imagenet21k_resized/` from this [link](https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/resources/winter21/imagenet21k_miil_tree.pth).
-- Download the dataset from the [official website](https://image-net.org/index.php) (official winter 2021 released version) and extract the training and validation sets to `$DATA/imagenet/images`. The directory structure should look like
+- Download the dataset from the [official website](https://image-net.org/index.php) (the processed version of the official Winter 2021 release) and extract the training and validation sets to `$DATA/imagenet21k_resized`. The directory structure should look like
 ```
 imagenet21k_resized/
 |-- imagenet21k_train/ # contains 10,451 folders like n01440764, n01443537, etc.
 |-- imagenet21k_val/ # contains 10,451 folders like n01440764, n01443537, etc.
 |-- imagenet21k_small_classes/ # contains 8,718 folders like n01440764, n01443537, etc.
 |-- imagenet21k_miil_tree.pth
 ```
-- If you had downloaded the ImageNet dataset before, you can create symbolic links to map the training and validation sets to `$DATA/imagenet/images`.
-- Download the `classnames.txt` to `$DATA/imagenet/` from this [link](https://drive.google.com/file/d/1-61f_ol79pViBFDG_IDlUQSwoLcn2XXF/view?usp=sharing). The class names are copied from [CLIP](https://github.com/openai/CLIP/blob/main/notebooks/Prompt_Engineering_for_ImageNet.ipynb).
+Note that downloading ImageNet-21K requires huge disk space (around 280 GB).
 
 ### Caltech101
 - Create a folder named `caltech-101/` under `$DATA`.
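Because the extraction step is easy to get subtly wrong, a quick sanity check of the resulting layout can save a failed run. A sketch using the folder counts quoted in the tree listing above (adjust `root` to your `$DATA`):

```python
from pathlib import Path

root = Path("/path/to/DATA/imagenet21k_resized")  # adjust to your $DATA
expected = {
    "imagenet21k_train": 10451,
    "imagenet21k_val": 10451,
    "imagenet21k_small_classes": 8718,
}
for name, want in expected.items():
    got = sum(1 for p in (root / name).iterdir() if p.is_dir())
    print(f"{name}: {got} synset folders (expected {want})")
assert (root / "imagenet21k_miil_tree.pth").is_file(), "semantic tree file missing"
```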
2 changes: 2 additions & 0 deletions third_party/zsseg.baseline/third_party/CLIP/clip/clip.py

@@ -34,6 +34,8 @@
     "RN50x16": "https://openaipublic.azureedge.net/clip/models/52378b407f34354e150460fe41077663dd5b39c54cd0bfd2b27167a4a06ec9aa/RN50x16.pt",
     "ViT-B/32": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt",
     "ViT-B/16": "https://openaipublic.azureedge.net/clip/models/5806e77cd80f8b59890b7e101eabd078d9fb84e6937f9e85e4ecb61988df416f/ViT-B-16.pt",
+    "ViT-L/14": "https://openaipublic.azureedge.net/clip/models/b8cca3fd41ae0c99ba7e8951adf17d267cdb84cd88be6f7c2e0eca1737a03836/ViT-L-14.pt",
+    "ViT-L/14@336px": "https://openaipublic.azureedge.net/clip/models/3035c92b350959924f9f00213499208652fc7ea050643e8b385c2dac08641f02/ViT-L-14-336px.pt",
 }
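With these two entries registered in `_MODELS`, the larger backbones become addressable by name through the usual CLIP loading interface. A short usage sketch (standard OpenAI CLIP API; the vendored copy under `third_party/zsseg.baseline` is assumed to behave the same):

```python
import torch
import clip  # here, the vendored third_party/.../CLIP package

device = "cuda" if torch.cuda.is_available() else "cpu"
# Newly registered checkpoints are downloaded on first use:
model, preprocess = clip.load("ViT-L/14", device=device)
print(model.visual.input_resolution)  # 224; the @336px variant reports 336
```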