From 1e8336e0ed457b38d847bd1c07bbe980f83f186e Mon Sep 17 00:00:00 2001 From: RBZ-99 Date: Fri, 15 Nov 2024 17:03:14 -0500 Subject: [PATCH 1/4] integrate vit, many files were moved to src beforehand so handled imports --- topomoe/src/models/__init__.py | 1 + topomoe/src/models/common.py | 2 +- topomoe/src/models/vit.py | 135 +++++++++++++++++++++++++++++++++ topomoe/src/train.py | 8 +- topomoe/tests/test_train.py | 18 ++++- 5 files changed, 157 insertions(+), 7 deletions(-) create mode 100644 topomoe/src/models/vit.py diff --git a/topomoe/src/models/__init__.py b/topomoe/src/models/__init__.py index c40c5dc..42a3a57 100644 --- a/topomoe/src/models/__init__.py +++ b/topomoe/src/models/__init__.py @@ -1,4 +1,5 @@ from . import quadmoe # noqa from . import softmoe # noqa from . import topomoe # noqa +from . import vit from .registry import create_model, list_models # noqa diff --git a/topomoe/src/models/common.py b/topomoe/src/models/common.py index 5f922de..8170086 100644 --- a/topomoe/src/models/common.py +++ b/topomoe/src/models/common.py @@ -6,7 +6,7 @@ from timm.layers import trunc_normal_ from timm.layers.helpers import to_2tuple, to_3tuple -from topomoe.utils import filter_kwargs +from topomoe.src.utils import filter_kwargs State = Dict[str, torch.Tensor] Layer = Callable[..., nn.Module] diff --git a/topomoe/src/models/vit.py b/topomoe/src/models/vit.py new file mode 100644 index 0000000..28fb8c1 --- /dev/null +++ b/topomoe/src/models/vit.py @@ -0,0 +1,135 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from typing import Tuple, Dict +from . import wiring +from .common import Attention, Layer, Mlp, State, init_weights, model_factory, to_list +from .registry import register_model + + +class ViTBlock(nn.Module): + def __init__(self, dim, num_heads, mlp_ratio=4.0, qkv_bias=False, drop_rate=0.0): + super(ViTBlock, self).__init__() + self.norm1 = nn.LayerNorm(dim) + self.attn = Attention( + dim=dim, + num_heads=num_heads, + qkv_bias=qkv_bias, + attn_drop=drop_rate, + proj_drop=drop_rate, + ) + self.drop_path = nn.Dropout(drop_rate) + self.norm2 = nn.LayerNorm(dim) + self.mlp = Mlp( + in_features=dim, + hidden_features=int(dim * mlp_ratio), + drop=drop_rate, + ) + + def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, dict]: + state = {} + + + x_norm = self.norm1(x) + attn_output, attn_state = self.attn(x_norm) + x = x + self.drop_path(attn_output) + state["attention_weights"] = attn_state["attn"] # Store attention weights + + + x_norm = self.norm2(x) + mlp_output = self.mlp(x_norm) + x = x + self.drop_path(mlp_output) + + return x, state + + +class ViTWithStateAndLoss(nn.Module): + def __init__( + self, + img_size: int = 224, + patch_size: int = 16, + in_chans: int = 3, + embed_dim: int = 768, + depth: int = 12, + num_heads: int = 12, + mlp_ratio: float = 4.0, + qkv_bias: bool = True, + drop_rate: float = 0.0, + num_classes=100, + ): + super(ViTWithStateAndLoss, self).__init__() + + + self.patch_embed = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + num_patches = (img_size // patch_size) ** 2 + self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim)) + self.pos_drop = nn.Dropout(drop_rate) + self.num_classes = num_classes + + + self.blocks = nn.ModuleList([ + ViTBlock(dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, drop_rate=drop_rate) + for _ in range(depth) + ]) + + + self.norm = nn.LayerNorm(embed_dim) + self.head = nn.Linear(embed_dim, self.num_classes) + self.apply(init_weights) + + def forward_features(self, x: torch.Tensor) -> Tuple[torch.Tensor, Dict[str, torch.Tensor], Dict[str, torch.Tensor]]: + x = self.patch_embed(x).flatten(2).transpose(1, 2) + x = x + self.pos_embed + x = self.pos_drop(x) + + state = {} + losses = {} # Populate losses? + + + for i, block in enumerate(self.blocks): + x, block_state = block(x) + state[f"block_{i}"] = block_state # Collecting intermediate state from each block + + x = self.norm(x) + return x, losses, state + + def forward_head(self, x: torch.Tensor) -> torch.Tensor: + x = x.mean(dim=1) + return self.head(x) + + def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, Dict[str, torch.Tensor], Dict[str, torch.Tensor]]: + x, losses, state = self.forward_features(x) + x = self.forward_head(x) + return x, losses, state + + + +@register_model +def vit_base_patch16_128(**kwargs): + params = { + "img_size": 128, + "patch_size": 16, + "in_chans": 3, + "embed_dim": 384, + "depth": 12, # Vary number of transformer blocks based on size + "num_heads": 6, + "mlp_ratio": 4.0, + "qkv_bias": True, + "drop_rate": 0.0, + "num_classes": 100, + } + + + model = ViTWithStateAndLoss( + img_size=params["img_size"], + patch_size=params["patch_size"], + in_chans=params["in_chans"], + embed_dim=params["embed_dim"], + depth=params["depth"], + num_heads=params["num_heads"], + mlp_ratio=params["mlp_ratio"], + qkv_bias=params["qkv_bias"], + drop_rate=params["drop_rate"], + num_classes=params["num_classes"] + ) + return model diff --git a/topomoe/src/train.py b/topomoe/src/train.py index 4adda2a..8c606ea 100644 --- a/topomoe/src/train.py +++ b/topomoe/src/train.py @@ -28,9 +28,9 @@ from torch.utils.data import DataLoader from transformers.hf_argparser import HfArg, HfArgumentParser -from topomoe import utils as ut -from topomoe.inspection import Figure, Metric, create_figures, create_metrics -from topomoe.models import create_model, list_models +from topomoe.src import utils as ut +from topomoe.src.inspection import Figure, Metric, create_figures, create_metrics +from topomoe.src.models import create_model, list_models np.set_printoptions(precision=3) plt.switch_backend("Agg") @@ -116,7 +116,7 @@ class Args: aliases=["--inmem"], default=False, help="keep dataset in memory" ) # Optimization - epochs: int = HfArg(default=100, help="number of epochs") + epochs: int = HfArg(default=2, help="number of epochs") batch_size: int = HfArg( aliases=["--bs"], default=256, help="batch size per replica" ) diff --git a/topomoe/tests/test_train.py b/topomoe/tests/test_train.py index b541d94..58b56bb 100644 --- a/topomoe/tests/test_train.py +++ b/topomoe/tests/test_train.py @@ -1,8 +1,19 @@ import pytest - -from topomoe import train +import sys +sys.path.append('../') +from topomoe.src import train configs = { + "vit_small": train.Args( + name="debug_train_vit_small", + out_dir="topomoe/test_results", + model="vit_base_patch16_128", + dataset="hfds/clane9/imagenet-100", + workers=1, + batch_size=1024, + overwrite=True, + debug=False, + ), "transformer": train.Args( name="debug_train_transformer", out_dir="topomoe/test_results", @@ -74,6 +85,7 @@ @pytest.mark.parametrize( "config", [ + "vit_small", "transformer", "transformer_v2", "quadmoe", @@ -85,3 +97,5 @@ def test_train(config: str): args = configs[config] train.main(args) + +test_train("vit_small") From 5ad3089f3f6865dde8c99241ac7ebb7e0d2b9c8c Mon Sep 17 00:00:00 2001 From: RBZ-99 Date: Sat, 16 Nov 2024 02:37:22 -0500 Subject: [PATCH 2/4] vit-topomoe-imagenet1k --- topomoe/src/models/__init__.py | 1 - topomoe/src/models/topomoe.py | 56 ++++++++++++++ topomoe/src/models/vit.py | 135 --------------------------------- topomoe/src/train.py | 3 + topomoe/tests/test_train.py | 126 +++++++++++++++++++++++++++++- 5 files changed, 181 insertions(+), 140 deletions(-) delete mode 100644 topomoe/src/models/vit.py diff --git a/topomoe/src/models/__init__.py b/topomoe/src/models/__init__.py index 42a3a57..c40c5dc 100644 --- a/topomoe/src/models/__init__.py +++ b/topomoe/src/models/__init__.py @@ -1,5 +1,4 @@ from . import quadmoe # noqa from . import softmoe # noqa from . import topomoe # noqa -from . import vit from .registry import create_model, list_models # noqa diff --git a/topomoe/src/models/topomoe.py b/topomoe/src/models/topomoe.py index a229ed9..50a46e0 100644 --- a/topomoe/src/models/topomoe.py +++ b/topomoe/src/models/topomoe.py @@ -504,3 +504,59 @@ def topomoe_tiny_3s_patch16_128(**kwargs): } model = model_factory(TopoMoETransformer, params, defaults, **kwargs) return model + + + + +@register_model +def vit_tiny_patch16_128(**kwargs): + params = { + "img_size": 128, + "patch_size": 16, + "in_chans": 3, + "depths": (6,), + "widths": None, + "embed_dim": 384, + "num_experts": (1,), + } + defaults = { + "num_heads": 6, + } + model = model_factory(TopoMoETransformer, params, defaults, **kwargs) + return model + + +@register_model +def vit_small_patch16_128(**kwargs): + params = { + "img_size": 128, + "patch_size": 16, + "in_chans": 3, + "depths": (12,), + "widths": None, + "embed_dim": 384, + "num_experts": (1,), + } + defaults = { + "num_heads": 12, + } + model = model_factory(TopoMoETransformer, params, defaults, **kwargs) + return model + + +@register_model +def vit_base_patch16_128(**kwargs): + params = { + "img_size": 128, + "patch_size": 16, + "in_chans": 3, + "depths": (12,), + "widths": None, + "embed_dim": 768, + "num_experts": (1,), + } + defaults = { + "num_heads": 12, + } + model = model_factory(TopoMoETransformer, params, defaults, **kwargs) + return model \ No newline at end of file diff --git a/topomoe/src/models/vit.py b/topomoe/src/models/vit.py deleted file mode 100644 index 28fb8c1..0000000 --- a/topomoe/src/models/vit.py +++ /dev/null @@ -1,135 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F -from typing import Tuple, Dict -from . import wiring -from .common import Attention, Layer, Mlp, State, init_weights, model_factory, to_list -from .registry import register_model - - -class ViTBlock(nn.Module): - def __init__(self, dim, num_heads, mlp_ratio=4.0, qkv_bias=False, drop_rate=0.0): - super(ViTBlock, self).__init__() - self.norm1 = nn.LayerNorm(dim) - self.attn = Attention( - dim=dim, - num_heads=num_heads, - qkv_bias=qkv_bias, - attn_drop=drop_rate, - proj_drop=drop_rate, - ) - self.drop_path = nn.Dropout(drop_rate) - self.norm2 = nn.LayerNorm(dim) - self.mlp = Mlp( - in_features=dim, - hidden_features=int(dim * mlp_ratio), - drop=drop_rate, - ) - - def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, dict]: - state = {} - - - x_norm = self.norm1(x) - attn_output, attn_state = self.attn(x_norm) - x = x + self.drop_path(attn_output) - state["attention_weights"] = attn_state["attn"] # Store attention weights - - - x_norm = self.norm2(x) - mlp_output = self.mlp(x_norm) - x = x + self.drop_path(mlp_output) - - return x, state - - -class ViTWithStateAndLoss(nn.Module): - def __init__( - self, - img_size: int = 224, - patch_size: int = 16, - in_chans: int = 3, - embed_dim: int = 768, - depth: int = 12, - num_heads: int = 12, - mlp_ratio: float = 4.0, - qkv_bias: bool = True, - drop_rate: float = 0.0, - num_classes=100, - ): - super(ViTWithStateAndLoss, self).__init__() - - - self.patch_embed = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) - num_patches = (img_size // patch_size) ** 2 - self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim)) - self.pos_drop = nn.Dropout(drop_rate) - self.num_classes = num_classes - - - self.blocks = nn.ModuleList([ - ViTBlock(dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, drop_rate=drop_rate) - for _ in range(depth) - ]) - - - self.norm = nn.LayerNorm(embed_dim) - self.head = nn.Linear(embed_dim, self.num_classes) - self.apply(init_weights) - - def forward_features(self, x: torch.Tensor) -> Tuple[torch.Tensor, Dict[str, torch.Tensor], Dict[str, torch.Tensor]]: - x = self.patch_embed(x).flatten(2).transpose(1, 2) - x = x + self.pos_embed - x = self.pos_drop(x) - - state = {} - losses = {} # Populate losses? - - - for i, block in enumerate(self.blocks): - x, block_state = block(x) - state[f"block_{i}"] = block_state # Collecting intermediate state from each block - - x = self.norm(x) - return x, losses, state - - def forward_head(self, x: torch.Tensor) -> torch.Tensor: - x = x.mean(dim=1) - return self.head(x) - - def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, Dict[str, torch.Tensor], Dict[str, torch.Tensor]]: - x, losses, state = self.forward_features(x) - x = self.forward_head(x) - return x, losses, state - - - -@register_model -def vit_base_patch16_128(**kwargs): - params = { - "img_size": 128, - "patch_size": 16, - "in_chans": 3, - "embed_dim": 384, - "depth": 12, # Vary number of transformer blocks based on size - "num_heads": 6, - "mlp_ratio": 4.0, - "qkv_bias": True, - "drop_rate": 0.0, - "num_classes": 100, - } - - - model = ViTWithStateAndLoss( - img_size=params["img_size"], - patch_size=params["patch_size"], - in_chans=params["in_chans"], - embed_dim=params["embed_dim"], - depth=params["depth"], - num_heads=params["num_heads"], - mlp_ratio=params["mlp_ratio"], - qkv_bias=params["qkv_bias"], - drop_rate=params["drop_rate"], - num_classes=params["num_classes"] - ) - return model diff --git a/topomoe/src/train.py b/topomoe/src/train.py index 8c606ea..3924737 100644 --- a/topomoe/src/train.py +++ b/topomoe/src/train.py @@ -241,6 +241,7 @@ def main(args: Args): # Dataset logging.info("Loading dataset %s", args.dataset) + import pdb;pdb.set_trace() dataset_train = create_dataset( args.dataset, root=args.data_dir, @@ -283,6 +284,7 @@ def main(args: Args): device=clust.device, use_prefetcher=args.prefetch, ) + import pdb;pdb.set_trace() loader_eval = create_loader( dataset_eval, input_size=input_size, @@ -516,6 +518,7 @@ def train_one_epoch( data_time = time.monotonic() - end # forward pass + #import pdb;pdb.set_trace() with autocast(): output, losses, state = model(input) losses["class_loss"] = loss_fn(output, target) diff --git a/topomoe/tests/test_train.py b/topomoe/tests/test_train.py index 58b56bb..1bcd450 100644 --- a/topomoe/tests/test_train.py +++ b/topomoe/tests/test_train.py @@ -3,16 +3,29 @@ sys.path.append('../') from topomoe.src import train +# Replace with your path to Imagenet1k (ILSVRC2012) with train and val folders +imagenet_path = "../../datasets/ILSVRC2012/" + configs = { "vit_small": train.Args( + name="debug_train_vit_small", + out_dir="topomoe/test_results", + model="vit_small_patch16_128", + dataset= "hfds/clane9/imagenet-100", + workers=1, + batch_size=1024, + overwrite=True, + debug=True, + ), + "vit_base": train.Args( name="debug_train_vit_small", out_dir="topomoe/test_results", model="vit_base_patch16_128", - dataset="hfds/clane9/imagenet-100", + dataset= "hfds/clane9/imagenet-100", workers=1, batch_size=1024, overwrite=True, - debug=False, + debug=True, ), "transformer": train.Args( name="debug_train_transformer", @@ -20,7 +33,7 @@ model="quadmoe_tiny_1s_patch16_128", dataset="hfds/clane9/imagenet-100", workers=0, - batch_size=32, + batch_size=1024, overwrite=True, debug=True, ), @@ -79,13 +92,118 @@ overwrite=True, debug=True, ), + ## Imagenet1k + "vit_small_imagenet1k": train.Args( + name="debug_train_vit_small", + out_dir="topomoe/test_results", + model="vit_small_patch16_128", + dataset= "imagenet1k", + data_dir = imagenet_path, + workers=1, + num_classes=1000, + batch_size=1024, + overwrite=True, + debug=True, + ), + "vit_base_imagenet1k": train.Args( + name="debug_train_vit_small", + out_dir="topomoe/test_results", + model="vit_base_patch16_128", + dataset= "imagenet1k", + data_dir = imagenet_path, + workers=1, + num_classes=1000, + batch_size=1024, + overwrite=True, + debug=True, + ), + "transformer_imagenet1k": train.Args( + name="debug_train_transformer", + out_dir="topomoe/test_results", + model="quadmoe_tiny_1s_patch16_128", + dataset="imagenet1k", + data_dir = imagenet_path, + workers=0, + num_classes=1000, + batch_size=1024, + overwrite=True, + debug=True, + ), + "transformer_v2_imagenet1k": train.Args( + name="debug_train_transformer_v2", + out_dir="topomoe/test_results", + model="topomoe_tiny_1s_patch16_128", + dataset="imagenet1k", + data_dir = imagenet_path, + workers=0, + num_classes=1000, + batch_size=32, + overwrite=True, + debug=True, + ), + "quadmoe_imagenet1k": train.Args( + name="debug_train_quadmoe", + out_dir="topomoe/test_results", + model="quadmoe_tiny_2s_patch16_128", + dataset="imagenet1k", + data_dir = imagenet_path, + workers=0, + num_classes=1000, + batch_size=32, + overwrite=True, + debug=True, + ), + "softmoe_imagenet1k": train.Args( + name="debug_train_softmoe", + out_dir="topomoe/test_results", + model="softmoe_tiny_2s_patch16_128", + dataset="imagenet1k", + data_dir = imagenet_path, + workers=0, + num_classes=1000, + batch_size=32, + overwrite=True, + debug=True, + ), + "topomoe_imagenet1k": train.Args( + name="debug_train_topomoe", + out_dir="topomoe/test_results", + model="topomoe_tiny_2s_patch16_128", + wiring_lambd=0.01, + dataset="imagenet1k", + data_dir = imagenet_path, + workers=0, + num_classes=1000, + batch_size=32, + overwrite=True, + debug=True, + ), + "aug_imagenet1k": train.Args( + name="debug_train_aug", + out_dir="topomoe/test_results", + model="quadmoe_tiny_1s_patch16_128", + dataset="imagenet1k", + data_dir = imagenet_path, + num_classes=1000, + scale=[0.1, 0.3], + ratio=[1 / 4, 4 / 1], + hflip=0.5, + color_jitter=0.4, + workers=0, + batch_size=32, + overwrite=True, + debug=True, + ), } + + @pytest.mark.parametrize( "config", [ "vit_small", + "vit_base", "transformer", "transformer_v2", "quadmoe", @@ -98,4 +216,4 @@ def test_train(config: str): args = configs[config] train.main(args) -test_train("vit_small") +test_train("vit_small_imagenet1k") From 613ad91347ebb863fa6683113529573ea321a03c Mon Sep 17 00:00:00 2001 From: RBZ-99 Date: Sat, 16 Nov 2024 03:22:58 -0500 Subject: [PATCH 3/4] vit-topomoe --- topomoe/tests/test_train.py | 109 +----------------------------------- 1 file changed, 2 insertions(+), 107 deletions(-) diff --git a/topomoe/tests/test_train.py b/topomoe/tests/test_train.py index 1bcd450..bb9caa3 100644 --- a/topomoe/tests/test_train.py +++ b/topomoe/tests/test_train.py @@ -3,9 +3,6 @@ sys.path.append('../') from topomoe.src import train -# Replace with your path to Imagenet1k (ILSVRC2012) with train and val folders -imagenet_path = "../../datasets/ILSVRC2012/" - configs = { "vit_small": train.Args( name="debug_train_vit_small", @@ -91,109 +88,7 @@ batch_size=32, overwrite=True, debug=True, - ), - ## Imagenet1k - "vit_small_imagenet1k": train.Args( - name="debug_train_vit_small", - out_dir="topomoe/test_results", - model="vit_small_patch16_128", - dataset= "imagenet1k", - data_dir = imagenet_path, - workers=1, - num_classes=1000, - batch_size=1024, - overwrite=True, - debug=True, - ), - "vit_base_imagenet1k": train.Args( - name="debug_train_vit_small", - out_dir="topomoe/test_results", - model="vit_base_patch16_128", - dataset= "imagenet1k", - data_dir = imagenet_path, - workers=1, - num_classes=1000, - batch_size=1024, - overwrite=True, - debug=True, - ), - "transformer_imagenet1k": train.Args( - name="debug_train_transformer", - out_dir="topomoe/test_results", - model="quadmoe_tiny_1s_patch16_128", - dataset="imagenet1k", - data_dir = imagenet_path, - workers=0, - num_classes=1000, - batch_size=1024, - overwrite=True, - debug=True, - ), - "transformer_v2_imagenet1k": train.Args( - name="debug_train_transformer_v2", - out_dir="topomoe/test_results", - model="topomoe_tiny_1s_patch16_128", - dataset="imagenet1k", - data_dir = imagenet_path, - workers=0, - num_classes=1000, - batch_size=32, - overwrite=True, - debug=True, - ), - "quadmoe_imagenet1k": train.Args( - name="debug_train_quadmoe", - out_dir="topomoe/test_results", - model="quadmoe_tiny_2s_patch16_128", - dataset="imagenet1k", - data_dir = imagenet_path, - workers=0, - num_classes=1000, - batch_size=32, - overwrite=True, - debug=True, - ), - "softmoe_imagenet1k": train.Args( - name="debug_train_softmoe", - out_dir="topomoe/test_results", - model="softmoe_tiny_2s_patch16_128", - dataset="imagenet1k", - data_dir = imagenet_path, - workers=0, - num_classes=1000, - batch_size=32, - overwrite=True, - debug=True, - ), - "topomoe_imagenet1k": train.Args( - name="debug_train_topomoe", - out_dir="topomoe/test_results", - model="topomoe_tiny_2s_patch16_128", - wiring_lambd=0.01, - dataset="imagenet1k", - data_dir = imagenet_path, - workers=0, - num_classes=1000, - batch_size=32, - overwrite=True, - debug=True, - ), - "aug_imagenet1k": train.Args( - name="debug_train_aug", - out_dir="topomoe/test_results", - model="quadmoe_tiny_1s_patch16_128", - dataset="imagenet1k", - data_dir = imagenet_path, - num_classes=1000, - scale=[0.1, 0.3], - ratio=[1 / 4, 4 / 1], - hflip=0.5, - color_jitter=0.4, - workers=0, - batch_size=32, - overwrite=True, - debug=True, - ), + ) } @@ -216,4 +111,4 @@ def test_train(config: str): args = configs[config] train.main(args) -test_train("vit_small_imagenet1k") +test_train("vit_small") From 41e8183fc42c91cb8fc7726ae6605bd7e4ec5581 Mon Sep 17 00:00:00 2001 From: RBZ-99 Date: Sat, 16 Nov 2024 03:24:41 -0500 Subject: [PATCH 4/4] vit-topomoe and imagenet1k --- topomoe/tests/test_train.py | 109 +++++++++++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 2 deletions(-) diff --git a/topomoe/tests/test_train.py b/topomoe/tests/test_train.py index bb9caa3..1bcd450 100644 --- a/topomoe/tests/test_train.py +++ b/topomoe/tests/test_train.py @@ -3,6 +3,9 @@ sys.path.append('../') from topomoe.src import train +# Replace with your path to Imagenet1k (ILSVRC2012) with train and val folders +imagenet_path = "../../datasets/ILSVRC2012/" + configs = { "vit_small": train.Args( name="debug_train_vit_small", @@ -88,7 +91,109 @@ batch_size=32, overwrite=True, debug=True, - ) + ), + ## Imagenet1k + "vit_small_imagenet1k": train.Args( + name="debug_train_vit_small", + out_dir="topomoe/test_results", + model="vit_small_patch16_128", + dataset= "imagenet1k", + data_dir = imagenet_path, + workers=1, + num_classes=1000, + batch_size=1024, + overwrite=True, + debug=True, + ), + "vit_base_imagenet1k": train.Args( + name="debug_train_vit_small", + out_dir="topomoe/test_results", + model="vit_base_patch16_128", + dataset= "imagenet1k", + data_dir = imagenet_path, + workers=1, + num_classes=1000, + batch_size=1024, + overwrite=True, + debug=True, + ), + "transformer_imagenet1k": train.Args( + name="debug_train_transformer", + out_dir="topomoe/test_results", + model="quadmoe_tiny_1s_patch16_128", + dataset="imagenet1k", + data_dir = imagenet_path, + workers=0, + num_classes=1000, + batch_size=1024, + overwrite=True, + debug=True, + ), + "transformer_v2_imagenet1k": train.Args( + name="debug_train_transformer_v2", + out_dir="topomoe/test_results", + model="topomoe_tiny_1s_patch16_128", + dataset="imagenet1k", + data_dir = imagenet_path, + workers=0, + num_classes=1000, + batch_size=32, + overwrite=True, + debug=True, + ), + "quadmoe_imagenet1k": train.Args( + name="debug_train_quadmoe", + out_dir="topomoe/test_results", + model="quadmoe_tiny_2s_patch16_128", + dataset="imagenet1k", + data_dir = imagenet_path, + workers=0, + num_classes=1000, + batch_size=32, + overwrite=True, + debug=True, + ), + "softmoe_imagenet1k": train.Args( + name="debug_train_softmoe", + out_dir="topomoe/test_results", + model="softmoe_tiny_2s_patch16_128", + dataset="imagenet1k", + data_dir = imagenet_path, + workers=0, + num_classes=1000, + batch_size=32, + overwrite=True, + debug=True, + ), + "topomoe_imagenet1k": train.Args( + name="debug_train_topomoe", + out_dir="topomoe/test_results", + model="topomoe_tiny_2s_patch16_128", + wiring_lambd=0.01, + dataset="imagenet1k", + data_dir = imagenet_path, + workers=0, + num_classes=1000, + batch_size=32, + overwrite=True, + debug=True, + ), + "aug_imagenet1k": train.Args( + name="debug_train_aug", + out_dir="topomoe/test_results", + model="quadmoe_tiny_1s_patch16_128", + dataset="imagenet1k", + data_dir = imagenet_path, + num_classes=1000, + scale=[0.1, 0.3], + ratio=[1 / 4, 4 / 1], + hflip=0.5, + color_jitter=0.4, + workers=0, + batch_size=32, + overwrite=True, + debug=True, + ), } @@ -111,4 +216,4 @@ def test_train(config: str): args = configs[config] train.main(args) -test_train("vit_small") +test_train("vit_small_imagenet1k")