
Commit 30140f5

fix: move encoder upsampler to multi-task-decoder for easier weight saving
1 parent fb4b833 commit 30140f5

10 files changed: +80 -186 lines changed


cellseg_models_pytorch/decoders/multitask_decoder.py

Lines changed: 29 additions & 11 deletions
@@ -7,6 +7,7 @@
 
 from cellseg_models_pytorch.decoders.long_skips import StemSkip
 from cellseg_models_pytorch.decoders.unet_decoder import UnetDecoder
+from cellseg_models_pytorch.encoders.encoder_upsampler import EncoderUpsampler
 from cellseg_models_pytorch.models.base._initialization import (
     initialize_decoder,
     initialize_head,
@@ -36,8 +37,7 @@ def __init__(
         decoders: Tuple[str, ...],
         heads: Dict[str, Dict[str, int]],
         out_channels: Tuple[int, ...],
-        enc_channels: Tuple[int, ...],
-        enc_reductions: Tuple[int, ...],
+        enc_feature_info: Tuple[Dict[str, Any], ...],
         n_layers: Tuple[int, ...],
         n_blocks: Tuple[int, ...],
         stage_kws: Tuple[Dict[str, Any], ...],
@@ -59,10 +59,8 @@ def __init__(
             out_channels (Tuple[int, ...]):
                 Tuple of output channels for each decoder stage. The length of the tuple
                 should be equal to the number of enc_channels.
-            enc_channels (Tuple[int, ...]):
-                Tuple of encoder channels.
-            enc_reductions (Tuple[int, ...]):
-                Tuple of encoder reduction factors.
+            enc_feature_info (Tuple[Dict[str, Any], ...]):
+                Tuple of encoder feature info dicts. Basically timm.model.feature_info
             n_layers (Tuple[int, ...]):
                 Tuple of number of conv layers in each decoder stage.
             n_blocks (Tuple[int, ...]):
@@ -87,15 +85,30 @@ def __init__(
         self._check_head_args(heads, decoders)
         self._check_decoder_args(decoders)
         self._check_depth(
-            len(enc_channels),
+            len(n_blocks),
             {
-                "n_blocks": n_blocks,
                 "n_layers": n_layers,
                 "out_channels": out_channels,
-                "enc_reductions": enc_reductions,
+                "enc_feature_info": enc_feature_info,
             },
         )
 
+        # get the reduction factors and out channels of the encoder
+        self.enc_feature_info = enc_feature_info[::-1]  # bottleneck first
+        enc_reductions = tuple([inf["reduction"] for inf in self.enc_feature_info])
+        enc_channels = tuple([inf["num_chs"] for inf in self.enc_feature_info])
+
+        # initialize feature upsampler if encoder is a vision transformer
+        self.encoder_upsampler = None
+        if all(elem == enc_reductions[0] for elem in enc_reductions):
+            self.encoder_upsampler = EncoderUpsampler(
+                feature_info=enc_feature_info,
+                out_channels=out_channels,
+            )
+            self.enc_feature_info = self.encoder_upsampler.feature_info  # bottleneck 1st
+            enc_reductions = tuple([inf["reduction"] for inf in self.enc_feature_info])
+            enc_channels = tuple([inf["num_chs"] for inf in self.enc_feature_info])
+
         # style
         self.make_style = None
         if style_channels is not None:
@@ -194,14 +207,19 @@ def forward(
 
         Parameters:
             enc_feats (Tuple[torch.Tensor, ...]):
-                Tuple containing encoder feature tensors.
+                Tuple containing encoder feature tensors. Assumes that the deepest
+                (i.e. bottleneck) feature is the last element of the tuple.
             x_in (torch.Tensor, default=None):
                 Optional (the input image) tensor for stem skip connection.
 
         Returns:
             Tuple[Dict[str, List[torch.Tensor]], Dict[str, torch.Tensor]]:
                 The output of the seg heads.
         """
+        enc_feats = enc_feats[::-1]  # bottleneck first
+        if self.encoder_upsampler is not None:
+            enc_feats = self.encoder_upsampler(enc_feats)
+
         style = self.forward_style(enc_feats[0])
         dec_feats = self.forward_features(enc_feats, style)
 
@@ -211,7 +229,7 @@ def forward(
 
         out = self.forward_heads(dec_feats)
 
-        return dec_feats, out
+        return enc_feats, dec_feats, out
 
     def initialize(self) -> None:
         """Initialize the decoders and segmentation heads."""

cellseg_models_pytorch/encoders/encoder.py

Lines changed: 14 additions & 29 deletions
@@ -3,7 +3,6 @@
 import torch
 import torch.nn as nn
 
-from .encoder_upsampler import EncoderUpsampler
 from .timm_encoder import TimmEncoder
 
 __all__ = ["Encoder"]
@@ -14,26 +13,22 @@ def __init__(
         self,
         timm_encoder_name: str,
         timm_encoder_out_indices: Tuple[int, ...],
-        pixel_decoder_out_channels: Tuple[int, ...],
         timm_encoder_pretrained: bool = True,
         timm_extra_kwargs: Dict[str, Any] = {},
     ) -> None:
         """Wrap timm encoders to one class.
 
-        Parameters
-        ----------
-        timm_encoder_name : str
-            Name of the encoder. If the name is in `TR_ENCODERS.keys()`, a transformer
-            will be used. Otherwise, a timm encoder will be used.
-        timm_encoder_out_indices : Tuple[int], optional
-            Indices of the output features.
-        pixel_decoder_out_channels : Tuple[int], optional
-            Number of output channels at each upsampling stage.
-        timm_encoder_pretrained : bool, optional, default=False
-            If True, load pretrained timm weights, by default False.
-        timm_extra_kwargs : Dict[str, Any], optional, default={}
-            Key-word arguments for any `timm` based encoder. These arguments are
-            used in `timm.create_model(**kwargs)` function call.
+        Parameters:
+            timm_encoder_name (str):
+                Name of the encoder. If the name is in `TR_ENCODERS.keys()`, a transformer
+                will be used. Otherwise, a timm encoder will be used.
+            timm_encoder_out_indices (Tuple[int, ...]):
+                Indices of the output features.
+            timm_encoder_pretrained (bool, default=True):
+                If True, load pretrained timm weights.
+            timm_extra_kwargs (Dict[str, Any], default={}):
+                Key-word arguments for any `timm` based encoder. These arguments are
+                used in `timm.create_model(**kwargs)` function call.
         """
         super().__init__()
 
@@ -45,23 +40,13 @@ def __init__(
             extra_kwargs=timm_extra_kwargs,
         )
 
-        # initialize feature upsampler if encoder is a vision transformer
-        feature_info = self.encoder.feature_info
-        reductions = [finfo["reduction"] for finfo in feature_info]
-        if all(element == reductions[0] for element in reductions):
-            self.encoder = EncoderUpsampler(
-                backbone=self.encoder,
-                out_channels=pixel_decoder_out_channels,
-            )
-            feature_info = self.encoder.feature_info
-
-        self.out_channels = [f["num_chs"] for f in self.encoder.feature_info][::-1]
-        self.feature_info = self.encoder.feature_info[::-1]
+        self.out_channels = [f["num_chs"] for f in self.encoder.feature_info]
+        self.feature_info = self.encoder.feature_info  # bottleneck last element
 
     def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, Tuple[torch.Tensor, ...]]:
         """Forward pass of the encoder and return all the features."""
         output, feats = self.encoder(x)
-        return output, feats[::-1]
+        return output, feats  # bottleneck feature is the last element
 
     def freeze_encoder(self) -> None:
         """Freeze the parameters of the encoder."""

cellseg_models_pytorch/encoders/encoder_upsampler.py

Lines changed: 20 additions & 21 deletions
@@ -64,7 +64,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 class EncoderUpsampler(nn.Module):
     def __init__(
         self,
-        backbone: nn.Module,
+        feature_info: Tuple[dict, ...],
         out_channels: Tuple[int, ...],
     ) -> None:
         """Feature upsampler for transformer-like backbones.
@@ -75,28 +75,27 @@ def __init__(
         are two. Builds an image-pyramid like structure.
 
         Parameters:
-            backbone (nn.Module):
-                Backbone network that extracts features.
+            feature_info (Tuple[dict, ...]):
+                timm feature info of the backbone. Assumes that the feature info dicts
+                are in bottleneck first order, i.e. the deepest encoder block first.
+                For example: [
+                    {'module': 'blocks.8', 'num_chs': 1024, 'reduction': 16},
+                    {'module': 'blocks.4', 'num_chs': 1024, 'reduction': 16},
+                ]
             out_channels (Tuple[int, ...]):
                 Number of channels in the output tensor of each upsampling block.
                 Defaults to None.
         """
-        print(out_channels, backbone.feature_info)
         super().__init__()
-        if len(out_channels) != len(backbone.feature_info):
+        if len(out_channels) != len(feature_info):
             raise ValueError(
                 "`out_channels` must have the same len as the `backbone.feature_info.`"
-                f"Got {len(out_channels)} and {len(backbone.feature_info)} respectively."
+                f"Got {len(out_channels)} and {len(feature_info)} respectively."
             )
 
-        self.backbone = backbone
         self.out_channels = out_channels
         self.feature_info = []
 
-        # flip the feature info so that we start building the
-        # upsampling blocks from the bottleneck layer
-        feature_info = backbone.feature_info[::-1]
-
         # bottleneck layer
         self.bottleneck = nn.Conv2d(
             in_channels=feature_info[0]["num_chs"],
@@ -144,17 +143,17 @@ def __init__(
             )
             self.up_blocks[f"up{i + 1}"] = nn.Sequential(*up_blocks)
 
-        # flip the feature info back to the original order to match the top-down
-        # order of timm feature_info. (high to low res)
-        self.feature_info = self.feature_info[::-1]
-
-    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, Tuple[torch.Tensor, ...]]:
-        # get the features from the backbone
-        final_feat, feats = self.backbone(x)
+    def forward(self, feats: Tuple[torch.Tensor]) -> Tuple[torch.Tensor, ...]:
+        """Forward pass of the encoder upsampler.
 
-        # flip the features so that we start from the bottleneck (low res)
-        feats = feats[::-1]
+        Parameters:
+            feats (Tuple[torch.Tensor]):
+                Tuple of features from the backbone in bottleneck first order, i.e. the
+                bottleneck (deepest) feature is the first element in the tuple.
 
+        Returns:
+            Tuple[torch.Tensor, ...]: Tuple of upsampled features in hi-to-lo res order.
+        """
         # bottleneck feature
         up_feat = self.bottleneck(feats[0])
         intermediate_features = [up_feat]
@@ -164,4 +163,4 @@ def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, Tuple[torch.Tensor, ...]]:
             up_feat = self.up_blocks[f"up{i + 1}"](feat)
             intermediate_features.append(up_feat)
 
-        return final_feat, tuple(intermediate_features[::-1])  # feats in top-down order
+        return tuple(intermediate_features)  # hi-to-lo res order
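With the backbone argument removed, EncoderUpsampler is a pure module over feature tensors: construct it from bottleneck-first feature_info dicts and call it on the features directly. A minimal sketch under the docstring's assumptions; shapes and channel counts are illustrative:

    import torch

    from cellseg_models_pytorch.encoders.encoder_upsampler import EncoderUpsampler

    # Bottleneck-first feature info, as the new constructor expects.
    feature_info = (
        {"module": "blocks.8", "num_chs": 1024, "reduction": 16},
        {"module": "blocks.4", "num_chs": 1024, "reduction": 16},
    )
    upsampler = EncoderUpsampler(feature_info=feature_info, out_channels=(256, 128))

    # ViT-like features: equal spatial size at every stage, bottleneck first.
    feats = (torch.randn(1, 1024, 16, 16), torch.randn(1, 1024, 16, 16))
    up_feats = upsampler(feats)  # tuple of upsampled features, one per stage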

cellseg_models_pytorch/encoders/timm_encoder.py

Lines changed: 0 additions & 83 deletions
@@ -69,86 +69,3 @@ def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, List[torch.Tensor]]:
         offset = len(intermediates) - len(self.encoder.feature_info)
 
         return final_feat, [intermediates[i + offset] for i in self.out_indices]
-
-
-# class TimmEncoder(nn.Module):
-#     def __init__(
-#         self,
-#         name: str,
-#         pretrained: bool = True,
-#         checkpoint_path: str = None,
-#         in_channels: int = 3,
-#         depth: int = 4,
-#         out_indices: List[int] = None,
-#         **kwargs,
-#     ) -> None:
-#         """Import any encoder from timm package.
-
-#         Parameters
-#         ----------
-#         name : str
-#             Name of the encoder.
-#         pretrained : bool, optional
-#             If True, load pretrained weights, by default True.
-#         checkpoint_path : str, optional
-#             Path to the checkpoint file, by default None. If not None, overrides
-#             the `pretrained` argument.
-#         in_channels : int, optional
-#             Number of input channels, by default 3.
-#         depth : int, optional
-#             Number of output features, by default 4.
-#         out_indices : List[int], optional
-#             Indices of the output features, by default None. If None,
-#             out_indices is set to range(len(depth)). Overrides the `depth` argument.
-#         **kwargs : Dict[str, Any]
-#             Key-word arguments for any `timm` based encoder. These arguments are
-#             used in `timm.create_model(**kwargs)` function call.
-#         """
-#         super().__init__()
-
-#         # set out_indices
-#         self.out_indices = out_indices
-#         if out_indices is None:
-#             self.out_indices = tuple(range(depth))
-
-#         # set checkpoint_path
-#         if checkpoint_path is None:
-#             checkpoint_path = ""
-
-#         # create the timm model
-#         try:
-#             self.backbone = timm.create_model(
-#                 name,
-#                 pretrained=pretrained,
-#                 checkpoint_path=checkpoint_path,
-#                 in_chans=in_channels,
-#                 features_only=True,
-#                 out_indices=self.out_indices,
-#                 **kwargs,
-#             )
-#         except (AttributeError, RuntimeError) as err:
-#             print(err)
-#             raise RuntimeError(
-#                 f"timm backbone: {name} is not supported due to missing "
-#                 "features_only argument implementation in timm-package."
-#             )
-#         except IndexError as err:
-#             print(err)
-#             raise IndexError(
-#                 f"It's possible that the given depth: {depth} is too large for "
-#                 f"the given backbone: {name}. Try passing a smaller `depth` argument "
-#                 "or a different backbone."
-#             )
-
-#         # set in_channels and out_channels
-#         self.in_channels = in_channels
-#         self.out_channels = tuple(self.backbone.feature_info.channels()[::-1])
-#         if out_indices is not None:
-#             self.out_channels = tuple(self.out_channels[i] for i in self.out_indices)
-
-#         self.feature_info = self.backbone.feature_info.info[:depth][::-1]
-
-#     def forward(self, x: torch.Tensor, **kwargs) -> List[torch.Tensor]:
-#         """Forward pass of the encoder and return all the features."""
-#         features = self.backbone(x)
-#         return features[::-1]

cellseg_models_pytorch/models/cellpose/cellpose.py

Lines changed: 2 additions & 7 deletions
@@ -156,20 +156,15 @@ def __init__(
         self.encoder = Encoder(
             timm_encoder_name=enc_name,
             timm_encoder_out_indices=enc_out_indices,
-            pixel_decoder_out_channels=out_channels,
             timm_encoder_pretrained=enc_pretrain,
             timm_extra_kwargs=encoder_kws,
         )
 
-        # get the reduction factors for the encoder
-        enc_reductions = tuple([inf["reduction"] for inf in self.encoder.feature_info])
-
         self.decoder = MultiTaskDecoder(
             decoders=decoders,
             heads=heads,
             out_channels=out_channels,
-            enc_channels=self.encoder.out_channels,
-            enc_reductions=enc_reductions,
+            enc_feature_info=self.encoder.feature_info,
             n_layers=n_layers,
             n_blocks=n_blocks,
             stage_kws=stage_kws,
@@ -208,7 +203,7 @@ def forward(
             outputs (segmentations) dict.
         """
         enc_output, feats = self.encoder.forward(x)
-        dec_feats, out = self.decoder.forward(feats, x)
+        feats, dec_feats, out = self.decoder.forward(feats, x)
 
         if return_feats:
             return enc_output, feats, dec_feats, out
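This is where the commit message pays off: because the EncoderUpsampler now lives inside MultiTaskDecoder, its weights travel with the decoder's state dict instead of being tangled into the encoder. A hedged sketch; the model variable and file name are illustrative:

    import torch

    # Saving the decoder alone now also captures the upsampler weights,
    # since the upsampler is a submodule of MultiTaskDecoder after this commit.
    torch.save(model.decoder.state_dict(), "decoder_with_upsampler.pth")

    # Restoring is symmetric.
    model.decoder.load_state_dict(torch.load("decoder_with_upsampler.pth"))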
