@@ -460,10 +460,10 @@ def __init__(
 
         # output transform
         if self.big_skip:
-            self.residual_transform = nn.Conv2d(self.out_chans, self.out_chans, 1, bias=False)
+            self.residual_transform = nn.Conv2d(self.inp_chans, self.out_chans, 1, bias=False)
             self.residual_transform.weight.is_shared_mp = ["spatial"]
             self.residual_transform.weight.sharded_dims_mp = [None, None, None, None]
-            scale = math.sqrt(0.5 / self.out_chans)
+            scale = math.sqrt(0.5 / self.inp_chans)
             nn.init.normal_(self.residual_transform.weight, mean=0.0, std=scale)
 
         # learned position embedding
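Why this hunk: with the big skip now fed by the full input, the 1x1 residual convolution has to map all `inp_chans` input channels onto the `out_chans` predicted channels, so the weight-init scale must follow the new fan-in (`inp_chans` instead of `out_chans`). A minimal standalone sketch of the corrected setup, with placeholder channel counts (the real values come from the model config, not from this commit):

```python
import math

import torch
import torch.nn as nn

# placeholder channel counts, not taken from the commit
inp_chans, out_chans = 26, 20

# 1x1 conv that projects all input channels onto the predicted channels
residual_transform = nn.Conv2d(inp_chans, out_chans, 1, bias=False)

# init std follows the fan-in of the conv, which is now inp_chans
scale = math.sqrt(0.5 / inp_chans)
nn.init.normal_(residual_transform.weight, mean=0.0, std=scale)

# quick shape check: the projected skip matches the prediction's channel count
x = torch.randn(1, inp_chans, 8, 16)
assert residual_transform(x).shape == (1, out_chans, 8, 16)
```

The 0.5 factor in the init scale plausibly keeps the skip branch at roughly half the variance of a unit-variance input, so the sum of skip and trunk stays near unit scale; the commit itself only changes which channel count the fan-in refers to.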
@@ -591,15 +591,15 @@ def forward(self, x):
         if self.out_shape != self.inp_shape:
             xtype = x.dtype
             # only take the predicted channels as residual
-            residual = x[..., : self.out_chans, :, :].to(torch.float32)
+            residual = x.to(torch.float32)
             with amp.autocast(enabled=False):
                 residual = self.trans_down(residual)
                 residual = residual.contiguous()
                 residual = self.itrans_up(residual)
                 residual = residual.to(dtype=xtype)
         else:
             # only take the predicted channels
-            residual = x[..., : self.out_chans, :, :].contiguous()
+            residual = x
 
         if comm.get_size("fin") > 1:
             x = scatter_to_parallel_region(x, 1, "fin")
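Downstream of this hunk (outside the diff), the transformed residual is presumably added back to the network output; the old slicing `x[..., : self.out_chans, :, :]` is gone because `residual_transform` now performs the channel reduction. A method-style sketch of the updated big-skip path, assuming `trans_down`/`itrans_up` are the spectral resampling transforms and that the 1x1 projection is applied where the skip is consumed (both assumptions, not shown in the diff):

```python
import torch

def big_skip_residual(self, x: torch.Tensor) -> torch.Tensor:
    """Sketch of the residual path after this commit (names assumed from context)."""
    if self.out_shape != self.inp_shape:
        xtype = x.dtype
        # keep every input channel; residual_transform projects them later
        residual = x.to(torch.float32)
        with torch.amp.autocast("cuda", enabled=False):
            residual = self.trans_down(residual)   # resample to the output grid
            residual = residual.contiguous()
            residual = self.itrans_up(residual)
            residual = residual.to(dtype=xtype)
    else:
        residual = x
    # 1x1 conv: inp_chans -> out_chans, replacing the old channel slicing
    return self.residual_transform(residual)
```

Running the spectral transforms in float32 under a disabled autocast region, as the diff does, avoids precision loss in the transform pair before casting back to the network's working dtype.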