Skip to content

Commit 586d190

Browse files
committed
merged unets, consistent q_dim name in models
1 parent 1b140c8 commit 586d190

File tree

10 files changed

+199
-628
lines changed

10 files changed

+199
-628
lines changed

.gitignore

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,4 @@ __pycache__/
33
imgs/
44
exps/
55
_fisher.py
6-
_set_transformer.py
7-
sgm.egg-info/
8-
main_o.py
6+
_set_transformer.py

README.md

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ model = sbgm.train.train(
118118
* UNet and transformer score network implementations,
119119
* VP, SubVP and VE SDEs (neural network $\beta(t)$ and $\sigma(t)$ functions are on the list!),
120120
* Multi-modal conditioning (basically just optional parameter and image conditioning methods),
121+
* Checkpointing optimiser and model,
121122
* Multi-device training and sampling.
122123

123124
### Samples
@@ -169,8 +170,4 @@ ODE sampling
169170
primaryClass={stat.ML},
170171
url={https://arxiv.org/abs/2101.09258},
171172
}
172-
```
173-
174-
<!-- <p align="center">
175-
<img src="figs/flowers_eu.png" width="350" title="hover text">
176-
</p> -->
173+
```

configs/grfs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def grfs_config():
1212

1313
# Model
1414
config.model = model = ml_collections.ConfigDict()
15-
model.model_type = "UNetXY"
15+
model.model_type = "UNet"
1616
model.is_biggan = False
1717
model.dim_mults = [1, 1, 1]
1818
model.hidden_size = 128

configs/quijote.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ def quijote_config():
88

99
# Data
1010
config.dataset_name = "quijote"
11-
config.n_pix = 32
11+
config.n_pix = 64
1212

1313
# Model
1414
config.model = model = ml_collections.ConfigDict()

data/quijote.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def quijote(key, n_pix, split=0.5):
5656
key_train, key_valid = jr.split(key)
5757

5858
data_shape = (1, n_pix, n_pix)
59-
context_shape = (1, n_pix, n_pix)
59+
context_shape = None #(1, n_pix, n_pix)
6060
parameter_dim = 5
6161

6262
X, A = get_quijote_data(n_pix)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "sbgm"
3-
version = "0.0.11"
3+
version = "0.0.12"
44
description = "Score-based Diffusion models in JAX."
55
readme = "README.md"
66
requires-python ="~=3.12"

sbgm/models/__init__.py

Lines changed: 12 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
from typing import Sequence
22
import equinox as eqx
33
from jaxtyping import Key
4+
import numpy as np
45
import ml_collections
56

67
from ._mixer import Mixer2d
78
from ._mlp import ResidualNetwork
89
from ._unet import UNet
9-
from ._unet_xy import UNetXY
1010

1111

1212
def get_model(
@@ -17,6 +17,12 @@ def get_model(
1717
parameter_dim: int,
1818
config: ml_collections.ConfigDict
1919
) -> eqx.Module:
20+
# Grab channel assuming 'q' is a map like x
21+
if context_shape is not None:
22+
context_channels, *_ = context_shape  # context_shape is a plain tuple like (1, n_pix, n_pix) — it has no .shape attribute
23+
else:
24+
context_channels = None
25+
2026
if model_type == "Mixer":
2127
model = Mixer2d(
2228
data_shape,
@@ -26,7 +32,7 @@ def get_model(
2632
mix_hidden_size=config.model.mix_hidden_size,
2733
num_blocks=config.model.num_blocks,
2834
t1=config.t1,
29-
q_dim=context_shape,
35+
q_dim=context_channels,
3036
a_dim=parameter_dim,
3137
key=model_key
3238
)
@@ -42,22 +48,7 @@ def get_model(
4248
num_res_blocks=config.model.num_res_blocks,
4349
attn_resolutions=config.model.attn_resolutions,
4450
final_activation=config.model.final_activation,
45-
a_dim=parameter_dim,
46-
key=model_key
47-
)
48-
if model_type == "UNetXY":
49-
model = UNetXY(
50-
data_shape=data_shape,
51-
is_biggan=config.model.is_biggan,
52-
dim_mults=config.model.dim_mults,
53-
hidden_size=config.model.hidden_size,
54-
heads=config.model.heads,
55-
dim_head=config.model.dim_head,
56-
dropout_rate=config.model.dropout_rate,
57-
num_res_blocks=config.model.num_res_blocks,
58-
attn_resolutions=config.model.attn_resolutions,
59-
final_activation=config.model.final_activation,
60-
q_dim=context_shape[0], # Just grab channel assuming 'q' is a map like x
51+
q_dim=context_channels,
6152
a_dim=parameter_dim,
6253
key=model_key
6354
)
@@ -68,9 +59,11 @@ def get_model(
6859
depth=config.model.depth,
6960
activation=config.model.activation,
7061
dropout_p=config.model.dropout_p,
71-
y_dim=parameter_dim,
62+
q_dim=parameter_dim,
7263
key=model_key
7364
)
65+
if model_type == "CCT":
66+
raise NotImplementedError
7467
if model_type == "DiT":
7568
raise NotImplementedError
7669
return model

sbgm/models/_mlp.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def __init__(
4040
in_size: int,
4141
width_size: int,
4242
depth: int,
43-
y_dim: int,
43+
q_dim: int,
4444
activation: Callable,
4545
dropout_p: float = 0.,
4646
*,
@@ -49,11 +49,11 @@ def __init__(
4949
""" Time-embedding may be necessary """
5050
in_key, *net_keys, out_key = jr.split(key, 2 + depth)
5151
self._in = Linear(
52-
in_size + y_dim + 1, width_size, key=in_key
52+
in_size + q_dim + 1, width_size, key=in_key
5353
)
5454
layers = [
5555
Linear(
56-
width_size + y_dim + 1, width_size, key=_key
56+
width_size + q_dim + 1, width_size, key=_key
5757
)
5858
for _key in net_keys
5959
]

0 commit comments

Comments
 (0)