flax interop

qihqi · qihqi · commit 073c52c78d2c · 2025-06-01T20:34:08.000-07:00
diff --git a/torchax/test/test_flax.py b/torchax/test/test_flax.py
@@ -1,7 +1,9 @@
 import unittest
+import torch
 import torchax
 from flax import linen as nn
 from torchax.flax import FlaxNNModule
+from torchax.interop import jax_jit
 import jax.numpy as jnp
 import jax
 
@@ -12,6 +14,7 @@ def test_flax_simple(self):
 
     class CNN(nn.Module):
       """A simple CNN model."""
+
       @nn.compact
       def __call__(self, x):
         x = nn.Conv(features=32, kernel_size=(3, 3))(x)
@@ -34,13 +37,50 @@ def __call__(self, x):
     expected = flax_model.apply(state, inputs)
 
     env = torchax.default_env()
-    nn_module = FlaxNNModule(env, flax_model, (inputs, ), {} )
+    nn_module = FlaxNNModule(env, flax_model, (inputs,), {})
     res = nn_module.forward(inputs)
 
     self.assertTrue(jnp.allclose(res.jax(), expected))
 
-    
+  def test_flax_functional_call(self):  # FlaxNNModule called via torch.func.functional_call under jax_jit must match flax's own apply
+
+    class CNN(nn.Module):
+      """A simple CNN model."""
+
+      @nn.compact
+      def __call__(self, x):
+        x = nn.Conv(features=32, kernel_size=(3, 3))(x)
+        x = nn.relu(x)
+        x = nn.avg_pool(x, window_shape=(2, 2), strides=(2, 2))
+        x = nn.Conv(features=64, kernel_size=(3, 3))(x)
+        x = nn.relu(x)
+        x = nn.avg_pool(x, window_shape=(2, 2), strides=(2, 2))
+        x = x.reshape((x.shape[0], -1))  # flatten
+        x = nn.Dense(features=256)(x)
+        x = nn.relu(x)
+        x = nn.Dense(features=10)(x)
+        return x
+
+    flax_model = CNN()
+
+    inputs = jnp.ones((1, 28, 28, 1))  # presumably one 28x28 single-channel image, channels-last — confirm
+    env = torchax.default_env()
+    state = flax_model.init(env.prng_key, inputs)  # flax variables initialised directly with the env's PRNG key
+    expected = flax_model.apply(state, inputs)  # reference output computed by flax itself
+
+    env = torchax.default_env()  # NOTE(review): env is already bound a few lines up; this re-fetch looks redundant — confirm
+    nn_module = FlaxNNModule(env, flax_model, (inputs,), {})  # NOTE(review): comparison only holds if the wrapper inits from the same env.prng_key — confirm
+
+    @jax_jit
+    def jitted(weights, args):  # weights are passed in explicitly rather than read from nn_module
+      return torch.func.functional_call(nn_module, weights, args)
+
+    with env:  # the 'jax'-device tensor is created and the jitted call is made inside the env context
+      inputs_torch = torch.ones((1, 28, 28, 1), device='jax')
+      state_dict = nn_module.state_dict()  # parameters handed to functional_call as `weights`
+      res = jitted(state_dict, inputs_torch)
+      self.assertTrue(jnp.allclose(res.jax(), expected))  # res.jax() presumably yields the underlying jax array — confirm
+
+
 if __name__ == '__main__':
   unittest.main()
-    
-    
diff --git a/torchax/torchax/flax.py b/torchax/torchax/flax.py
@@ -7,13 +7,33 @@
 
 class FlaxNNModule(torch.nn.Module):
 
-  def __init__(self, env, flax_module, sample_args, sample_kwargs):
+  def __init__(self, env, flax_module, sample_args, sample_kwargs=None):  # sample_args/sample_kwargs are forwarded to flax_module.init
     super().__init__()
     prng = env.prng_key
-    self._params = tx.interop.call_jax(flax_module.init, prng, *sample_args, **sample_kwargs)
+    sample_kwargs = sample_kwargs or {}  # None (the default) means no extra keyword arguments
+    parameter_dict = tx.interop.call_jax(flax_module.init, prng, *sample_args,
+                                         **sample_kwargs)  # flax_module.init is invoked through tx.interop.call_jax
+
+    self._params = self._encode_nested_dict(parameter_dict)  # kept as a torch.nn.Module tree; a Module attribute is registered as a submodule, so its Parameters show up in state_dict()
+
     self._flax_module = flax_module
-    
-  def forward(self, *args, **kwargs):
-    return tx.interop.call_jax(self._flax_module.apply, self._params, *args, **kwargs)
 
-    
+  def _encode_nested_dict(self, nested_dict):  # nested dict -> tree of torch.nn.Module objects holding Parameters
+    child_module = torch.nn.Module()  # plain container module for this dict level
+    for k, v in nested_dict.items():
+      if isinstance(v, dict):  # NOTE(review): only dict instances recurse; another mapping type (e.g. a flax FrozenDict) would fall into the Parameter branch — confirm init never returns one
+        child_module.add_module(k, self._encode_nested_dict(v))  # sub-dict becomes a child module under the same key
+      else:
+        child_module.register_parameter(k, torch.nn.Parameter(v))  # leaf value is wrapped as a Parameter under the same key
+    return child_module
+
+  def _decode_nested_dict(self, child_module):  # inverse of _encode_nested_dict: module tree -> nested dict
+    result = dict(child_module.named_parameters(recurse=False))  # recurse=False: only this level's own parameters become leaves
+    for k, v in child_module.named_children():
+      result[k] = self._decode_nested_dict(v)  # each child module turns back into a sub-dict under its name
+    return result
+
+  def forward(self, *args, **kwargs):  # runs the wrapped flax module's apply on the given inputs
+    nested_dict_params = self._decode_nested_dict(self._params)  # rebuilt on every call from the current parameters — presumably so weights substituted by torch.func.functional_call are the ones used; confirm
+    return tx.interop.call_jax(self._flax_module.apply, nested_dict_params,
+                               *args, **kwargs)  # the decoded dict is passed as apply's first argument, followed by the caller's inputs