Make compatible with more recent Jax versions (#11)

lebrice · web-flow · commit 83f00b1e3ae6 · 2026-01-08T14:39:05.000-05:00
* Make compatible with more recent Jax versions

Signed-off-by: Fabrice Normandin <normandf@mila.quebec>

* Upgrade to cuda13 and fix torch/setuptools bug

Signed-off-by: Fabrice Normandin <normandf@mila.quebec>

* Fix mkdocs upgrade issue

Signed-off-by: Fabrice Normandin <normandf@mila.quebec>

---------

Signed-off-by: Fabrice Normandin <normandf@mila.quebec>
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
@@ -60,7 +60,7 @@ jobs:
       max-parallel: 4
       matrix:
         platform: ["ubuntu-latest", "macos-latest"]
-        python-version: ["3.12"]
+        python-version: ["3.11", "3.12", "3.13"]
     steps:
       - uses: actions/checkout@v4
       - name: Install the latest version of uv
diff --git a/mkdocs.yaml b/mkdocs.yaml
@@ -18,7 +18,7 @@ plugins:
   - mkdocstrings:
       handlers:
         python:
-          import:
+          inventories:
             - https://docs.python.org/3/objects.inv
             - https://docs.pytest.org/en/stable/objects.inv
             - https://flax.readthedocs.io/en/latest/objects.inv
diff --git a/pyproject.toml b/pyproject.toml
@@ -5,12 +5,17 @@ readme = "README.md"
 authors = [
     { name = "Fabrice Normandin", email = "fabrice.normandin@gmail.com" },
 ]
-requires-python = ">=3.10"
-dependencies = ["jax>=0.4.28", "torch>=2.0.0"]
+requires-python = ">=3.11"
+dependencies = [
+    "jax>=0.6.0",
+    "torch",
+    # note: This is because of a weird bug where torch wants setuptools to build cpp extensions (seems related to torch.compile).
+    "setuptools; python_version == '3.11'",
+]
 dynamic = ["version"]
 
 [project.optional-dependencies]
-gpu = ["jax[cuda12]>=0.4.28; sys_platform == 'linux'"]
+gpu = ["jax[cuda13]; sys_platform == 'linux'"]
 
 
 [dependency-groups]
diff --git a/torch_jax_interop/to_jax.py b/torch_jax_interop/to_jax.py
@@ -11,13 +11,10 @@
 
 import jax
 import jax.core
-import jaxlib
-import jaxlib.xla_extension
 import torch
 import torch.func
 import torch.utils._pytree
 from jax.dlpack import from_dlpack as jax_from_dlpack  # type: ignore
-from torch.utils.dlpack import to_dlpack as torch_to_dlpack  # type: ignore
 
 from .types import (
     Dataclass,
@@ -34,33 +31,27 @@
 
 
 @overload
-def torch_to_jax(value: torch.Tensor, /) -> jax.Array:
-    ...
+def torch_to_jax(value: torch.Tensor, /) -> jax.Array: ...
 
 
 @overload
-def torch_to_jax(value: torch.device, /) -> jax.Device:
-    ...
+def torch_to_jax(value: torch.device, /) -> jax.Device: ...
 
 
 @overload
-def torch_to_jax(value: tuple[torch.Tensor, ...], /) -> tuple[jax.Array, ...]:
-    ...
+def torch_to_jax(value: tuple[torch.Tensor, ...], /) -> tuple[jax.Array, ...]: ...
 
 
 @overload
-def torch_to_jax(value: list[torch.Tensor], /) -> list[jax.Array]:
-    ...
+def torch_to_jax(value: list[torch.Tensor], /) -> list[jax.Array]: ...
 
 
 @overload
-def torch_to_jax(value: NestedDict[K, torch.Tensor], /) -> NestedDict[K, jax.Array]:
-    ...
+def torch_to_jax(value: NestedDict[K, torch.Tensor], /) -> NestedDict[K, jax.Array]: ...
 
 
 @overload
-def torch_to_jax(value: Any, /) -> Any:
-    ...
+def torch_to_jax(value: Any, /) -> Any: ...
 
 
 def torch_to_jax(value: Any, /) -> Any:
@@ -99,16 +90,14 @@ def _direct_conversion(v: torch.Tensor) -> jax.Array:
     return jax_from_dlpack(v, copy=False)
 
 
-def _to_from_dlpack(
-    v: torch.Tensor, ignore_deprecation_warning: bool = True
-) -> jax.Array:
+def _to_from_dlpack(v: torch.Tensor, ignore_deprecation_warning: bool = True) -> jax.Array:
     with warnings.catch_warnings() if ignore_deprecation_warning else contextlib.nullcontext():
         # Only way to get this to work for CPU seems to be with to/from dlpack... so we have to use this deprecated
         # conversion method for now.
         # todo: Should we let it though though?
         if ignore_deprecation_warning:
             warnings.filterwarnings("ignore", category=DeprecationWarning)
-        return jax_from_dlpack(torch_to_dlpack(v), copy=False)
+        return jax_from_dlpack(v, copy=False)
 
 
 def torch_to_jax_tensor(value: torch.Tensor) -> jax.Array:
@@ -130,7 +119,7 @@ def torch_to_jax_tensor(value: torch.Tensor) -> jax.Array:
             # return _direct_conversion(value)
             return _to_from_dlpack(value, ignore_deprecation_warning=True)
 
-        except jaxlib.xla_extension.XlaRuntimeError as err:
+        except RuntimeError as err:
             log_once(
                 logger,
                 message=(
@@ -145,7 +134,7 @@ def torch_to_jax_tensor(value: torch.Tensor) -> jax.Array:
 
     try:
         return _direct_conversion(value)
-    except jaxlib.xla_extension.XlaRuntimeError as err:
+    except RuntimeError as err:
         log_once(
             logger,
             message=(
diff --git a/torch_jax_interop/to_jax_module.py b/torch_jax_interop/to_jax_module.py
@@ -26,9 +26,7 @@
 
 def make_functional(
     module_with_state: Module[P, Out_cov], disable_autograd_tracking=False
-) -> tuple[
-    Callable[Concatenate[Iterable[torch.Tensor], P], Out_cov], tuple[torch.Tensor, ...]
-]:
+) -> tuple[Callable[Concatenate[Iterable[torch.Tensor], P], Out_cov], tuple[torch.Tensor, ...]]:
     """Backward compatibility equivalent for `functorch.make_functional` in the new torch.func API.
 
     Adapted from https://gist.github.com/zou3519/7769506acc899d83ef1464e28f22e6cf as suggested by
@@ -128,15 +126,21 @@ def j2t(v: JaxPyTree) -> TorchPyTree:
         if any(isinstance(v_i, jax.core.Tracer) for v_i in jax.tree.leaves(v)):
             # running inside JIT.
             return jax.pure_callback(
-                functools.partial(jax.tree.map, jax_to_torch), v, v, vectorized=True
+                functools.partial(jax.tree.map, jax_to_torch),
+                v,
+                v,
+                vmap_method="legacy_vectorized",
             )
         return jax.tree.map(jax_to_torch, v)
 
     def t2j(v: TorchPyTree) -> JaxPyTree:
         if any(isinstance(v_i, jax.core.Tracer) for v_i in jax.tree.leaves(v)):
             # running inside JIT.
             return jax.pure_callback(
-                functools.partial(jax.tree.map, torch_to_jax), v, v, vectorized=True
+                functools.partial(jax.tree.map, torch_to_jax),
+                v,
+                v,
+                vmap_method="legacy_vectorized",
             )
         return jax.tree.map(torch_to_jax, v)
 
@@ -153,8 +157,7 @@ def apply(params, *args, **kwargs):
         # Apply the model function to the input data.
         if example_output is None:
             if any(
-                isinstance(v, jax.core.Tracer)
-                for v in jax.tree.leaves((params, args, kwargs))
+                isinstance(v, jax.core.Tracer) for v in jax.tree.leaves((params, args, kwargs))
             ):
                 raise RuntimeError(
                     "You need to pass `example_output` in order to JIT the torch function!"
@@ -186,7 +189,7 @@ def pytorch_model_callback(params, *args, **kwargs):
             params,
             *args,
             **kwargs,
-            vectorized=True,
+            vmap_method="legacy_vectorized",
         )
         # Convert the output data from JAX to PyTorch representations
         out = t2j(out)
@@ -224,7 +227,7 @@ def _pytorch_model_backward_callback(params, grads, *args, **kwargs):
                 grads,
                 *args,
                 **kwargs,
-                vectorized=True,
+                vmap_method="legacy_vectorized",
             )
             in_grads = t2j(in_grads)
             return in_grads
diff --git a/torch_jax_interop/to_torch.py b/torch_jax_interop/to_torch.py
@@ -9,7 +9,6 @@
 
 import jax
 import torch
-from jax.dlpack import to_dlpack as jax_to_dlpack  # type: ignore (not exported there?)
 from torch.utils import dlpack as torch_dlpack
 
 from .types import Dataclass, DataclassType, K, NestedDict, NestedMapping
@@ -19,33 +18,27 @@
 
 
 @overload
-def jax_to_torch(value: jax.Array, /) -> torch.Tensor:
-    ...
+def jax_to_torch(value: jax.Array, /) -> torch.Tensor: ...
 
 
 @overload
-def jax_to_torch(value: jax.Device, /) -> torch.device:
-    ...
+def jax_to_torch(value: jax.Device, /) -> torch.device: ...
 
 
 @overload
-def jax_to_torch(value: tuple[jax.Array, ...], /) -> tuple[torch.Tensor, ...]:
-    ...
+def jax_to_torch(value: tuple[jax.Array, ...], /) -> tuple[torch.Tensor, ...]: ...
 
 
 @overload
-def jax_to_torch(value: list[jax.Array], /) -> list[torch.Tensor]:
-    ...
+def jax_to_torch(value: list[jax.Array], /) -> list[torch.Tensor]: ...
 
 
 @overload
-def jax_to_torch(value: NestedDict[K, jax.Array], /) -> NestedDict[K, torch.Tensor]:
-    ...
+def jax_to_torch(value: NestedDict[K, jax.Array], /) -> NestedDict[K, torch.Tensor]: ...
 
 
 @overload
-def jax_to_torch(value: Any, /) -> Any:
-    ...
+def jax_to_torch(value: Any, /) -> Any: ...
 
 
 def jax_to_torch(value: Any, /) -> Any:
@@ -88,7 +81,7 @@ def jax_to_torch_tensor(value: jax.Array, /) -> torch.Tensor:
     try:
         return torch_dlpack.from_dlpack(value)
     except Exception:
-        return torch_dlpack.from_dlpack(jax_to_dlpack(value))
+        return torch_dlpack.from_dlpack(value.__dlpack__())
 
 
 # Register it like this so the type hints are preserved on the functions (which are also called
diff --git a/uv.lock b/uv.lock