Commit 9267b5d

Merge branch 'main' of https://github.com/jax-ml/jax

2 parents 4962afd + a212a29

8 files changed: +46 additions, -17 deletions

jax/_src/mesh_utils.py

Lines changed: 8 additions & 1 deletion
@@ -705,6 +705,12 @@ def _transpose_trick(
       *_TRANSPOSE_TRICKS[topology][mesh_shape_no_trivial_dims]
   )
 
+def _validate_axis_shapes(axis_shapes: Sequence[int], arg_name: str,
+                          fun_name: str):
+  if not all(isinstance(s, int) for s in axis_shapes):
+    raise ValueError(
+        f'{arg_name} passed to {fun_name} should be a sequence of ints. Got'
+        f' {axis_shapes}')
 
 def create_device_mesh(
     mesh_shape: Sequence[int],
@@ -740,7 +746,8 @@ def create_device_mesh(
   """
   if devices is None:
     devices = xb.devices()
-  if np.prod(mesh_shape) != len(devices):
+  _validate_axis_shapes(mesh_shape, 'mesh_shape', 'create_device_mesh')
+  if math.prod(mesh_shape) != len(devices):
     raise ValueError(
         f'Number of devices {len(devices)} must equal the product '
         f'of mesh_shape {mesh_shape}'
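
Side note on the np.prod -> math.prod swap: math.prod multiplies Python ints at
arbitrary precision and returns a plain int, while np.prod computes in a
fixed-width integer dtype and can overflow silently. A minimal sketch of the
difference (illustrative values, not taken from this commit):

    import math
    import numpy as np

    shape = [2**40, 2**40]   # product is 2**80, far beyond int64
    print(np.prod(shape))    # 0 on most platforms: silent fixed-width overflow
    print(math.prod(shape))  # 1208925819614629174706176, the exact Python int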

jax/_src/pallas/triton/lowering.py

Lines changed: 21 additions & 6 deletions
@@ -1469,10 +1469,22 @@ def _float_int_cast(
   dst_element_type = ir.IntegerType(_element_type(dst_type))
   if dst_element_type.width == 1:
     return _not_equal(src, _full(src.type, 0), signed=signed)
-  elif signed:
-    return arith_dialect.fptosi(dst_type, src)
   else:
-    return arith_dialect.fptoui(dst_type, src)
+    # We clamp the float value to the min/max integer destination value
+    # in order to match JAX/XLA casting behavior. Note that this differs
+    # from numpy casting behavior.
+    if signed:
+      maxint = 2**(dst_element_type.width-1) - 1
+      minint = -2**(dst_element_type.width-1)
+    else:
+      maxint = 2**dst_element_type.width - 1
+      minint = 0
+    src = arith_dialect.minimumf(src, _full(src.type, maxint))
+    src = arith_dialect.maximumf(src, _full(src.type, minint))
+    if signed:
+      return arith_dialect.fptosi(dst_type, src)
+    else:
+      return arith_dialect.fptoui(dst_type, src)
 
 
 def _int_float_cast(
@@ -1499,10 +1511,12 @@ def _cast(
       src,
       _dtype_to_ir_type(dst_type),
       signed=jnp.issubdtype(src_type, jnp.signedinteger),
+      dst_signed=jnp.issubdtype(dst_type, jnp.signedinteger),
   )
 
 
-def _ir_cast(src: ir.Value, dst_type: ir.Type, *, signed: bool) -> ir.Value:
+def _ir_cast(src: ir.Value, dst_type: ir.Type, *,
+             signed: bool, dst_signed: bool = False) -> ir.Value:
   if ir.RankedTensorType.isinstance(
       src.type
   ) and not ir.RankedTensorType.isinstance(dst_type):
@@ -1527,7 +1541,8 @@ def _ir_cast(src: ir.Value, dst_type: ir.Type, *, signed: bool) -> ir.Value:
       dst_element_type, ir.F32Type
   ):
     return _ir_cast(
-        _ir_cast(src, ir.F32Type.get(), signed=False), dst_type, signed=False
+        _ir_cast(src, ir.F32Type.get(), signed=False),
+        dst_type, signed=False, dst_signed=dst_signed
     )
 
   if isinstance(src_element_type, ir.FloatType) and isinstance(
@@ -1543,7 +1558,7 @@ def _ir_cast(src: ir.Value, dst_type: ir.Type, *, signed: bool) -> ir.Value:
   if isinstance(src_element_type, ir.FloatType) and isinstance(
       dst_element_type, ir.IntegerType
   ):
-    return _float_int_cast(src, dst_type, signed=signed)
+    return _float_int_cast(src, dst_type, signed=dst_signed)
   if isinstance(src_element_type, ir.IntegerType) and isinstance(
       dst_element_type, ir.FloatType
   ):
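
For reference, the behavior the new clamping matches: JAX/XLA saturates an
out-of-range float-to-int cast at the destination dtype's bounds, while numpy's
result is formally undefined (commonly wraparound on x86). A small
illustration, assuming a working JAX install (values are illustrative, not
taken from the tests):

    import jax.numpy as jnp
    import numpy as np

    x = jnp.array(1e10, dtype=jnp.float32)  # well above int32 max (2**31 - 1)
    print(x.astype(jnp.int32))              # 2147483647: clamped to the int32 bound
    print(np.array(1e10, np.float32).astype(np.int32))  # platform-dependent in numpy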

jax/_src/pjit.py

Lines changed: 2 additions & 4 deletions
@@ -1662,10 +1662,8 @@ def _pjit_call_impl_python(
     pgle_compile_options['fdo_profile'] = fdo_profile
 
   compiler_options_kvs = compiler_options_kvs + tuple(pgle_compile_options.items())
-  # TODO(patrios): Do not pass mutable profile session through cached lowering
-  # chain. Instead we need to move profilers dictionary to pxla module and use
-  # module as key. Right now we can't do that since there is no way to evict
-  # _pjit_lower_cached cache for in PGLE mode.
+  # Passing mutable PGLE profile here since it should be extracted by JAXPR to
+  # initialize the fdo_profile compile option.
   compiled = _resolve_and_lower(
       args, jaxpr=jaxpr, in_shardings=in_shardings,
       out_shardings=out_shardings, in_layouts=in_layouts,

jax/_src/sharding_impls.py

Lines changed: 1 addition & 0 deletions
@@ -1714,6 +1714,7 @@ def make_mesh(axis_shapes: Sequence[int], axis_names: Sequence[str],
   """
   if devices is None:
     devices = xla_bridge.devices()
+  mesh_utils._validate_axis_shapes(axis_shapes, 'axis_shapes', 'make_mesh')
   axis_size = math.prod(axis_shapes)
   if axis_size > len(devices):
     raise ValueError(
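
A quick usage sketch of the validated entry point (assumes a backend exposing
at least 8 devices; the error text comes from the new _validate_axis_shapes
helper):

    import jax

    mesh = jax.make_mesh((2, 4), ('x', 'y'))  # flat sequence of ints: accepted
    jax.make_mesh(((4,), 4), ('x', 'y'))      # now rejected up front:
    # ValueError: axis_shapes passed to make_mesh should be a sequence of ints.
    # Got ((4,), 4)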

tests/mesh_utils_test.py

Lines changed: 6 additions & 0 deletions
@@ -353,6 +353,12 @@ def test_create_device_mesh_for_nd_torus(
     )
     self.assertArraysEqual(assignment, expected_assignment_matrix)
 
+  def test_create_device_mesh_non_int_error(self):
+    with self.assertRaisesRegex(
+        ValueError,
+        "mesh_shape passed to create_device_mesh should be a sequence of ints"):
+      mesh_utils.create_device_mesh(((4,), 4))
+
   @parameterized.named_parameters(
       ('2x2x1', mock_2x2x1_devices,),
       ('2x2x4', mock_2x2x4_devices, ),

tests/pallas/ops_test.py

Lines changed: 0 additions & 4 deletions
@@ -556,10 +556,6 @@ def test_cast(self, from_dtype, to_dtype, data):
       self.skipTest("Not supported: bad canonicalization")
     if from_dtype == "bool" and to_dtype in {"int16", "int8"}:
       self.skipTest("Not supported: cannot extend to sub-32 bit types")
-    if jtu.test_device_matches(["gpu"]):
-      if (from_dtype in {"bfloat16", "float32"} and
-          to_dtype in {"int8", "int16", "int32"}):
-        self.skipTest("TODO: wrong result on GPU")
 
     if from_dtype == "bfloat16":
       from_dtype = jnp.bfloat16

tests/pjit_test.py

Lines changed: 6 additions & 0 deletions
@@ -4458,6 +4458,12 @@ def g(x):
     self.assertEqual(out2.sharding, s)
     self.assertEqual(out2.dtype, np.float32)
 
+  def test_make_mesh_non_int_error(self):
+    with self.assertRaisesRegex(
+        ValueError,
+        "axis_shapes passed to make_mesh should be a sequence of ints"):
+      jax.make_mesh(((4,), 4), ('x', 'y'))
+
   def test_jnp_array_reshard_error(self):
     if jax.device_count() < 2:
       self.skipTest('Requires >=2 devices')

third_party/xla/workspace.bzl

Lines changed: 2 additions & 2 deletions
@@ -21,8 +21,8 @@ load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls")
 # curl -L https://github.com/openxla/xla/archive/<git hash>.tar.gz | sha256sum
 # and update XLA_SHA256 with the result.
 
-XLA_COMMIT = "c7fdcbc588fa9ea021cf8766530604e8d0fef332"
-XLA_SHA256 = "c0e82c28e5e74065c8446199af657af71ae2f786ba33ddb23d6e1bbcd4463d50"
+XLA_COMMIT = "e2fe67323ea46076a61230952a3551df04ec559d"
+XLA_SHA256 = "0cdc3108f44f8ab37c90e165bae3bc72e16d049ad18c46d2aa8004f93df2d9f9"
 
 def repo():
     tf_http_archive(
