
Commit 7c585ff

fix: torch.nn.functional.gumbel_softmax frontend
1 parent 6ab5aa1 commit 7c585ff

File tree

2 files changed: +15 -18 lines


ivy/functional/frontends/torch/nn/functional/non_linear_activation_functions.py

Lines changed: 13 additions & 15 deletions
@@ -67,22 +67,20 @@ def glu(input, dim=-1):
 
 @to_ivy_arrays_and_back
 @with_unsupported_dtypes({"2.2 and below": ("float16",)}, "torch")
-def gumbel_softmax(logits, tau=1, hard=False, eps=1e-10, dim=-1):
-    gumbels = -ivy.empty_like(logits).exponential().log()
-    gumbels = (logits + gumbels) / tau
-    y_soft = ivy.softmax(gumbels, axis=dim)
-
+def gumbel_softmax(logits, tau=1., hard=False, eps=1e-10, dim=-1):
+    if logits.ndim == 0:
+        return ivy.ones_like(logits)
+    gumbel_noise = -ivy.log(
+        -ivy.log(ivy.random_uniform(low=0, high=1, shape=logits.shape) + eps) + eps
+    )
+    y = (logits + gumbel_noise) / tau
+    y_soft = ivy.softmax(y, axis=dim)
     if hard:
-        indices = y_soft.max(axis=dim, keepdims=True)[1]
-        y_hard = ivy.zeros_like(logits)
-        updates = ivy.ones_like(indices)
-        y_hard = ivy.scatter_nd(indices, updates, reduction="replace", out=y_hard)
-
-        ret = y_hard - y_soft.stop_gradient(preserve_type=True) + y_soft
-    else:
-        ret = y_soft
-
-    return ret
+        index = ivy.argmax(y_soft, axis=dim)
+        y_hard = ivy.one_hot(index, logits.shape[dim], axis=dim).astype(y_soft.dtype)
+        ret = y_hard - ivy.stop_gradient(y_soft) + y_soft
+        return ret.astype(logits.dtype)
+    return y_soft.astype(logits.dtype)
 
 
 @to_ivy_arrays_and_back
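For context: the updated frontend samples Gumbel(0, 1) noise from uniform draws via the inverse-CDF identity g = -log(-log(u)), scales the perturbed logits by 1/tau, and, when hard=True, returns a one-hot of the argmax while routing the gradient through the soft probabilities (the straight-through estimator). Below is a minimal NumPy sketch of the forward computation, mirroring the structure of the new implementation; the function name and the toy logits are illustrative only.

import numpy as np

def gumbel_softmax_forward(logits, tau=1.0, hard=False, eps=1e-10, axis=-1):
    # Gumbel(0, 1) noise from uniform samples: g = -log(-log(u + eps) + eps)
    u = np.random.uniform(0.0, 1.0, size=logits.shape)
    gumbel_noise = -np.log(-np.log(u + eps) + eps)
    # temperature-scaled softmax of the perturbed logits
    y = (logits + gumbel_noise) / tau
    y = y - y.max(axis=axis, keepdims=True)  # shift for numerical stability
    y_soft = np.exp(y) / np.exp(y).sum(axis=axis, keepdims=True)
    if hard:
        # forward value is a one-hot of the argmax; the autograd version adds
        # (y_hard - stop_gradient(y_soft)) + y_soft so gradients follow y_soft
        index = y_soft.argmax(axis=axis)
        return np.eye(logits.shape[axis])[index]
    return y_soft

# toy example (illustrative values)
print(gumbel_softmax_forward(np.array([0.5, 1.0, -2.0]), tau=0.5, hard=True))

In the actual frontend the same steps are expressed with ivy ops (ivy.random_uniform, ivy.softmax, ivy.argmax, ivy.one_hot, ivy.stop_gradient), so gradients flow through y_soft when hard=True.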

ivy_tests/test_ivy/test_frontends/test_torch/test_nn/test_functional/test_non_linear_activation_functions.py

Lines changed: 2 additions & 3 deletions
@@ -307,12 +307,11 @@ def test_torch_glu(
     dtype_and_x=helpers.dtype_and_values(
         available_dtypes=helpers.get_dtypes("float"),
     ),
-    tau=st.floats(min_value=0),
+    tau=st.floats(min_value=1e-6, max_value=10.0),
     hard=st.booleans(),
     eps=st.floats(min_value=0, max_value=1),
-    dim=st.integers(),
+    dim=st.integers(min_value=-1, max_value=0),
     test_with_out=st.just(False),
-    test_inplace=st.booleans(),
 )
 def test_torch_gumbel_softmax(
     *,
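For reference, a quick check against the PyTorch API this frontend mirrors, torch.nn.functional.gumbel_softmax. The tau strategy is now bounded away from zero because the perturbed logits are divided by tau, so zero has to be excluded; the shapes and printed values below are illustrative only.

import torch
import torch.nn.functional as F

logits = torch.randn(4, 3)

# hard=True yields one-hot samples in the forward pass (straight-through gradients)
y_hard = F.gumbel_softmax(logits, tau=0.5, hard=True, dim=-1)
print(y_hard.sum(dim=-1))  # tensor([1., 1., 1., 1.])

# a tiny but strictly positive tau makes the soft sample nearly one-hot
y_soft = F.gumbel_softmax(logits, tau=1e-6, hard=False, dim=-1)
print(y_soft.max(dim=-1).values)  # values close to 1.0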

0 commit comments
