
Commit 927a540

#33349: TopK general update and bug fix (#36136)
### Ticket
#33349

### Problem description
TopK produced wrong results for some shapes from Qwen3-32b. Some functionality, such as the `indices_tensor` parameter, was available only in the multicore version.

### What's changed
- General code cleanup and refactor.
- Unified functionality across both program factories (some features were previously available only in one or the other).
- Allowed the SingleCore approach to utilize the full number of cores by parallelizing over Ht.
- Allowed the use of all tensor shapes and dims.
- Resolved the accuracy drop with Qwen3-32b shapes.
- Updated implementation docs.
- Removed a redundant kernel.

A minimal usage sketch of the op follows the checklists below.

### Checklist
- [x] [![All post-commit tests](https://github.com/tenstorrent/tt-metal/actions/workflows/all-post-commit-workflows.yaml/badge.svg?branch=mgajewskiTT/33349-topk-slight-accuracy-drop-with-qwen3-32b-shapes)](https://github.com/tenstorrent/tt-metal/actions/workflows/all-post-commit-workflows.yaml?query=branch:mgajewskiTT/33349-topk-slight-accuracy-drop-with-qwen3-32b-shapes)
- [x] [![Blackhole Post commit](https://github.com/tenstorrent/tt-metal/actions/workflows/blackhole-post-commit.yaml/badge.svg?branch=mgajewskiTT/33349-topk-slight-accuracy-drop-with-qwen3-32b-shapes)](https://github.com/tenstorrent/tt-metal/actions/workflows/blackhole-post-commit.yaml?query=branch:mgajewskiTT/33349-topk-slight-accuracy-drop-with-qwen3-32b-shapes) - OK, the eltwise test failure is not connected to this PR
- [ ] [![cpp-unit-tests](https://github.com/tenstorrent/tt-metal/actions/workflows/tt-metal-l2-nightly.yaml/badge.svg?branch=mgajewskiTT/33349-topk-slight-accuracy-drop-with-qwen3-32b-shapes)](https://github.com/tenstorrent/tt-metal/actions/workflows/tt-metal-l2-nightly.yaml?query=branch:mgajewskiTT/33349-topk-slight-accuracy-drop-with-qwen3-32b-shapes)
- [x] New/existing tests provide coverage for the changes

<img width="867" height="537" alt="New implementation and Current implementation" src="https://github.com/user-attachments/assets/913fc106-613b-44bb-90a0-23aad0dfe11a" />

#### Discovered issues
#36329

#### Model tests
If your changes cover model-related code, you should run the tests corresponding to the affected models and platforms (Single card, T3K, Galaxy). "Choose your pipeline" workflows facilitate running multiple kinds of tests in a single run. Each offers `models-mandatory` and `models-extended` presets. The former includes a minimal set of tests, to be run always. The latter extends it with additional ones - use your best judgement in deciding which is most appropriate for your PR.
- [ ] [![(Single) Choose your pipeline](https://github.com/tenstorrent/tt-metal/actions/workflows/pipeline-select.yaml/badge.svg?branch=mgajewskiTT/33349-topk-slight-accuracy-drop-with-qwen3-32b-shapes)](https://github.com/tenstorrent/tt-metal/actions/workflows/pipeline-select.yaml?query=branch:mgajewskiTT/33349-topk-slight-accuracy-drop-with-qwen3-32b-shapes)
  - [ ] `models-mandatory` preset (runs: [Device perf regressions](https://github.com/tenstorrent/tt-metal/actions/workflows/perf-device-models.yaml) and [Frequent model and ttnn tests](https://github.com/tenstorrent/tt-metal/actions/workflows/fast-dispatch-full-regressions-and-models.yaml))
  - [ ] `models-extended` preset (runs: the mandatory tests, plus [Demo](https://github.com/tenstorrent/tt-metal/actions/workflows/single-card-demo-tests.yaml) and [Model perf](https://github.com/tenstorrent/tt-metal/actions/workflows/perf-models.yaml) tests)
  - [ ] other selection - specify runs
- [ ] [![(T3K) Choose your pipeline](https://github.com/tenstorrent/tt-metal/actions/workflows/pipeline-select-t3k.yaml/badge.svg?branch=mgajewskiTT/33349-topk-slight-accuracy-drop-with-qwen3-32b-shapes)](https://github.com/tenstorrent/tt-metal/actions/workflows/pipeline-select-t3k.yaml?query=branch:mgajewskiTT/33349-topk-slight-accuracy-drop-with-qwen3-32b-shapes)
  - [ ] `models-mandatory` preset (runs: [Unit tests](https://github.com/tenstorrent/tt-metal/actions/workflows/t3000-unit-tests.yaml))
  - [ ] `models-extended` preset (runs: the mandatory tests, plus [Demo](https://github.com/tenstorrent/tt-metal/actions/workflows/t3000-demo-tests.yaml) and [Model perf](https://github.com/tenstorrent/tt-metal/actions/workflows/t3000-model-perf-tests.yaml) tests)
  - [ ] other selection - specify runs
- [ ] [![(Galaxy) Choose your pipeline](https://github.com/tenstorrent/tt-metal/actions/workflows/pipeline-select-galaxy.yaml/badge.svg?branch=mgajewskiTT/33349-topk-slight-accuracy-drop-with-qwen3-32b-shapes)](https://github.com/tenstorrent/tt-metal/actions/workflows/pipeline-select-galaxy.yaml?query=branch:mgajewskiTT/33349-topk-slight-accuracy-drop-with-qwen3-32b-shapes)
  - [ ] `models-mandatory` preset (runs: [Quick tests](https://github.com/tenstorrent/tt-metal/actions/workflows/galaxy-quick.yaml))
  - [ ] `models-extended` preset (runs: the mandatory tests, plus [Demo](https://github.com/tenstorrent/tt-metal/actions/workflows/galaxy-demo-tests.yaml) and [Model perf](https://github.com/tenstorrent/tt-metal/actions/workflows/galaxy-model-perf-tests.yaml) tests)
  - [ ] other selection - specify runs
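For reference, here is the minimal host-side sketch of the `ttnn.topk` call exercised by the updated tests, as mentioned above. The call signature (`k`, `dim`, `largest`, `sorted`, plus the optional `sub_core_grids` and `indices_tensor`) is taken from the test code in this commit; the shape, the `k` value, and the `ttnn.open_device`/`ttnn.close_device` wrapping are illustrative assumptions rather than part of the change.

```python
import torch
import ttnn

# Illustrative device setup; the tests receive `device` from a fixture instead.
device = ttnn.open_device(device_id=0)

shape = [1, 1, 32, 8192]  # one of the shapes parametrized in the tests below
input_torch = torch.randn(shape, dtype=torch.bfloat16)
input_ttnn = ttnn.from_torch(input_torch, ttnn.bfloat16, layout=ttnn.Layout.TILE, device=device)

# Top-k along the last dim. `sub_core_grids` and `indices_tensor` are optional
# keyword arguments; per this change they are supported by both program factories.
values, indices = ttnn.topk(input_ttnn, 50, dim=3, largest=True, sorted=True)

values_torch = ttnn.to_torch(values)    # shape [1, 1, 32, 50]
indices_torch = ttnn.to_torch(indices)  # positions along dim 3

ttnn.close_device(device)
```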
1 parent 5eac985 · commit 927a540

28 files changed: +1831 -1071 lines

models/demos/deepseek_v3/tests/unit/test_topk.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -13,7 +13,7 @@
 TOPK_MEMORY_CONFIG = ttnn.L1_MEMORY_CONFIG

 # Sub-core grids for mesh device tests
-SUB_CORE_GRIDS = ttnn.CoreRangeSet([ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(8, 9))])
+SUB_CORE_GRIDS = ttnn.CoreRangeSet([ttnn.CoreRange(ttnn.CoreCoord(0, 0), ttnn.CoreCoord(6, 7))])


 # k=32 matches the DeepSeek v3 MoE gating configuration, where the gate selects 32 experts per token.
```
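As a quick sanity check on the changed constant, the number of worker cores the new sub-core grid spans can be computed in plain Python, assuming (as in tt-metal) that `CoreRange` endpoints are inclusive:

```python
# The updated range runs from CoreCoord(0, 0) to CoreCoord(6, 7) inclusive,
# i.e. 7 columns x 8 rows of worker cores.
start_x, start_y = 0, 0
end_x, end_y = 6, 7
num_cores = (end_x - start_x + 1) * (end_y - start_y + 1)
print(num_cores)  # 56
```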

tests/ttnn/unit_tests/operations/reduce/test_topk.py

Lines changed: 48 additions & 41 deletions
```diff
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: © 2025 Tenstorrent AI ULC
+# SPDX-FileCopyrightText: © 2026 Tenstorrent AI ULC

 # SPDX-License-Identifier: Apache-2.0

@@ -8,49 +8,61 @@

 import torch
 import ttnn
-from tests.ttnn.utils_for_testing import assert_with_pcc
+from tests.ttnn.utils_for_testing import assert_allclose, assert_equal
+
+UINT16_MAX = 65535


 def run_topk_test(N, C, H, W, k, dtype, dim, sorted, largest, device, sub_core_grids=None, pass_indices_tensor=False):
     torch.manual_seed(2005)
+
+    # Input tensor
     shape = [N, C, H, W]
+    ttnn_indices_dtype = ttnn.uint16 if W <= UINT16_MAX else ttnn.uint32
+    torch_indices_dtype = torch.uint16 if W <= UINT16_MAX else torch.uint32
     torch_dtype = torch.bfloat16
     input = torch.randn(shape, dtype=torch_dtype) * 0.9
-    pyt_topk_values, pyt_topk_indices = torch.topk(input, k, dim=dim, largest=largest, sorted=True)
     ttnn_input = ttnn.from_torch(input, dtype, layout=ttnn.Layout.TILE, device=device)

+    pyt_topk_values, pyt_topk_indices = torch.topk(input, k, dim=dim, largest=largest, sorted=True)
+
     if pass_indices_tensor:
-        indices_tensor_torch = torch.zeros(shape, dtype=torch.int32)
+        indices_tensor_torch = torch.zeros(shape, dtype=torch_indices_dtype)
         for i in range(W):
             indices_tensor_torch[:, :, :, i] = i
-        indices_tensor = ttnn.from_torch(indices_tensor_torch, ttnn.uint16, layout=ttnn.Layout.TILE, device=device)
+        indices_tensor = ttnn.from_torch(
+            indices_tensor_torch, ttnn_indices_dtype, layout=ttnn.Layout.TILE, device=device
+        )
     else:
         indices_tensor = None

-    try:
-        ttnn_topk_values, ttnn_topk_indices = ttnn.topk(
-            ttnn_input,
-            k,
-            dim=dim,
-            largest=largest,
-            sorted=sorted,
-            sub_core_grids=sub_core_grids,
-            indices_tensor=indices_tensor,
-        )
-    except Exception as e:
-        raise e
+    ttnn_topk_values, ttnn_topk_indices = ttnn.topk(
+        ttnn_input,
+        k,
+        dim=dim,
+        largest=largest,
+        sorted=sorted,
+        sub_core_grids=sub_core_grids,
+        indices_tensor=indices_tensor,
+    )
+
+    # Convert TTNN outputs to Torch for comparison
+    ttnn_torch_values = ttnn.to_torch(ttnn_topk_values)
+    ttnn_torch_indices = ttnn.to_torch(ttnn_topk_indices, dtype=torch_indices_dtype)

+    # Assert output shapes
     desired_shape = [N, C, H, W]
     desired_shape[dim] = k
     assert list(ttnn_topk_values.shape) == desired_shape
     assert list(ttnn_topk_indices.shape) == desired_shape

-    ttnn_torch_values = ttnn.to_torch(ttnn_topk_values)
-    ttnn_torch_indices = ttnn.to_torch(ttnn_topk_indices).to(torch.int64)
+    # Assert values correctness
     if dtype == ttnn.bfloat8_b:
-        pcc_values = 0.99
+        assert_allclose(ttnn_torch_values, pyt_topk_values, rtol=1e-1, atol=1e-1)
     else:
-        pcc_values = 1.0
+        assert_equal(ttnn_torch_values, pyt_topk_values)
+
+    # Assert indices correctness using gather
     # pcc is not a good measure for the raw indices
     # if index 49 and index 8 are tied, the order of the indices can be different
     # but the values associated with the indices should be the same
```
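The `UINT16_MAX` guard introduced above decides how wide the index tensors need to be. A standalone, plain-Python illustration follows; the widths are taken from the shapes parametrized in this file, and labeling 151936 as the Qwen3-32b vocab width is an assumption based on the PR description.

```python
# A uint16 index can address at most 65536 positions along the reduced dim,
# so vocab-sized widths such as 151936 need 32-bit indices.
UINT16_MAX = 65535

for W in (8192, 64128, 151936):
    index_dtype = "uint16" if W <= UINT16_MAX else "uint32"
    print(f"W={W:>6} -> index dtype {index_dtype}")
# W=  8192 -> index dtype uint16
# W= 64128 -> index dtype uint16
# W=151936 -> index dtype uint32
```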
```diff
@@ -64,8 +76,6 @@ def run_topk_test(N, C, H, W, k, dtype, dim, sorted, largest, device, sub_core_g

     assert ttnn_torch_cosine > 0.99, "Cosine similarity between topk values and gather from indices is less than 0.99"

-    assert_with_pcc(pyt_topk_values, ttnn_torch_values, pcc_values)
-

 @pytest.mark.parametrize(
     "dtype",
```
```diff
@@ -83,16 +93,19 @@ def run_topk_test(N, C, H, W, k, dtype, dim, sorted, largest, device, sub_core_g
 @pytest.mark.parametrize(
     "N, C, H, W, dim, k",
     (
-        (1, 1, 32, 8192, 3, 50),  # passed
-        (1, 1, 64, 64, 2, 32),  # passed
-        (1, 1, 64, 64, 2, 64),  # passed
-        (1, 2048, 1, 64, 1, 32),  # skipped
-        (1, 1, 32, 64, 3, 2),  # passed
-        (1, 1, 32, 64, 3, 4),  # passed
-        (1, 1, 32, 8192, 3, 6),  # passed
-        (1, 2048, 1, 64, 1, 8),  # passed
-        (1, 1, 32, 32768, 3, 3000),  # passed
-        (1, 1, 32, 18992, 3, 3000),  # passed
+        (1, 1, 32, 8192, 3, 50),
+        (1, 1, 64, 64, 2, 32),
+        (1, 1, 64, 64, 2, 64),
+        (1, 2048, 1, 64, 1, 32),
+        (1, 1, 32, 64, 3, 2),
+        (1, 1, 32, 64, 3, 4),
+        (1, 1, 32, 8192, 3, 6),
+        (1, 2048, 1, 64, 1, 8),
+        (1, 1, 32, 32768, 3, 3000),
+        (1, 1, 32, 18992, 3, 3000),
+        (1, 1, 32, 18992, 3, 32),
+        (1, 1, 32, 10000, 3, 32),
+        (1, 1, 32, 64128, 3, 32),
     ),
 )
 @pytest.mark.parametrize(
```
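Several of the parametrized widths are not multiples of the 32-wide tile, which is presumably part of what "Allowing the use of all tensor shapes and dim" covers. A quick plain-Python check of the widths above; the padding targets are an assumption about tile alignment, not something stated in this diff.

```python
TILE_WIDTH = 32  # tt-metal tiles are 32x32

for W in (8192, 10000, 18992, 64128, 151936):
    if W % TILE_WIDTH == 0:
        print(f"W={W:>6}: tile-aligned")
    else:
        padded = ((W + TILE_WIDTH - 1) // TILE_WIDTH) * TILE_WIDTH
        print(f"W={W:>6}: not tile-aligned, would pad to {padded}")
# W=  8192: tile-aligned
# W= 10000: not tile-aligned, would pad to 10016
# W= 18992: not tile-aligned, would pad to 19008
# W= 64128: tile-aligned
# W=151936: tile-aligned
```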
```diff
@@ -116,12 +129,6 @@ def run_topk_test(N, C, H, W, k, dtype, dim, sorted, largest, device, sub_core_g
     ],
 )
 def test_topk(N, C, H, W, dim, k, dtype, sorted, largest, device, sub_core_grids):
-    if dim == 0 or dim == 1:
-        # As of now, when we try to get top-k for dim = 0 or 1, we get following error from transpose_op.cpp's validate():
-        # input_tensor.get_dtype() == DataType::BFLOAT16 || input_tensor.get_dtype() == DataType::FLOAT32
-        # this is because, transpose.cpp always typecasts bf8 to bf16
-        # and when dim = 0 or 1, transpose converts it into TransposeOpDim::HC & this dim doesnt support bf16 or fp32
-        pytest.skip()
     run_topk_test(N, C, H, W, k, dtype, dim, sorted, largest, device, sub_core_grids)

```
```diff
@@ -186,8 +193,8 @@ def test_topk_sub_core_grids(N, C, H, W, dim, k, dtype, sorted, largest, device,
 @pytest.mark.parametrize(
     "N, C, H, W, dim, k",
     (
-        (1, 1, 32, 151936, 3, 50),  # passed - customer shape 2
-        (1, 1, 32, 128256, 3, 50),  # passed - customer shape 1
+        (1, 1, 32, 151936, 3, 50),
+        (1, 1, 32, 128256, 3, 50),
     ),
 )
 @pytest.mark.parametrize(
```
