
Commit afac9cf

Authored by Pavlo Hilei (philei-tt)

Remove fallback for aten.squeeze.default, aten.full, aten.fill.Scalar, aten.memory_format (#896)

* Always use native ttnn squeeze
* Remove fallback for aten.full(_like).default
* Enable ttnn.fill for aten.fill.Scalar
* Enable ttnn.empty for aten.empty.memory_format
* Fix precommit check
* Remove changes from autogen test
* Update ttnn version
* Fix typing issue in Embeddings::validate

Co-authored-by: Pavlo Hilei <[email protected]>

1 parent: 93b1645
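All of these lowerings are exercised through the same torch.compile flow the tests below use. A minimal sketch of that flow, assuming an already-opened ttnn device handle (everything else is taken from the test code in this diff):

import torch
import torch_ttnn

class FullModule(torch.nn.Module):
    def forward(self, size, fill_value):
        # aten.full.default now always lowers to ttnn.full (no CPU fallback)
        return torch.full(size, fill_value)

option = torch_ttnn.TorchTtnnOption(device=device)  # device: assumed open ttnn device
m = torch.compile(FullModule(), backend=torch_ttnn.backend, options=option)
result = m([64, 128], 1.23)  # compilation is lazy; the first call triggers it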

File tree

6 files changed: +53 −43 lines

tests/lowering/creation/test_full.py
Lines changed: 9 additions & 7 deletions

@@ -15,22 +15,24 @@ def forward(self, size, fill_value):
 
 
 @pytest.mark.parametrize(
-    "input_shapes",
+    "input_shape",
     [
-        [(64, 128)],
-        [(19, 19)],
-        [(59, 59)],
+        [64, 128],
+        [19, 19],
+        [59, 59],
+        [33],
+        [],  # scalar
     ],
 )
-def test_full(device, input_shapes):
+def test_full(device, input_shape):
     m = FullModule()
     fill_value = 1.23
-    result_before = m.forward(input_shapes[0], fill_value).to(torch.bfloat16)
+    result_before = m.forward(input_shape, fill_value).to(torch.bfloat16)
     option = torch_ttnn.TorchTtnnOption(device=device)
     option.gen_graphviz = True
     # The compilation is lazy, so we need to run forward once to trigger the compilation
     m = torch.compile(m, backend=torch_ttnn.backend, options=option)
-    result_after = m.forward(input_shapes[0], fill_value).to(torch.bfloat16)
+    result_after = m.forward(input_shape, fill_value).to(torch.bfloat16)
     option._out_fx_graphs[0].print_tabular()
 
     # Check the graph has be rewritten and contain ttnn ops
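For context, standard PyTorch semantics rather than anything in this diff: an empty size list makes torch.full return a 0-d (scalar) tensor, which is exactly the case the removed fallback used to special-case:

import torch

t = torch.full([], 1.23)     # empty shape: 0-d scalar tensor
print(t.shape, t.dim())      # torch.Size([]) 0
t1 = torch.full([33], 1.23)  # 1-d tensor of length 33
print(t1.shape)              # torch.Size([33])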

tests/lowering/creation/test_full_like.py
Lines changed: 2 additions & 0 deletions

@@ -22,6 +22,8 @@ def forward(self, tensor, fill_value):
         (1, 1),
         (2, 2),
         (17, 17),
+        (33,),
+        (),
     ],
 )
 def test_full_like(device, input_shape):
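Likewise, torch.full_like mirrors the input's shape, so the new () case produces a 0-d output; a quick reference snippet (plain PyTorch, not part of the commit):

import torch

src = torch.empty(())            # 0-d input tensor
out = torch.full_like(src, 2.0)
print(out.shape)                 # torch.Size([])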

tests/lowering/tensor_manipulation/test_squeeze.py
Lines changed: 9 additions & 6 deletions

@@ -22,6 +22,9 @@ def forward(self, input, dim):
         ((1, 256, 1), -1),
         ((33, 44, 1, 32, 16), 1),
         ((33, 44, 1, 32, 16), 2),
+        ((1, 12), 0),
+        ((1), 0),
+        ((), 0),
     ],
 )
 def test_squeeze_dim(device, input_shape, dim):

@@ -36,11 +39,7 @@ def test_squeeze_dim(device, input_shape, dim):
     option._out_fx_graphs[0].print_tabular()
     # Check the graph has be rewritten and contain ttnn ops
     nodes = list(option._out_fx_graphs[0].nodes)
-    if option.use_less_ttnn_op_types:
-        # squeeze is lowered to reshape
-        assert [node.target for node in nodes].count(ttnn.reshape) == 1
-    else:
-        assert [node.target for node in nodes].count(ttnn.squeeze) == 1
+    assert [node.target for node in nodes].count(ttnn.squeeze) == 1
     # Check inference result
     assert torch.allclose(result_before, result_after)

@@ -60,6 +59,10 @@ def forward(self, input):
         ((1, 1, 55, 23, 44, 32, 32)),
         ((22, 1, 55, 23, 44, 32, 1)),
         ((1, 1, 55, 1, 1, 1, 1)),
+        ((1, 12)),
+        ((1, 1)),
+        ((1)),
+        (()),
     ],
 )
 def test_squeeze_none_dim(device, input_shape):

@@ -74,6 +77,6 @@ def test_squeeze_none_dim(device, input_shape):
     option._out_fx_graphs[0].print_tabular()
     # Check the graph has be rewritten and contain ttnn ops (squeeze without provided dim is lowered to reshape)
     nodes = list(option._out_fx_graphs[0].nodes)
-    assert [node.target for node in nodes].count(ttnn.reshape) == 1
+    assert [node.target for node in nodes].count(ttnn.squeeze) == 1
     # Check inference result
     assert torch.allclose(result_before, result_after)
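For reference, the aten semantics these tests pin down: squeeze with a dim removes that dimension only when its size is 1, and squeeze without a dim removes every size-1 dimension (plain PyTorch, shown as a sanity check):

import torch

x = torch.ones(1, 12)
print(torch.squeeze(x, 0).shape)         # torch.Size([12]); dim 0 has size 1, so it is dropped
print(torch.squeeze(x, 1).shape)         # torch.Size([1, 12]); dim 1 has size 12, left as-is
print(torch.ones(1, 1).squeeze().shape)  # torch.Size([]); all size-1 dims are dropped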

torch_ttnn/passes/lowering/add_data_move_pass.py
Lines changed: 2 additions & 0 deletions

@@ -216,6 +216,8 @@ def is_tt_compute(node) -> bool:
         ttnn.sum,
         ttnn.typecast,
         ttnn.argmax,
+        ttnn.fill,
+        ttnn.empty,
     ]
 )
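Judging by its signature, is_tt_compute(node) classifies an FX node as a device-side ttnn op, which is what lets the pass place data movement around the newly enabled ttnn.fill and ttnn.empty. A simplified sketch of that membership pattern; the set contents beyond the ops shown in this hunk are assumptions:

# Hypothetical, reduced version of the gating check in add_data_move_pass.py
TTNN_COMPUTE_OPS = {ttnn.sum, ttnn.typecast, ttnn.argmax, ttnn.fill, ttnn.empty}

def is_tt_compute(node) -> bool:
    # An FX node's target is the op it calls; device ops get data moves inserted
    return node.target in TTNN_COMPUTE_OPS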

torch_ttnn/passes/lowering/to_tt_pass.py
Lines changed: 27 additions & 26 deletions

@@ -10,6 +10,7 @@
     GraphCleanup,
     TtnnBfloat16,
     TtnnInt32,
+    TtnnUint32,
     TtnnDevice,
     TtnnL1MemoryConfig,
     TtnnRowMajorLayout,

@@ -370,7 +371,7 @@ def __init__(self, target, args, kwargs):
 def torch_dtype_to_ttnn_dtype(dtype: torch.dtype):
     # Add newly supported dtypes here:
     dtype_map = {
-        torch.float32: TtnnBfloat16(),
+        torch.float32: TtnnBfloat16(),  # Should this be changed to TtnnFloat32?
         torch.bfloat16: TtnnBfloat16(),
     }
     if dtype in dtype_map:

@@ -597,21 +598,12 @@ def reshape_1d(code, args=args, kwargs=kwargs):
         return None
 
     if node.target == torch.ops.aten.full.default:
-        # args[0] can be empty for aten.full which simply creates a scalar. Ignore conversion in this case.
-        if args[0]:
-            new_kwargs = {
-                "fill_value": args[1],
-                "device": TtnnDevice(),
-                "layout": TtnnTileLayout(),
-            }
-            return g.call_function(ttnn.full, args=(tuple(args[0]),), kwargs=new_kwargs)
-        # Replace op with scalar for eltwise ops
-        # TODO: Generalize this to support all eltwise ops
-        node_users = list(node.users.keys())
-        for node_user in node_users:
-            if node_user.target == torch.ops.aten.div.Tensor:
-                node_user.update_arg(1, args[1])
-        return None
+        new_kwargs = {
+            "fill_value": args[1],
+            "device": TtnnDevice(),
+            "layout": TtnnTileLayout(),
+        }
+        return g.call_function(ttnn.full, args=(args[0],), kwargs=new_kwargs)
 
     if node.target == torch.ops.aten.baddbmm.default:
         # out = beta * input + alpha * (batch1 @ batch2)

@@ -738,16 +730,7 @@ def reshape_1d(code, args=args, kwargs=kwargs):
         return None
 
     if node.target == torch.ops.aten.squeeze.dim or node.target == torch.ops.aten.squeeze.default:
-        if get_shape(gm, args[0]) in [torch.Size([1]), torch.Size([])]:
-            # see #442
-            return None
-        if use_less_ttnn_op_types or node.target == torch.ops.aten.squeeze.default:
-            # ttnn.squeeze does not support calling the OP without provided dim (torch.ops.aten.squeeze.default)
-            # squeezing is the same as reshaping to shape of output tensor of squeeze
-            output_size = list(node.meta["val"].size())
-            return g.call_function(ttnn.reshape, args=(args[0], output_size))
-        else:
-            return g.call_function(ttnn.squeeze, args=(args[0], args[1]))
+        return g.call_function(ttnn.squeeze, args=args, kwargs=kwargs)
 
     if node.target == torch.ops.aten.unsqueeze.default:
         output_shape_num_element = node.meta["val"].numel()

@@ -906,6 +889,9 @@ def reshape_1d(code, args=args, kwargs=kwargs):
         # Essentially remove this op
         return node.args[0]
 
+    if node.target == torch.ops.aten.fill.Scalar:
+        return g.call_function(ttnn.fill, args=args)
+
     if node.target in [torch.ops.aten.masked_fill.Scalar, torch.ops.aten.masked_fill.Tensor]:
         # aten.masked_fill is equivalent to the following:
         # masked_fill = (tensor * (ones - mask)) + (mask * full)

@@ -1223,6 +1209,21 @@ def reshape_1d(code, args=args, kwargs=kwargs):
         ttnn_all = g.call_function(target_wrappers.all, args=(args[0], input_shape.numel()))
         return g.call_function(torch.ops.aten.squeeze.default, args=(ttnn_all,))
 
+    if node.target == torch.ops.aten.empty.memory_format:
+        # raise RuntimeError(f"{str(kwargs)}, {str(args)}, {str(type(args[0]))}")
+        dtype_mapping = {
+            torch.float32: TtnnBfloat16(),
+            torch.float16: TtnnBfloat16(),
+            torch.int32: TtnnInt32(),
+        }
+        dtype = dtype_mapping.get(kwargs["dtype"], TtnnUint32())
+        new_kwargs = {
+            "dtype": dtype,
+            "layout": TtnnTileLayout(),
+            "device": TtnnDevice(),
+        }
+        return g.call_function(ttnn.empty, args=(args[0],), kwargs=new_kwargs)
+
     # PEP 8 suggests this explicit statement
     return None
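The aten.empty.memory_format branch above relies on dict.get with a default, so any torch dtype without an explicit entry (torch.int64, torch.bool, and so on) falls back to TtnnUint32. A self-contained illustration with stand-in marker classes (the real ones live in torch_ttnn/utils.py):

import torch

class TtnnBfloat16: ...
class TtnnInt32: ...
class TtnnUint32: ...

dtype_mapping = {
    torch.float32: TtnnBfloat16(),
    torch.float16: TtnnBfloat16(),
    torch.int32: TtnnInt32(),
}
print(type(dtype_mapping.get(torch.float32, TtnnUint32())).__name__)  # TtnnBfloat16
print(type(dtype_mapping.get(torch.int64, TtnnUint32())).__name__)    # TtnnUint32 (fallback)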

torch_ttnn/utils.py
Lines changed: 4 additions & 4 deletions

@@ -108,14 +108,14 @@ def __repr__(self):
         return f"ttnn_TILE_LAYOUT"
 
 
-class TtnnUint32:
+class TtnnInt32:
     def __repr__(self):
-        return f"ttnn_uint32"
+        return f"ttnn_int32"
 
 
-class TtnnInt32:
+class TtnnUint32:
     def __repr__(self):
-        return f"ttnn_int32"
+        return f"ttnn_uint32"
 
 
 class TtnnBfloat16:
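Note that this hunk only swaps the definition order of the two marker classes; each __repr__ still matches its class name, so behavior is unchanged:

print(repr(TtnnInt32()))   # ttnn_int32
print(repr(TtnnUint32()))  # ttnn_uint32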
