Skip to content

Commit c3b7a62

Browse files
authored
Support reshaping tiled uint32 tensors, re-enable models that had OOM due to conv (#1017)
* Update target wrappers since reshaping tiled uint32 tensors is now supported * Increase l1_small_size in conftest so convs no longer cause OOM with program cache enabled * Update stack target wrapper to convert to RM to get ViLT passing
1 parent 4c253c9 commit c3b7a62

File tree

10 files changed

+21
-26
lines changed

10 files changed

+21
-26
lines changed

tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def input_var_check_ttnn(request):
6767
@pytest.fixture(scope="session")
6868
def device(request):
6969
# TODO(tt-metal#13746): Currently L1 small size needs to be manually determined
70-
l1_small_size = 16384
70+
l1_small_size = 65536
7171
dispatch_core_config = get_dispatch_core_config()
7272

7373
if request.config.getoption("--data_parallel"):

tests/models/glpn_kitti/test_glpn_kitti.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def _load_inputs(self):
2727

2828
@pytest.mark.parametrize(
2929
"mode",
30-
[pytest.param("eval", marks=pytest.mark.xfail(reason="OOM with program cache enabled"))],
30+
["eval"],
3131
)
3232
def test_glpn_kitti(record_property, mode):
3333
model_name = "GLPN-KITTI"

tests/models/hardnet/test_hardnet.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ def _load_inputs(self):
4040
@pytest.mark.parametrize(
4141
"mode",
4242
[
43-
pytest.param("train", marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
44-
pytest.param("eval", marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
43+
"train",
44+
pytest.param("eval", marks=pytest.mark.converted_end_to_end),
4545
],
4646
)
4747
def test_hardnet(record_property, mode):

tests/models/timm/test_timm_image_classification.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,32 +42,28 @@ def _load_inputs(self):
4242
["tf_efficientnet_lite1.in1k", "train"],
4343
["tf_efficientnet_lite2.in1k", "train"],
4444
["tf_efficientnet_lite3.in1k", "train"],
45-
pytest.param(
46-
["tf_efficientnet_lite4.in1k", "train"], marks=pytest.mark.xfail(reason="OOM with program cache enabled")
47-
),
45+
["tf_efficientnet_lite4.in1k", "train"],
4846
["ghostnet_100.in1k", "train"],
4947
["ghostnetv2_100.in1k", "train"],
50-
pytest.param(["inception_v4.tf_in1k", "train"], marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
48+
["inception_v4.tf_in1k", "train"],
5149
["mixer_b16_224.goog_in21k", "train"],
5250
["mobilenetv1_100.ra4_e3600_r224_in1k", "train"],
5351
["ese_vovnet19b_dw.ra_in1k", "train"],
54-
pytest.param(["xception71.tf_in1k", "train"], marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
52+
["xception71.tf_in1k", "train"],
5553
["dla34.in1k", "train"],
5654
["hrnet_w18.ms_aug_in1k", "train"],
5755
pytest.param(["tf_efficientnet_lite0.in1k", "eval"], marks=pytest.mark.converted_end_to_end),
5856
pytest.param(["tf_efficientnet_lite1.in1k", "eval"], marks=pytest.mark.converted_end_to_end),
5957
pytest.param(["tf_efficientnet_lite2.in1k", "eval"], marks=pytest.mark.converted_end_to_end),
6058
["tf_efficientnet_lite3.in1k", "eval"],
61-
pytest.param(
62-
["tf_efficientnet_lite4.in1k", "eval"], marks=pytest.mark.xfail(reason="OOM with program cache enabled")
63-
),
59+
["tf_efficientnet_lite4.in1k", "eval"],
6460
pytest.param(["ghostnet_100.in1k", "eval"], marks=pytest.mark.converted_end_to_end),
6561
["ghostnetv2_100.in1k", "eval"],
66-
pytest.param(["inception_v4.tf_in1k", "eval"], marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
62+
["inception_v4.tf_in1k", "eval"],
6763
["mixer_b16_224.goog_in21k", "eval"],
6864
pytest.param(["mobilenetv1_100.ra4_e3600_r224_in1k", "eval"], marks=pytest.mark.converted_end_to_end),
6965
pytest.param(["ese_vovnet19b_dw.ra_in1k", "eval"], marks=pytest.mark.converted_end_to_end),
70-
pytest.param(["xception71.tf_in1k", "eval"], marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
66+
pytest.param(["xception71.tf_in1k", "eval"], marks=pytest.mark.converted_end_to_end),
7167
pytest.param(["dla34.in1k", "eval"], marks=pytest.mark.converted_end_to_end),
7268
["hrnet_w18.ms_aug_in1k", "eval"],
7369
]

tests/models/torchvision/test_torchvision_object_detection.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,7 @@ def _load_inputs(self):
4343
@pytest.mark.parametrize(
4444
"model_info",
4545
[
46-
pytest.param(
47-
("ssd300_vgg16", "SSD300_VGG16_Weights"), marks=pytest.mark.xfail(reason="OOM with program cache enabled")
48-
),
46+
("ssd300_vgg16", "SSD300_VGG16_Weights"),
4947
("ssdlite320_mobilenet_v3_large", "SSDLite320_MobileNet_V3_Large_Weights"),
5048
pytest.param(
5149
("retinanet_resnet50_fpn", "RetinaNet_ResNet50_FPN_Weights"),

tests/models/unet/test_unet.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ def _load_inputs(self):
4949
@pytest.mark.parametrize(
5050
"mode",
5151
[
52-
pytest.param("train", marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
53-
pytest.param("eval", marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
52+
"train",
53+
pytest.param("eval", marks=pytest.mark.converted_end_to_end),
5454
],
5555
)
5656
def test_unet(record_property, mode):

tests/models/unet_brain/test_unet_brain.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ def _load_inputs(self):
5151
@pytest.mark.parametrize(
5252
"mode",
5353
[
54-
pytest.param("train", marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
55-
pytest.param("eval", marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
54+
"train",
55+
pytest.param("eval", marks=pytest.mark.converted_end_to_end),
5656
],
5757
)
5858
def test_unet_brain(record_property, mode):

tests/models/unet_carvana/test_unet_carvana.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ def _load_inputs(self):
3030
@pytest.mark.parametrize(
3131
"mode",
3232
[
33-
pytest.param("train", marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
34-
pytest.param("eval", marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
33+
"train",
34+
pytest.param("eval", marks=pytest.mark.converted_end_to_end),
3535
],
3636
)
3737
def test_unet_carvana(record_property, mode):

tests/models/yolov3/test_yolov3.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def _load_inputs(self):
5050

5151
@pytest.mark.parametrize(
5252
"mode",
53-
[pytest.param("eval", marks=pytest.mark.xfail(reason="OOM with program cache enabled"))],
53+
["eval"],
5454
)
5555
def test_yolov3(record_property, mode):
5656
model_name = "YOLOv3"

torch_ttnn/passes/lowering/target_wrappers.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,10 +144,11 @@ def stack(tensors, dim, output_shape):
144144
# Reshape each input tensor to add the new dimension
145145
unsqueezed_tensors = []
146146
for tensor in tensors:
147-
# TODO: remove when reshape supports tiled uint32 inputs
147+
# TODO: remove when concat supports tiled uint32
148+
tensor = ttnn.reshape(tensor, unsqueezed_shape)
148149
if tensor.layout == ttnn.TILE_LAYOUT and tensor.dtype == ttnn.uint32:
149150
tensor = ttnn.to_layout(tensor, ttnn.ROW_MAJOR_LAYOUT)
150-
unsqueezed_tensors.append(ttnn.reshape(tensor, unsqueezed_shape))
151+
unsqueezed_tensors.append(tensor)
151152

152153
# Concatenate all reshaped tensors along the stack dimension
153154
return ttnn.concat(unsqueezed_tensors, dim)

0 commit comments

Comments (0)