Skip to content

Support reshaping tiled uint32 tensors, re-enable models that had OOM due to conv #1017

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def input_var_check_ttnn(request):
@pytest.fixture(scope="session")
def device(request):
# TODO(tt-metal#13746): Currently L1 small size needs to be manually determined
l1_small_size = 16384
l1_small_size = 65536
dispatch_core_config = get_dispatch_core_config()

if request.config.getoption("--data_parallel"):
Expand Down
2 changes: 1 addition & 1 deletion tests/models/glpn_kitti/test_glpn_kitti.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def _load_inputs(self):

@pytest.mark.parametrize(
"mode",
[pytest.param("eval", marks=pytest.mark.xfail(reason="OOM with program cache enabled"))],
["eval"],
)
def test_glpn_kitti(record_property, mode):
model_name = "GLPN-KITTI"
Expand Down
4 changes: 2 additions & 2 deletions tests/models/hardnet/test_hardnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ def _load_inputs(self):
@pytest.mark.parametrize(
"mode",
[
pytest.param("train", marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
pytest.param("eval", marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
"train",
pytest.param("eval", marks=pytest.mark.converted_end_to_end),
],
)
def test_hardnet(record_property, mode):
Expand Down
16 changes: 6 additions & 10 deletions tests/models/timm/test_timm_image_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,32 +42,28 @@ def _load_inputs(self):
["tf_efficientnet_lite1.in1k", "train"],
["tf_efficientnet_lite2.in1k", "train"],
["tf_efficientnet_lite3.in1k", "train"],
pytest.param(
["tf_efficientnet_lite4.in1k", "train"], marks=pytest.mark.xfail(reason="OOM with program cache enabled")
),
["tf_efficientnet_lite4.in1k", "train"],
["ghostnet_100.in1k", "train"],
["ghostnetv2_100.in1k", "train"],
pytest.param(["inception_v4.tf_in1k", "train"], marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
["inception_v4.tf_in1k", "train"],
["mixer_b16_224.goog_in21k", "train"],
["mobilenetv1_100.ra4_e3600_r224_in1k", "train"],
["ese_vovnet19b_dw.ra_in1k", "train"],
pytest.param(["xception71.tf_in1k", "train"], marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
["xception71.tf_in1k", "train"],
["dla34.in1k", "train"],
["hrnet_w18.ms_aug_in1k", "train"],
pytest.param(["tf_efficientnet_lite0.in1k", "eval"], marks=pytest.mark.converted_end_to_end),
pytest.param(["tf_efficientnet_lite1.in1k", "eval"], marks=pytest.mark.converted_end_to_end),
pytest.param(["tf_efficientnet_lite2.in1k", "eval"], marks=pytest.mark.converted_end_to_end),
["tf_efficientnet_lite3.in1k", "eval"],
pytest.param(
["tf_efficientnet_lite4.in1k", "eval"], marks=pytest.mark.xfail(reason="OOM with program cache enabled")
),
["tf_efficientnet_lite4.in1k", "eval"],
pytest.param(["ghostnet_100.in1k", "eval"], marks=pytest.mark.converted_end_to_end),
["ghostnetv2_100.in1k", "eval"],
pytest.param(["inception_v4.tf_in1k", "eval"], marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
["inception_v4.tf_in1k", "eval"],
["mixer_b16_224.goog_in21k", "eval"],
pytest.param(["mobilenetv1_100.ra4_e3600_r224_in1k", "eval"], marks=pytest.mark.converted_end_to_end),
pytest.param(["ese_vovnet19b_dw.ra_in1k", "eval"], marks=pytest.mark.converted_end_to_end),
pytest.param(["xception71.tf_in1k", "eval"], marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
pytest.param(["xception71.tf_in1k", "eval"], marks=pytest.mark.converted_end_to_end),
pytest.param(["dla34.in1k", "eval"], marks=pytest.mark.converted_end_to_end),
["hrnet_w18.ms_aug_in1k", "eval"],
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,7 @@ def _load_inputs(self):
@pytest.mark.parametrize(
"model_info",
[
pytest.param(
("ssd300_vgg16", "SSD300_VGG16_Weights"), marks=pytest.mark.xfail(reason="OOM with program cache enabled")
),
("ssd300_vgg16", "SSD300_VGG16_Weights"),
("ssdlite320_mobilenet_v3_large", "SSDLite320_MobileNet_V3_Large_Weights"),
pytest.param(
("retinanet_resnet50_fpn", "RetinaNet_ResNet50_FPN_Weights"),
Expand Down
4 changes: 2 additions & 2 deletions tests/models/unet/test_unet.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ def _load_inputs(self):
@pytest.mark.parametrize(
"mode",
[
pytest.param("train", marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
pytest.param("eval", marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
"train",
pytest.param("eval", marks=pytest.mark.converted_end_to_end),
],
)
def test_unet(record_property, mode):
Expand Down
4 changes: 2 additions & 2 deletions tests/models/unet_brain/test_unet_brain.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ def _load_inputs(self):
@pytest.mark.parametrize(
"mode",
[
pytest.param("train", marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
pytest.param("eval", marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
"train",
pytest.param("eval", marks=pytest.mark.converted_end_to_end),
],
)
def test_unet_brain(record_property, mode):
Expand Down
4 changes: 2 additions & 2 deletions tests/models/unet_carvana/test_unet_carvana.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ def _load_inputs(self):
@pytest.mark.parametrize(
"mode",
[
pytest.param("train", marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
pytest.param("eval", marks=pytest.mark.xfail(reason="OOM with program cache enabled")),
"train",
pytest.param("eval", marks=pytest.mark.converted_end_to_end),
],
)
def test_unet_carvana(record_property, mode):
Expand Down
2 changes: 1 addition & 1 deletion tests/models/yolov3/test_yolov3.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def _load_inputs(self):

@pytest.mark.parametrize(
"mode",
[pytest.param("eval", marks=pytest.mark.xfail(reason="OOM with program cache enabled"))],
["eval"],
)
def test_yolov3(record_property, mode):
model_name = "YOLOv3"
Expand Down
5 changes: 3 additions & 2 deletions torch_ttnn/passes/lowering/target_wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,10 +144,11 @@ def stack(tensors, dim, output_shape):
# Reshape each input tensor to add the new dimension
unsqueezed_tensors = []
for tensor in tensors:
# TODO: remove when reshape supports tiled uint32 inputs
# TODO: remove when concat supports tiled uint32
tensor = ttnn.reshape(tensor, unsqueezed_shape)
if tensor.layout == ttnn.TILE_LAYOUT and tensor.dtype == ttnn.uint32:
tensor = ttnn.to_layout(tensor, ttnn.ROW_MAJOR_LAYOUT)
unsqueezed_tensors.append(ttnn.reshape(tensor, unsqueezed_shape))
unsqueezed_tensors.append(tensor)

# Concatenate all reshaped tensors along the stack dimension
return ttnn.concat(unsqueezed_tensors, dim)
Expand Down