Skip to content

Commit 1fa8c82

Browse files
committed
test: consolidate C++ runtime tests, add model-level coverage
Address PR review comments that asked that the new C++ runtime tests be folded into existing feature-level files rather than shipped as parallel `*_cpp.py` files. What - Merge `test_000_runtime_cache_cpp.py` into the existing `test_000_runtime_cache.py`. The file already covered the Python runtime path; two new classes (`TestRuntimeCacheCppPersistence`, `TestCppSerializationIndices`) cover the C++ runtime path via `use_python_runtime=False`, and the serialization-index assertions. Skip on non-RTX builds. - Fold the C++ runtime cases for the dynamic shapes kernel specialization strategy into `test_001_dynamic_shapes_kernel_strategy.py` (introduced upstream in PR pytorch#4184). Two new classes (`TestDynamicShapesKernelStrategyCpp`, `TestDynamicShapesKernelStrategyCppInvalidValue`) exercise lazy/eager/none end-to-end and reject invalid strategy names. The pre-existing Python runtime tests remain untouched. - Rename `test_000_cuda_graph_strategy.py` to `test_001_cuda_graph_strategy.py` to match the `test_001_*` convention used for L1 RTX-only features. When upstream lands the Python runtime counterpart (PR pytorch#4187), both sets fold into the same file. - Add model-level tests: `test_runtime_cache_models.py` gains a `TestRuntimeCacheCppModels` class exercising ResNet18 through the C++ runtime with a warm-cache roundtrip. `test_dynamic_shapes_kernel_strategy_models.py` gains `TestDynamicShapesKernelStrategyCppModels` covering lazy/eager/none on ResNet18 via the C++ runtime. Verified - 35 passed / 3 skipped in the runtime/ tests (merged file plus the test_001 strategy files). - No regression in test_002_cudagraphs_cpp.py (8 passed) or test_005_dynamic_allocation.py (1 passed). Addresses PR pytorch#4202 review comments asking for test file merges and the addition of model-level runtime_cache_models.py / dynamic_shapes_kernel_strategy_models.py coverage.
1 parent 54f9ccd commit 1fa8c82

6 files changed

Lines changed: 390 additions & 279 deletions

tests/py/dynamo/models/test_dynamic_shapes_kernel_strategy_models.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,5 +129,69 @@ def test_dynamic_batch_none(self):
129129
self._test_dynamic_batch_with_strategy("none")
130130

131131

132+
@unittest.skipIf(
    not ENABLED_FEATURES.torch_tensorrt_runtime,
    "C++ runtime is not available",
)
@unittest.skipIf(
    not ENABLED_FEATURES.tensorrt_rtx,
    "Dynamic shapes kernel specialization strategy requires TensorRT-RTX",
)
@unittest.skipIf(
    not importlib.util.find_spec("torchvision"),
    "torchvision is not installed",
)
class TestDynamicShapesKernelStrategyCppModels(TestCase):
    """End-to-end model tests with each strategy exercised through the C++ runtime."""

    def tearDown(self):
        # Reset dynamo so compiled-graph state does not leak between tests.
        torch._dynamo.reset()

    @staticmethod
    def _load_resnet18():
        """Return a pretrained ResNet18 in eval mode on the GPU.

        Factored out of the per-strategy tests, which previously triplicated
        the torchvision import and model-construction boilerplate. Imported
        lazily so the module-level torchvision skipIf stays the only gate.
        """
        import torchvision.models as models

        return models.resnet18(pretrained=True).eval().cuda()

    def _compile_and_verify_cpp(self, model, strategy):
        """Compile ``model`` through the C++ runtime with ``strategy`` and
        assert the TRT output matches eager PyTorch via cosine similarity.

        Args:
            model: GPU-resident eval-mode module taking a single NCHW tensor.
            strategy: kernel specialization strategy name ("lazy"/"eager"/"none").
        """
        input_tensor = torch.randn(4, 3, 224, 224).cuda()
        compiled = torchtrt.compile(
            model,
            ir="dynamo",
            inputs=[
                torchtrt.Input(
                    min_shape=(1, 3, 224, 224),
                    opt_shape=(4, 3, 224, 224),
                    max_shape=(8, 3, 224, 224),
                    dtype=torch.float32,
                )
            ],
            enabled_precisions={torch.float32},
            use_python_runtime=False,  # exercise the C++ runtime path
            min_block_size=1,
            dynamic_shapes_kernel_specialization_strategy=strategy,
        )
        ref_output = model(input_tensor)
        trt_output = compiled(input_tensor)
        cos_sim = cosine_similarity(ref_output, trt_output)
        self.assertTrue(
            cos_sim > COSINE_THRESHOLD,
            f"C++ runtime cosine similarity {cos_sim} below threshold {COSINE_THRESHOLD} "
            f"with strategy={strategy}",
        )

    def test_resnet18_lazy_strategy_cpp(self):
        self._compile_and_verify_cpp(self._load_resnet18(), "lazy")

    def test_resnet18_eager_strategy_cpp(self):
        self._compile_and_verify_cpp(self._load_resnet18(), "eager")

    def test_resnet18_none_strategy_cpp(self):
        self._compile_and_verify_cpp(self._load_resnet18(), "none")
132196
# Allow running this test module directly (outside the pytest harness).
if __name__ == "__main__":
    run_tests()

tests/py/dynamo/models/test_runtime_cache_models.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,5 +325,96 @@ def forward(self, x):
325325
self.assertTrue(True, "Timing test completed (informational)")
326326

327327

328+
@unittest.skipIf(
    not ENABLED_FEATURES.torch_tensorrt_runtime,
    "C++ runtime is not available",
)
@unittest.skipIf(
    not ENABLED_FEATURES.tensorrt_rtx,
    "Runtime cache is only available with TensorRT-RTX",
)
@unittest.skipIf(
    not importlib.util.find_spec("torchvision"),
    "torchvision is not installed",
)
class TestRuntimeCacheCppModels(TestCase):
    """End-to-end model tests with runtime cache exercised through the C++ runtime."""

    def setUp(self):
        # A fresh temp directory per test keeps cache files isolated between runs.
        self.cache_dir = tempfile.mkdtemp()
        self.cache_path = os.path.join(self.cache_dir, "runtime_cache.bin")

    def tearDown(self):
        shutil.rmtree(self.cache_dir, ignore_errors=True)
        torch._dynamo.reset()

    def test_resnet18_with_runtime_cache_cpp(self):
        import torchvision.models as models

        net = models.resnet18(pretrained=True).eval().cuda()
        x = torch.randn(1, 3, 224, 224).cuda()

        trt_module = torchtrt.compile(
            net,
            ir="dynamo",
            inputs=[torchtrt.Input(x.shape, dtype=torch.float32)],
            enabled_precisions={torch.float32},
            use_python_runtime=False,
            min_block_size=1,
            runtime_cache_path=self.cache_path,
        )

        expected = net(x)
        actual = trt_module(x)

        sim = cosine_similarity(expected, actual)
        self.assertTrue(
            sim > COSINE_THRESHOLD,
            f"ResNet18 C++ runtime cosine similarity {sim} below threshold {COSINE_THRESHOLD}",
        )

        # Dropping the compiled module must flush the runtime cache to disk.
        del trt_module
        gc.collect()
        self.assertTrue(
            os.path.isfile(self.cache_path),
            "Runtime cache should be saved after ResNet18 C++-runtime inference",
        )

    def test_resnet18_cache_reuse_cpp(self):
        """Warm-cache second compile should match eager output."""
        import torchvision.models as models

        net = models.resnet18(pretrained=True).eval().cuda()
        x = torch.randn(1, 3, 224, 224).cuda()
        expected = net(x)

        settings = dict(
            ir="dynamo",
            inputs=[torchtrt.Input(x.shape, dtype=torch.float32)],
            enabled_precisions={torch.float32},
            use_python_runtime=False,
            min_block_size=1,
            runtime_cache_path=self.cache_path,
        )

        # Cold-cache compile and correctness check.
        first = torchtrt.compile(net, **settings)
        self.assertTrue(
            cosine_similarity(expected, first(x)) > COSINE_THRESHOLD,
            "First ResNet18 C++-runtime output should match eager",
        )
        del first
        gc.collect()
        self.assertTrue(os.path.isfile(self.cache_path))

        # Warm-cache compile against the persisted cache file.
        second = torchtrt.compile(net, **settings)
        self.assertTrue(
            cosine_similarity(expected, second(x)) > COSINE_THRESHOLD,
            "Second ResNet18 C++-runtime output (warm cache) should match eager",
        )
328419
# Allow running this test module directly (outside the pytest harness).
if __name__ == "__main__":
    run_tests()

tests/py/dynamo/runtime/test_000_dynamic_shapes_kernel_strategy.py

Lines changed: 0 additions & 133 deletions
This file was deleted.

0 commit comments

Comments
 (0)