Guard against the engine being None

cehongwang · cehongwang · commit 067ebe3ae86a · 2026-05-29T21:25:42.000Z
diff --git a/py/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.py b/py/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.py
@@ -126,7 +126,7 @@ def __init__(
         self.settings = copy.deepcopy(settings)
         self.weight_name_map = weight_name_map
         self.serialized_engine = serialized_engine
-        self.engine = None
+        self.engine: Optional[Any] = None
         self.requires_output_allocator = requires_output_allocator
         self.dynamically_allocate_resources = settings.dynamically_allocate_resources
         self.symbolic_shape_expressions = symbolic_shape_expressions
@@ -229,34 +229,45 @@ def _pack_engine_info(self) -> List[str | bytes]:
 
         return engine_info
 
+    def get_engine(self) -> torch.classes.tensorrt.Engine:
+        """Return the underlying engine, raising if it has not been set up.
+
+        Used by every engine-accessing method except the hot ``forward`` path,
+        which intentionally skips the check to avoid per-call overhead.
+        """
+        if self.engine is None:
+            raise RuntimeError("Engine has not been setup yet.")
+        return self.engine
+
     def get_streamable_device_memory_budget(self) -> Any:
-        return self.engine.streamable_device_memory_budget
+        return self.get_engine().streamable_device_memory_budget
 
     def get_automatic_device_memory_budget(self) -> Any:
-        return self.engine.automatic_device_memory_budget
+        return self.get_engine().automatic_device_memory_budget
 
     def get_device_memory_budget(self) -> Any:
-        return self.engine.device_memory_budget
+        return self.get_engine().device_memory_budget
 
     def set_device_memory_budget(self, budget_bytes: int) -> int:
+        engine = self.get_engine()
         if budget_bytes < 0:
             budget_bytes = self.get_streamable_device_memory_budget()
-        self.engine.device_memory_budget = budget_bytes
-        if self.engine.device_memory_budget != budget_bytes:
+        engine.device_memory_budget = budget_bytes
+        if engine.device_memory_budget != budget_bytes:
             logger.error(f"Failed to set weight streaming budget to {budget_bytes}")
-            budget_bytes = self.engine.device_memory_budget
+            budget_bytes = engine.device_memory_budget
         if self.get_streamable_device_memory_budget() == budget_bytes:
             logger.warning("Weight streaming is disabled")
         return budget_bytes
 
     def _reset_captured_graph(self) -> None:
-        self.engine.reset_captured_graph()
+        self.get_engine().reset_captured_graph()
 
     def use_dynamically_allocated_resources(
         self, dynamically_allocate_resources: bool = False
     ) -> None:
         self.dynamically_allocate_resources = dynamically_allocate_resources
-        self.engine.use_dynamically_allocated_resources(
+        self.get_engine().use_dynamically_allocated_resources(
             self.dynamically_allocate_resources
         )
 
@@ -277,7 +288,7 @@ def setup_engine(self) -> None:
         else:
             from torch_tensorrt.dynamo.runtime._TRTEngine import TRTEngine
 
-            self.engine = TRTEngine(  # type: ignore[assignment]
+            self.engine = TRTEngine(
                 self._pack_engine_info(),
                 profile_execution=self.profiling_enabled,
             )
@@ -325,7 +336,7 @@ def decode_metadata(encoded_metadata: bytes) -> Any:
         return metadata
 
     def get_extra_state(self) -> SerializedTorchTensorRTModuleFmt:
-        if self.engine:
+        if self.engine is not None:
             engine_info = self._pack_engine_info()
             assert isinstance(engine_info[ENGINE_IDX], (bytes, bytearray))
             engine_info[ENGINE_IDX] = base64.b64encode(engine_info[ENGINE_IDX])
@@ -380,7 +391,7 @@ def set_extra_state(self, state: SerializedTorchTensorRTModuleFmt) -> None:
             else:
                 from torch_tensorrt.dynamo.runtime._TRTEngine import TRTEngine
 
-                self.engine = TRTEngine(serialized_engine_info)  # type: ignore[assignment]
+                self.engine = TRTEngine(serialized_engine_info)
 
             self.engine.set_output_tensors_as_unowned(
                 metadata["output_tensors_are_unowned"]
@@ -395,7 +406,7 @@ def set_extra_state(self, state: SerializedTorchTensorRTModuleFmt) -> None:
         self.target_device = self._resolve_target_device()
 
     def set_pre_allocated_outputs(self, enable: bool) -> None:
-        self.engine.use_pre_allocated_outputs = enable
+        self.get_engine().use_pre_allocated_outputs = enable
 
     @property
     def pre_allocated_outputs(self) -> Any:
@@ -405,13 +416,15 @@ def pre_allocated_outputs(self) -> Any:
         return getattr(self.engine, "pre_allocated_outputs", [])
 
     def set_use_output_allocator(self, enable: bool) -> None:
-        self.engine.use_output_allocator_outputs = enable
+        self.get_engine().use_output_allocator_outputs = enable
 
     def forward(self, *inputs: Any) -> torch.Tensor | Tuple[torch.Tensor, ...]:
-        """Run the TensorRT engine on GPU tensors (non-tensor args are cast to CUDA tensors)."""
-        if self.engine is None:
-            raise RuntimeError("Engine has not been setup yet.")
+        """Run the TensorRT engine on GPU tensors (non-tensor args are cast to CUDA tensors).
 
+        Note: callers are responsible for ensuring the engine has been set up;
+        the hot path intentionally omits a ``self.engine is None`` guard so
+        that a properly-bound module avoids the per-call attribute check.
+        """
         target = self.target_device
         binding_names = self.input_binding_names
         # len-check inlined (cheaper than keeping an f-string around the hot path)
@@ -451,28 +464,26 @@ def enable_profiling(
         profile_format: str = "perfetto",
     ) -> None:
         """Enable engine profiling (optional path prefix and format for tracing output)."""
-        if self.engine is None:
-            raise RuntimeError("Engine has not been initialized yet.")
+        engine = self.get_engine()
 
         if profiling_results_dir is not None:
-            self.engine.profile_path_prefix = profiling_results_dir
+            engine.profile_path_prefix = profiling_results_dir
 
-        self.engine.enable_profiling()
-        if hasattr(self.engine, "set_profile_format"):
-            self.engine.set_profile_format(profile_format)
+        engine.enable_profiling()
+        if hasattr(engine, "set_profile_format"):
+            engine.set_profile_format(profile_format)
         self.profiling_enabled = True
 
     def set_output_tensors_as_unowned(self, enabled: bool) -> None:
-        self.engine.set_output_tensors_as_unowned(enabled)
+        self.get_engine().set_output_tensors_as_unowned(enabled)
 
     def are_output_tensors_unowned(self) -> bool:
-        return bool(self.engine.are_output_tensors_unowned())
+        return bool(self.get_engine().are_output_tensors_unowned())
 
     def disable_profiling(self) -> None:
         """Disable engine profiling and clear the profiling flag on this module."""
-        if self.engine is None:
-            raise RuntimeError("Engine has not been initialized yet.")
-        self.engine.disable_profiling()
+        engine = self.get_engine()
+        engine.disable_profiling()
         self.profiling_enabled = False
 
     def get_layer_info(self) -> str:
@@ -482,15 +493,9 @@ def get_layer_info(self) -> str:
 
             str: A JSON string which contains the layer information of the engine incapsulated in this module
         """
-        if self.engine is None:
-            raise RuntimeError("Engine has not been initialized yet.")
-
-        layer_info: str = self.engine.get_engine_layer_info()
+        layer_info: str = self.get_engine().get_engine_layer_info()
         return layer_info
 
     def dump_layer_info(self) -> None:
         """Dump layer information encoded by the TensorRT engine in this module to STDOUT"""
-        if self.engine is None:
-            raise RuntimeError("Engine has not been initialized yet.")
-
-        self.engine.dump_engine_layer_info()
+        self.get_engine().dump_engine_layer_info()