pytorch · lanluo-nvidia · May 9, 2026 · Mar 18, 2026 · Mar 18, 2026 · Mar 24, 2026
diff --git a/.github/workflows/build-test-linux-x86_64.yml b/.github/workflows/build-test-linux-x86_64.yml
@@ -80,7 +80,17 @@
       use-rtx: false
       pip-install-torch-extra-args: "--extra-index-url https://pypi.org/simple"
 
+  executorch-static-build:  # calls the reusable ExecuTorch static-build workflow after filter-matrix and build
+    needs: [filter-matrix, build]
+    uses: ./.github/workflows/executorch-static-linux.yml
+    with:  # build-matrix forwards the filter-matrix job's matrix output to the called workflow
+      repository: "pytorch/tensorrt"
+      ref: ""
+      test-infra-repository: pytorch/test-infra
+      test-infra-ref: main
+      build-matrix: ${{ needs.filter-matrix.outputs.matrix }}
+
   L0-dynamo-converter-tests:
     name: ${{ matrix.display-name }}
     needs: [filter-matrix, build]
    if: ${{ (github.ref_name == 'main' || github.ref_name == 'nightly' || contains(github.event.pull_request.labels.*.name, 'Force All Tests[L0+L1+L2]')) && always() || success() }}

diff --git a/.github/workflows/executorch-static-linux.yml b/.github/workflows/executorch-static-linux.yml
@@ -0,0 +1,107 @@
+name: ExecuTorch Static Linux Build
+
+# Reusable workflow: picks one entry from the caller's build matrix, builds the
+# ExecuTorch core library from source, then builds trt_executor_runner against it.
+on:
+  workflow_call:
+    inputs:
+      build-matrix:
+        description: "Build matrix to utilize"
+        default: ""
+        type: string
+      repository:
+        description: 'Repository to checkout, defaults to ""'
+        default: ""
+        type: string
+      ref:
+        description: 'Reference to checkout, defaults to ""'
+        default: ""
+        type: string
+      test-infra-repository:
+        description: "Test infra repository to use"
+        default: "pytorch/test-infra"
+        type: string
+      test-infra-ref:
+        description: "Test infra reference to use"
+        default: ""
+        type: string
+
+jobs:
+  # Narrow the incoming matrix to a single entry so the build job runs once.
+  select-matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.pick.outputs.matrix }}
+    steps:
+      - id: pick
+        env:
+          FULL_MATRIX: ${{ inputs.build-matrix }}
+        run: |
+          set -euo pipefail
+          # The script prints one "matrix=<json>" line on stdout, which is appended
+          # to GITHUB_OUTPUT; validation failures exit non-zero with a message.
+          python3 - <<'PY' >> "${GITHUB_OUTPUT}"
+          import json
+          import os
+
+          raw = os.environ.get("FULL_MATRIX", "").strip()
+          if not raw:
+              raise SystemExit("build-matrix input is empty")
+
+          try:
+              matrix = json.loads(raw)
+          except json.JSONDecodeError as err:
+              raise SystemExit(f"build-matrix is not valid JSON: {err}")
+
+          if not isinstance(matrix, dict):
+              raise SystemExit("build-matrix must be a JSON object with an include[] list")
+
+          include = matrix.get("include") or []
+          if not include:
+              raise SystemExit("build-matrix include[] is empty")
+
+          # Prefer the Python 3.11 entry; otherwise use the first entry.
+          preferred = next(
+              (entry for entry in include if entry.get("python_version") == "3.11"),
+              include[0],
+          )
+
+          print("matrix=" + json.dumps({"include": [preferred]}))
+          PY
+
+  # Build executorch_core from a fresh clone, then configure this repository
+  # against it and compile the trt_executor_runner target.
+  build:
+    needs: select-matrix
+    uses: ./.github/workflows/linux-test.yml
+    with:
+      job-name: executorch-static-build
+      repository: ${{ inputs.repository }}
+      ref: ${{ inputs.ref }}
+      test-infra-repository: ${{ inputs.test-infra-repository }}
+      test-infra-ref: ${{ inputs.test-infra-ref }}
+      build-matrix: ${{ needs.select-matrix.outputs.matrix }}
+      script: |
+        set -euo pipefail
+        EXECUTORCH_SRC="${RUNNER_TEMP}/executorch"
+        EXECUTORCH_BUILD="${EXECUTORCH_SRC}/cmake-out"
+
+        # NOTE(review): clones the default-branch tip, unpinned and without
+        # submodules; confirm executorch_core configures this way and consider pinning.
+        git clone --depth 1 https://github.com/pytorch/executorch.git "${EXECUTORCH_SRC}"
+        cmake -S "${EXECUTORCH_SRC}" -B "${EXECUTORCH_BUILD}" \
+          -DCMAKE_BUILD_TYPE=Release \
+          -DBUILD_TESTING=OFF \
+          -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=OFF \
+          -DEXECUTORCH_BUILD_PYBIND=OFF \
+          -DEXECUTORCH_BUILD_PORTABLE_OPS=OFF
+        cmake --build "${EXECUTORCH_BUILD}" --target executorch_core -j"$(nproc)"
+
+        # Point CMake at the installed torch package's CMake config directory.
+        TORCH_CMAKE_PREFIX="$(python3 -c 'import torch; print(torch.utils.cmake_prefix_path)')"
+        cmake -S . -B build-executorch \
+          -DCMAKE_PREFIX_PATH="${TORCH_CMAKE_PREFIX}" \
+          -DBUILD_TORCHTRT_EXECUTORCH=ON \
+          -DEXECUTORCH_ROOT="${EXECUTORCH_SRC}" \
+          -DEXECUTORCH_CORE_LIBRARY="${EXECUTORCH_BUILD}/libexecutorch_core.a"
+        cmake --build build-executorch --target trt_executor_runner -j"$(nproc)"
diff --git a/.gitignore b/.gitignore
@@ -82,3 +82,4 @@ coverage.xml
 *.pt2
 examples/torchtrt_aoti_example/torchtrt_aoti_example
 CLAUDE.md
+build-executorch/
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -27,6 +27,10 @@ endif()
 add_subdirectory(core)
 add_subdirectory(cpp)
 
+if(BUILD_TORCHTRT_EXECUTORCH)  # opt-in: the option is declared in cmake/build_options.cmake and defaults to OFF
+  add_subdirectory(examples/torchtrt_executorch_example)
+endif()
+
 include(CMakePackageConfigHelpers)
 
 configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/Config.cmake.in

diff --git a/MODULE.bazel b/MODULE.bazel
@@ -26,6 +26,15 @@ new_local_repository = use_repo_rule("@bazel_tools//tools/build_defs/repo:local.
 
 local_torch = use_repo_rule("//toolchains:local_torch.bzl", "local_torch")
 
+local_executorch = use_repo_rule("//toolchains:local_executorch.bzl", "local_executorch")
+
+# Register the @executorch repository (referenced as @executorch//:executorch_headers by
+# //core/runtime/executorch). ExecuTorch is detected from the active Python environment
+# (VIRTUAL_ENV / CONDA_PREFIX / PATH); both pip-installed wheel and source-tree layouts are supported.
+# Optional overrides: EXECUTORCH_PATH=/path/to/executorch and
+# EXECUTORCH_CORE_PATH=/path/to/libexecutorch_core.a
+local_executorch(name = "executorch")
+
 # External dependency for torch_tensorrt if you already have precompiled binaries.
 new_local_repository(
     name = "torch_tensorrt",
@@ -77,6 +86,15 @@ local_torch(name = "libtorch")
 #    build_file = "third_party/libtorch/BUILD"
 #)
 
+# Explicit ExecuTorch override. The repository rule above is preferred.
+# If you use this manually, point the path either at the installed
+# ExecuTorch package root or a compatible source-tree root.
+#new_local_repository(
+#    name = "executorch",
+#    build_file = "@//third_party/executorch:BUILD",
+#    path = "/path/to/executorch",
+#)
+
 #new_local_repository(
 #   name = "tensorrt",
 #   path = "/usr/",

diff --git a/cmake/build_options.cmake b/cmake/build_options.cmake
@@ -24,3 +24,5 @@ if(NOT CMAKE_CONFIGURATION_TYPES)
                                                      ${VALID_BUILD_TYPES})
     endif()
 endif()
+
+option(BUILD_TORCHTRT_EXECUTORCH "Build the ExecuTorch TensorRT backend and example runner" OFF)
diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake
@@ -19,6 +19,36 @@ add_definitions(-DTORCH_VERSION_MAJOR=${Torch_VERSION_MAJOR})
 add_definitions(-DTORCH_VERSION_MINOR=${Torch_VERSION_MINOR})
 add_definitions(-DTORCH_VERSION_PATCH=${Torch_VERSION_PATCH})
 
+if(BUILD_TORCHTRT_EXECUTORCH)
+    # Resolve the ExecuTorch source tree without assuming any machine-specific
+    # location: -DEXECUTORCH_ROOT=<path> takes precedence, otherwise the
+    # EXECUTORCH_ROOT environment variable is used. If neither is set, the
+    # FATAL_ERROR check below reports the missing path.
+    if(NOT DEFINED EXECUTORCH_ROOT AND NOT "$ENV{EXECUTORCH_ROOT}" STREQUAL "")
+        set(EXECUTORCH_ROOT "$ENV{EXECUTORCH_ROOT}" CACHE PATH "Path to the ExecuTorch source tree")
+    endif()
+
+    if(NOT DEFINED EXECUTORCH_ROOT OR EXECUTORCH_ROOT STREQUAL "")
+        message(FATAL_ERROR "BUILD_TORCHTRT_EXECUTORCH requires EXECUTORCH_ROOT to point to an ExecuTorch source tree")
+    endif()
+
+    if(NOT EXISTS "${EXECUTORCH_ROOT}/runtime")
+        message(FATAL_ERROR "EXECUTORCH_ROOT='${EXECUTORCH_ROOT}' is missing runtime/")
+    endif()
+
+    if(NOT DEFINED EXECUTORCH_CORE_LIBRARY)
+        set(
+            EXECUTORCH_CORE_LIBRARY
+            "${EXECUTORCH_ROOT}/cmake-out/libexecutorch_core.a"
+            CACHE FILEPATH "Path to the ExecuTorch static runtime library"
+        )
+    endif()
+
+    if(NOT EXISTS "${EXECUTORCH_CORE_LIBRARY}")
+        message(FATAL_ERROR "EXECUTORCH_CORE_LIBRARY='${EXECUTORCH_CORE_LIBRARY}' does not exist")
+    endif()
+endif()
+
 if (WITH_TESTS)
 	include(FetchContent)
 	include(${CMAKE_SOURCE_DIR}/third_party/googletest/googletest.cmake)

diff --git a/core/runtime/BUILD b/core/runtime/BUILD
@@ -1,6 +1,7 @@
 load("@rules_cc//cc:defs.bzl", "cc_library")
 load("@rules_pkg//:pkg.bzl", "pkg_tar")
 load("@rules_pkg//pkg:mappings.bzl", "pkg_files")
+
 package(default_visibility = ["//visibility:public"])
 
 config_setting(
@@ -58,17 +59,22 @@ config_setting(
     ],
 )
 
+# runtime_base: TRTEngine + device management + serialization utilities.
+# Does NOT include register_jit_hooks.cpp (TorchScript torch::class_ /
+# TORCH_LIBRARY registrations), so it can be linked into the static
+# ExecuTorch TensorRT backend without causing a duplicate-registration
+# crash when libtorchtrt.so is also loaded in the same process.
 cc_library(
-    name = "runtime",
+    name = "runtime_base",
     srcs = [
         "DeviceList.cpp",
         "Platform.cpp",
         "RTDevice.cpp",
         "TRTEngine.cpp",
         "TRTEngineProfiler.cpp",
         "execute_engine.cpp",
-        "register_jit_hooks.cpp",
         "runtime.cpp",
+        "runtime_utils.cpp",
     ],
     hdrs = [
         "Platform.h",
@@ -100,6 +106,26 @@ cc_library(
     alwayslink = True,
 )
 
+# runtime: runtime_base plus register_jit_hooks.cpp, i.e. the full runtime including the
+# TorchScript torch::class_ / TORCH_LIBRARY registrations. Used by the main libtorchtrt.so.
+cc_library(
+    name = "runtime",
+    srcs = [
+        "register_jit_hooks.cpp",
+    ],
+    hdrs = [
+        "Platform.h",
+        "RTDevice.h",
+        "TRTEngine.h",
+        "TRTEngineProfiler.h",
+        "runtime.h",
+    ],
+    deps = [
+        ":runtime_base",  # all other runtime sources are compiled in this target
+    ],
+    alwayslink = True,  # link this target's objects even when no symbol from them is referenced
+)
+
 filegroup(
     name = "include_files",
     srcs = [
@@ -121,6 +147,6 @@ pkg_tar(
 pkg_files(
     name = "include_pkg_files",
     srcs = [":include_files"],
-    visibility = ["//visibility:public"],
     prefix = "include/torch_tensorrt/core/runtime/",
+    visibility = ["//visibility:public"],
 )
diff --git a/core/runtime/CMakeLists.txt b/core/runtime/CMakeLists.txt
@@ -9,6 +9,7 @@ set(CXX_SRCS
     "${CMAKE_CURRENT_SOURCE_DIR}/execute_engine.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/register_jit_hooks.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/runtime.cpp"
+    "${CMAKE_CURRENT_SOURCE_DIR}/runtime_utils.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/Platform.cpp"
 )
 
@@ -46,3 +47,7 @@ endif(NOT WIN32)
 
 # Install
 install(FILES ${HEADER_FILES} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/torch_tensorrt/core/runtime")
+
+if(BUILD_TORCHTRT_EXECUTORCH)  # descend into executorch/ only when the ExecuTorch build is enabled
+    add_subdirectory(executorch)
+endif()
diff --git a/core/runtime/TRTEngine.cpp b/core/runtime/TRTEngine.cpp
@@ -447,6 +447,29 @@ TRTEngine& TRTEngine::operator=(const TRTEngine& other) {
 }
 
 void TRTEngine::verify_serialization_fmt(const std::vector<std::string>& serialized_info) {
+  static const char* kIndexNames[] = {
+      "ABI_TARGET_IDX",
+      "NAME_IDX",
+      "DEVICE_IDX",
+      "ENGINE_IDX",
+      "INPUT_BINDING_NAMES_IDX",
+      "OUTPUT_BINDING_NAMES_IDX",
+      "HW_COMPATIBLE_IDX",
+      "SERIALIZED_METADATA_IDX",
+      "TARGET_PLATFORM_IDX",
+      "REQUIRES_OUTPUT_ALLOCATOR_IDX",
+      "RESOURCE_ALLOCATION_STRATEGY_IDX",
+  };
+  fprintf(stderr, "[verify_serialization_fmt] %zu entries (expected %d):\n", serialized_info.size(), SERIALIZATION_LEN);
+  for (size_t i = 0; i < serialized_info.size(); ++i) {
+    const char* name = (i < sizeof(kIndexNames) / sizeof(kIndexNames[0])) ? kIndexNames[i] : "?";
+    if (i == ENGINE_IDX) {
+      fprintf(stderr, "  [%zu] %-35s = <binary, %zu bytes>\n", i, name, serialized_info[i].size());
+    } else {
+      fprintf(stderr, "  [%zu] %-35s = \"%s\"\n", i, name, serialized_info[i].c_str());
+    }
+  }
+
   TORCHTRT_CHECK(
       serialized_info.size() == SERIALIZATION_LEN,
       "Program to be deserialized targets an incompatible Torch-TensorRT ABI");

diff --git a/core/runtime/executorch/BUILD b/core/runtime/executorch/BUILD
@@ -0,0 +1,80 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
+package(default_visibility = ["//visibility:public"])
+
+config_setting(
+    name = "use_torch_whl",
+    flag_values = {
+        "//toolchains/dep_src:torch": "whl",
+    },
+)
+
+config_setting(
+    name = "rtx_x86_64",
+    constraint_values = [
+        "@platforms//cpu:x86_64",
+        "@platforms//os:linux",
+    ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
+)
+
+config_setting(
+    name = "rtx_win",
+    constraint_values = [
+        "@platforms//os:windows",
+    ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "rtx",
+    },
+)
+
+config_setting(
+    name = "sbsa",
+    constraint_values = [
+        "@platforms//cpu:aarch64",
+    ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "default",
+    },
+)
+
+config_setting(
+    name = "jetpack",
+    constraint_values = [
+        "@platforms//cpu:aarch64",
+    ],
+    flag_values = {
+        "//toolchains/dep_collection:compute_libs": "jetpack",
+    },
+)
+
+config_setting(
+    name = "windows",
+    constraint_values = [
+        "@platforms//os:windows",
+    ],
+)
+
+cc_library(
+    name = "tensorrt_executorch_backend",
+    srcs = ["TensorRTBackend.cpp"],
+    hdrs = ["TensorRTBackend.h"],
+    # Build the TensorRT backend as a static library. The final application
+    # links this target together with the ExecuTorch runtime it was compiled
+    # against, avoiding any runtime plugin/dlopen dependency.
+    deps = [
+        "//core/runtime:runtime_base",  # base runtime target, which leaves out register_jit_hooks.cpp
+        "//core/util:prelude",
+        "@executorch//:executorch_headers",
+    ] + select({  # appends the nvinfer target for whichever config_setting in this file matches
+        ":jetpack": ["@tensorrt_l4t//:nvinfer"],
+        ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"],
+        ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"],
+        ":sbsa": ["@tensorrt_sbsa//:nvinfer"],
+        ":windows": ["@tensorrt_win//:nvinfer"],
+        "//conditions:default": ["@tensorrt//:nvinfer"],
+    }),
+    alwayslink = True,  # link this target's objects even when no symbol from them is referenced
+)
-Original file line number
+Diff line change
@@ Expand Up / @@ -24,3 +24,5 @@ if(NOT CMAKE_CONFIGURATION_TYPES) @@
                                                          ${VALID_BUILD_TYPES})
         endif()
     endif()
+    option(BUILD_TORCHTRT_EXECUTORCH "Build the ExecuTorch TensorRT backend and example runner" OFF)