Commit a6d1f13

Merge remote-tracking branch 'upstream/pull-request/307' into bmh-307

2 parents 8b3c5da + 663ba48 commit a6d1f13

7 files changed (+77 additions, -81 deletions)

.github/workflows/all_libs.yaml

Lines changed: 9 additions & 4 deletions

@@ -66,12 +66,17 @@ jobs:
       run: |
         apt install -y --no-install-recommends gfortran libblas-dev wget
 
-    - name: Install TensorRT (x86_64 only)
+    - name: Install TensorRT (amd64)
       if: matrix.platform == 'amd64'
       run: |
-        wget https://developer.download.nvidia.com/compute/tensorrt/10.13.3/local_installers/nv-tensorrt-local-repo-ubuntu2404-10.13.3-cuda-12.9_1.0-1_amd64.deb
-        dpkg -i nv-tensorrt-local-repo-ubuntu2404-10.13.3-cuda-12.9_1.0-1_amd64.deb
-        cp /var/nv-tensorrt-local-repo-ubuntu2404-10.13.3-cuda-12.9/nv-tensorrt-local-4B177B4F-keyring.gpg /usr/share/keyrings/
+        apt-cache search tensorrt | awk '{print "Package: "$1"\nPin: version *+cuda${{ matrix.cuda_version }}\nPin-Priority: 1001\n"}' | tee /etc/apt/preferences.d/tensorrt-cuda${{ matrix.cuda_version }}.pref > /dev/null
+        apt update
+        apt install -y tensorrt-dev
+
+    - name: Install TensorRT (arm64)
+      if: matrix.platform == 'arm64'
+      run: |
+        apt-cache search tensorrt | awk '{print "Package: "$1"\nPin: version *+cuda13.0\nPin-Priority: 1001\n"}' | tee /etc/apt/preferences.d/tensorrt-cuda13.0.pref > /dev/null
         apt update
         apt install -y tensorrt-dev
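
The new amd64/arm64 steps replace the old local-repo .deb install with apt pinning: each hit from `apt-cache search tensorrt` becomes a pin stanza, so `apt install tensorrt-dev` resolves every TensorRT package to the build matching the matrix CUDA version. (The identical change lands in .github/workflows/lib_qec.yaml below.) A minimal standalone sketch of the same idea, with a fixed value standing in for `${{ matrix.cuda_version }}` and an illustrative package name in the comments:

    #!/usr/bin/env bash
    # Sketch: pin every package matching "tensorrt" to the +cuda12.9 build.
    cuda_version="12.9"   # assumption: stands in for ${{ matrix.cuda_version }}

    # apt-cache search prints "name - description"; awk's $1 is the package name.
    apt-cache search tensorrt \
      | awk -v cv="$cuda_version" '{print "Package: "$1"\nPin: version *+cuda"cv"\nPin-Priority: 1001\n"}' \
      | sudo tee "/etc/apt/preferences.d/tensorrt-cuda${cuda_version}.pref" > /dev/null

    # Each emitted stanza looks like (package name illustrative):
    #   Package: libnvinfer10
    #   Pin: version *+cuda12.9
    #   Pin-Priority: 1001
    # A priority above 1000 forces the pinned build even over a newer installed one.
    sudo apt update
    apt-cache policy tensorrt-dev   # verify the pinned candidate before installing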

.github/workflows/build_wheels.yaml

Lines changed: 32 additions & 5 deletions

@@ -68,18 +68,45 @@ jobs:
       run: |
         cuda_major=`echo ${{ matrix.cuda_version }} | cut -d . -f1`
         echo "cuda_major=$cuda_major" >> $GITHUB_OUTPUT
-        tensorrt_version="10.13.3.9"
+        # Map CUDA 12.6 to 12.9 for TensorRT filename
+        if [ "${{ matrix.cuda_version }}" == "12.6" ]; then
+          tensorrt_cuda_version="12.9"
+          tensorrt_cuda_major="12"
+        else
+          tensorrt_cuda_version="${{ matrix.cuda_version }}"
+          tensorrt_cuda_major="$cuda_major"
+        fi
+        echo "tensorrt_cuda_version=$tensorrt_cuda_version" >> $GITHUB_OUTPUT
+        echo "tensorrt_cuda_major=$tensorrt_cuda_major" >> $GITHUB_OUTPUT
+        tensorrt_major_version="10.13.3"
+        tensorrt_minor_version="9"
+        tensorrt_version="${tensorrt_major_version}.${tensorrt_minor_version}"
+        echo "tensorrt_major_version=$tensorrt_major_version" >> $GITHUB_OUTPUT
         echo "tensorrt_version=$tensorrt_version" >> $GITHUB_OUTPUT
 
-    - name: Install TensorRT (x86_64 only)
+    - name: Install TensorRT (amd64)
       shell: bash
       if: matrix.platform == 'amd64'
       run: |
         mkdir -p /trt_download
         pushd /trt_download
         pwd
-        wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.3/tars/TensorRT-${{ steps.config.outputs.tensorrt_version }}.Linux.x86_64-gnu.cuda-12.9.tar.gz
-        tar -zxvf TensorRT-${{ steps.config.outputs.tensorrt_version }}.Linux.x86_64-gnu.cuda-12.9.tar.gz
+        wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/${{ steps.config.outputs.tensorrt_major_version }}/tars/TensorRT-${{ steps.config.outputs.tensorrt_version }}.Linux.x86_64-gnu.cuda-${{ steps.config.outputs.tensorrt_cuda_version }}.tar.gz
+        tar -zxvf TensorRT-${{ steps.config.outputs.tensorrt_version }}.Linux.x86_64-gnu.cuda-${{ steps.config.outputs.tensorrt_cuda_version }}.tar.gz
+        pwd
+        popd
+        find /trt_download/TensorRT-${{ steps.config.outputs.tensorrt_version }} -name "NvInfer.h"
+        find /trt_download/TensorRT-${{ steps.config.outputs.tensorrt_version }} -name "NvInferRuntime.h"
+
+    - name: Install TensorRT (arm64)
+      shell: bash
+      if: matrix.platform == 'arm64'
+      run: |
+        mkdir -p /trt_download
+        pushd /trt_download
+        pwd
+        wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/${{ steps.config.outputs.tensorrt_major_version }}/tars/TensorRT-${{ steps.config.outputs.tensorrt_version }}.Linux.aarch64-gnu.cuda-13.0.tar.gz
+        tar -zxvf TensorRT-${{ steps.config.outputs.tensorrt_version }}.Linux.aarch64-gnu.cuda-13.0.tar.gz
         pwd
         popd
         find /trt_download/TensorRT-${{ steps.config.outputs.tensorrt_version }} -name "NvInfer.h"

@@ -360,7 +387,7 @@ jobs:
       with:
         set-safe-directory: true
         lfs: true # download assets file(s) for TRT tests
-
+
     - name: Configure
       id: config
      run: |
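
The reworked Configure step splits the TensorRT version into its components and maps the matrix CUDA version onto the CUDA version embedded in the tarball filename; per the diff's own comment, 12.6 maps to 12.9 (presumably because no cuda-12.6 tarball of TensorRT 10.13.3 is published). The mapping can be sanity-checked standalone (sketch; `cuda_version` stands in for the matrix value):

    #!/usr/bin/env bash
    cuda_version="12.6"   # assumption: stands in for ${{ matrix.cuda_version }}
    cuda_major=$(echo "$cuda_version" | cut -d . -f1)

    # Per the workflow comment: map CUDA 12.6 to 12.9 for the TensorRT filename.
    if [ "$cuda_version" == "12.6" ]; then
      tensorrt_cuda_version="12.9"
      tensorrt_cuda_major="12"
    else
      tensorrt_cuda_version="$cuda_version"
      tensorrt_cuda_major="$cuda_major"
    fi

    tensorrt_version="10.13.3.9"
    echo "TensorRT-${tensorrt_version}.Linux.x86_64-gnu.cuda-${tensorrt_cuda_version}.tar.gz"
    # -> TensorRT-10.13.3.9.Linux.x86_64-gnu.cuda-12.9.tar.gz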

.github/workflows/lib_qec.yaml

Lines changed: 9 additions & 4 deletions

@@ -65,12 +65,17 @@ jobs:
       run: |
         apt install -y --no-install-recommends gfortran libblas-dev wget
 
-    - name: Install TensorRT (x86_64 only)
+    - name: Install TensorRT (amd64)
       if: matrix.platform == 'amd64'
       run: |
-        wget https://developer.download.nvidia.com/compute/tensorrt/10.13.3/local_installers/nv-tensorrt-local-repo-ubuntu2404-10.13.3-cuda-12.9_1.0-1_amd64.deb
-        dpkg -i nv-tensorrt-local-repo-ubuntu2404-10.13.3-cuda-12.9_1.0-1_amd64.deb
-        cp /var/nv-tensorrt-local-repo-ubuntu2404-10.13.3-cuda-12.9/nv-tensorrt-local-4B177B4F-keyring.gpg /usr/share/keyrings/
+        apt-cache search tensorrt | awk '{print "Package: "$1"\nPin: version *+cuda${{ matrix.cuda_version }}\nPin-Priority: 1001\n"}' | tee /etc/apt/preferences.d/tensorrt-cuda${{ matrix.cuda_version }}.pref > /dev/null
+        apt update
+        apt install -y tensorrt-dev
+
+    - name: Install TensorRT (arm64)
+      if: matrix.platform == 'arm64'
+      run: |
+        apt-cache search tensorrt | awk '{print "Package: "$1"\nPin: version *+cuda13.0\nPin-Priority: 1001\n"}' | tee /etc/apt/preferences.d/tensorrt-cuda13.0.pref > /dev/null
         apt update
         apt install -y tensorrt-dev

libs/qec/lib/CMakeLists.txt

Lines changed: 1 addition & 4 deletions

@@ -25,10 +25,7 @@ add_library(${LIBRARY_NAME} SHARED
 )
 
 add_subdirectory(decoders/plugins/example)
-# TensorRT decoder is only built for x86 architectures
-if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
-  add_subdirectory(decoders/plugins/trt_decoder)
-endif()
+add_subdirectory(decoders/plugins/trt_decoder)
 add_subdirectory(codes)
 add_subdirectory(device)
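
With the `CMAKE_SYSTEM_PROCESSOR` guard removed, the trt_decoder plugin configures unconditionally, aarch64 included. A quick local check on an arm64 host (sketch; the source/build paths and the target name inferred from the plugin directory are assumptions, not taken from the repo):

    # Configure from the project root and confirm the plugin target now appears.
    cmake -S . -B build
    cmake --build build --target help | grep -i trt_decoder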

libs/qec/pyproject.toml.cu13

Lines changed: 1 addition & 1 deletion

@@ -66,5 +66,5 @@ all = [
     "opt_einsum",
     "torch>=2.9.0",
     "cuquantum-python-cu13==25.09",
-    "tensorrt-cu13; platform_machine == 'x86_64'"
+    "tensorrt-cu13"
 ]
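
The deleted PEP 508 environment marker is what kept pip from ever selecting `tensorrt-cu13` off x86_64; without it, the `all` extra pulls the package on aarch64 as well. The marker behavior is easy to observe directly (sketch; the exact message wording varies by pip version):

    # On an aarch64 host, pip skips a requirement whose marker doesn't match:
    pip install "tensorrt-cu13; platform_machine == 'x86_64'"   # ignored on aarch64
    pip install "tensorrt-cu13"                                 # unconditional, as in the new entry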

libs/qec/python/tests/test_trt_decoder.py

Lines changed: 25 additions & 63 deletions

@@ -22,7 +22,6 @@
 import cudaq_qec as qec
 import os
 import tempfile
-import platform
 
 # Test data constants
 NUM_TEST_SAMPLES = 200

@@ -44,14 +43,7 @@ def _is_cuda_available():
     return False
 
 
-def _is_arm_architecture():
-    """Check if the current architecture is ARM."""
-    machine = platform.machine().lower()
-    return any(arch in machine for arch in ['arm', 'aarch64'])
-
-
 CUDA_AVAILABLE = _is_cuda_available()
-IS_ARM = _is_arm_architecture()
 
 # Test inputs - 100 test cases with 24 detectors each
 TEST_INPUTS = [[

@@ -70,38 +62,6 @@ def _is_arm_architecture():
     0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0,
     0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
 ],
-[
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-],
-[
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-],
-[
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-],
-[
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-],
-[
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-],
-[
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-],
-[
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-],
-[
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-],
 [
     0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0,
     0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0

@@ -456,26 +416,25 @@ def _is_arm_architecture():
 ]]
 
 # Expected outputs from PyTorch model
-TEST_OUTPUTS = [[0.527120], [0.546192], [0.546192], [0.555534], [0.546192],
+TEST_OUTPUTS = [[0.527120], [0.546192], [0.546192], [0.555534], [0.564633],
+                [0.546192], [0.546192], [0.546192], [0.546192], [0.546192],
+                [0.501822], [0.514081], [0.546192], [0.546192], [0.546192],
+                [0.546192], [0.546192], [0.546192], [0.504422], [0.546192],
                 [0.546192], [0.546192], [0.546192], [0.546192], [0.546192],
-                [0.546192], [0.546192], [0.564633], [0.546192], [0.546192],
-                [0.546192], [0.546192], [0.546192], [0.501822], [0.514081],
+                [0.546192], [0.546192], [0.546192], [0.546192], [0.574795],
+                [0.546192], [0.546192], [0.546192], [0.501297], [0.546192],
                 [0.546192], [0.546192], [0.546192], [0.546192], [0.546192],
-                [0.546192], [0.504422], [0.546192], [0.546192], [0.546192],
+                [0.567863], [0.539243], [0.546192], [0.567863], [0.498555],
+                [0.546192], [0.546192], [0.546192], [0.575040], [0.546192],
                 [0.546192], [0.546192], [0.546192], [0.546192], [0.546192],
-                [0.546192], [0.546192], [0.574795], [0.546192], [0.546192],
-                [0.546192], [0.501297], [0.546192], [0.546192], [0.546192],
-                [0.546192], [0.546192], [0.546192], [0.567863], [0.539243],
-                [0.546192], [0.567863], [0.498555], [0.546192], [0.546192],
-                [0.546192], [0.575040], [0.546192], [0.546192], [0.546192],
+                [0.546192], [0.546192], [0.546192], [0.544458], [0.546192],
                 [0.546192], [0.546192], [0.546192], [0.546192], [0.546192],
-                [0.546192], [0.544458], [0.546192], [0.546192], [0.546192],
                 [0.546192], [0.546192], [0.546192], [0.546192], [0.546192],
-                [0.546192], [0.546192], [0.546192], [0.546192], [0.565316],
-                [0.474930], [0.546192], [0.546192], [0.546192], [0.546192],
-                [0.546192], [0.527011], [0.546192], [0.546192], [0.546192],
+                [0.546192], [0.565316], [0.474930], [0.546192], [0.546192],
+                [0.546192], [0.546192], [0.546192], [0.527011], [0.546192],
                 [0.546192], [0.546192], [0.546192], [0.546192], [0.546192],
-                [0.546192], [0.499671], [0.546192], [0.546192], [0.546192]]
+                [0.546192], [0.546192], [0.546192], [0.499671], [0.546192],
+                [0.546192], [0.546192]]
 
 
 class TestTRTDecoderSetup:

@@ -504,7 +463,6 @@ def teardown_method(self):
             os.remove(self.test_file_path)
 
 
-@pytest.mark.skipif(IS_ARM, reason="ARM architecture not supported")
 class TestTRTDecoderParameterValidation(TestTRTDecoderSetup):
     """Tests for TRT decoder parameter validation."""
 

@@ -530,11 +488,18 @@ def test_validate_parameters_both_paths_provided(self):
     def test_validate_parameters_no_paths_provided(self):
         """Test that providing no paths creates decoder with warning."""
         # Decoder is created but logs a warning - it won't be usable for inference
-        decoder = qec.get_decoder('trt_decoder', self.H)
-        assert decoder is not None
+        # Create the TRT decoder
+        try:
+            decoder = qec.get_decoder('trt_decoder', self.H)
+            # If decoder is None or doesn't initialize properly, skip these tests
+            if decoder is None:
+                pytest.skip(
+                    "TRT decoder returned None - likely CUDA/GPU unavailable")
+        except (RuntimeError, SystemError, Exception) as e:
+            pytest.skip(
+                f"Failed to create TRT decoder (GPU may be unavailable): {e}")
 
 
-@pytest.mark.skipif(IS_ARM, reason="ARM architecture not supported")
 class TestTRTDecoderFileOperations(TestTRTDecoderSetup):
     """Tests for TRT decoder file loading operations."""
 

@@ -561,10 +526,8 @@ def test_load_file_non_existent_file(self):
 
 
 @pytest.mark.skipif(
-    not os.path.exists(ONNX_MODEL_PATH) or not CUDA_AVAILABLE or IS_ARM,
-    reason=
-    "ONNX model file not found, CUDA/GPU not available, or ARM architecture not supported"
-)
+    not os.path.exists(ONNX_MODEL_PATH) or not CUDA_AVAILABLE,
+    reason="ONNX model file not found or CUDA/GPU not available")
 class TestTRTDecoderInference(TestTRTDecoderSetup):
     """Tests for TRT decoder inference with actual model.
 

@@ -720,7 +683,6 @@ def test_decoder_batch_processing(self):
             assert error < TOLERANCE, f"Batch test case {i} failed with error {error}"
 
 
-@pytest.mark.skipif(IS_ARM, reason="ARM architecture not supported")
 class TestTRTDecoderEdgeCases(TestTRTDecoderSetup):
     """Tests for TRT decoder edge cases. Requires GPU access."""
 
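
Net effect on the suite: the `IS_ARM` gates are gone, decoder construction is wrapped in try/skip, and the inference tests now gate only on the ONNX model and CUDA availability. To see which tests skip and why on a given host (sketch; `-rs` adds pytest's skip-reason summary):

    # Path as shown in this diff; run from the repository root.
    python -m pytest libs/qec/python/tests/test_trt_decoder.py -rs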
