Commit a6d1f13

Merge remote-tracking branch 'upstream/pull-request/307' into bmh-307

2 parents 8b3c5da + 663ba48 commit a6d1f13

7 files changed (+77 additions, -81 deletions)

.github/workflows/all_libs.yaml

Lines changed: 9 additions & 4 deletions

@@ -66,12 +66,17 @@ jobs:
       run: |
         apt install -y --no-install-recommends gfortran libblas-dev wget
 
-    - name: Install TensorRT (x86_64 only)
+    - name: Install TensorRT (amd64)
       if: matrix.platform == 'amd64'
       run: |
-        wget https://developer.download.nvidia.com/compute/tensorrt/10.13.3/local_installers/nv-tensorrt-local-repo-ubuntu2404-10.13.3-cuda-12.9_1.0-1_amd64.deb
-        dpkg -i nv-tensorrt-local-repo-ubuntu2404-10.13.3-cuda-12.9_1.0-1_amd64.deb
-        cp /var/nv-tensorrt-local-repo-ubuntu2404-10.13.3-cuda-12.9/nv-tensorrt-local-4B177B4F-keyring.gpg /usr/share/keyrings/
+        apt-cache search tensorrt | awk '{print "Package: "$1"\nPin: version *+cuda${{ matrix.cuda_version }}\nPin-Priority: 1001\n"}' | tee /etc/apt/preferences.d/tensorrt-cuda${{ matrix.cuda_version }}.pref > /dev/null
+        apt update
+        apt install -y tensorrt-dev
+
+    - name: Install TensorRT (arm64)
+      if: matrix.platform == 'arm64'
+      run: |
+        apt-cache search tensorrt | awk '{print "Package: "$1"\nPin: version *+cuda13.0\nPin-Priority: 1001\n"}' | tee /etc/apt/preferences.d/tensorrt-cuda13.0.pref > /dev/null
         apt update
         apt install -y tensorrt-dev
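
The new amd64/arm64 steps replace the old local-repo .deb install with apt pinning: each hit from `apt-cache search tensorrt` becomes a pin stanza, so `apt install tensorrt-dev` resolves every TensorRT package to the build matching the matrix CUDA version. (The identical change lands in .github/workflows/lib_qec.yaml below.) A minimal standalone sketch of the same idea, with a fixed value standing in for `${{ matrix.cuda_version }}` and an illustrative package name in the comments:

    #!/usr/bin/env bash
    # Sketch: pin every package matching "tensorrt" to the +cuda12.9 build.
    cuda_version="12.9"   # assumption: stands in for ${{ matrix.cuda_version }}

    # apt-cache search prints "name - description"; awk's $1 is the package name.
    apt-cache search tensorrt \
      | awk -v cv="$cuda_version" '{print "Package: "$1"\nPin: version *+cuda"cv"\nPin-Priority: 1001\n"}' \
      | sudo tee "/etc/apt/preferences.d/tensorrt-cuda${cuda_version}.pref" > /dev/null

    # Each emitted stanza looks like (package name illustrative):
    #   Package: libnvinfer10
    #   Pin: version *+cuda12.9
    #   Pin-Priority: 1001
    # A priority above 1000 forces the pinned build even over a newer installed one.
    sudo apt update
    apt-cache policy tensorrt-dev   # verify the pinned candidate before installing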

.github/workflows/build_wheels.yaml

Lines changed: 32 additions & 5 deletions

@@ -68,18 +68,45 @@ jobs:
       run: |
         cuda_major=`echo ${{ matrix.cuda_version }} | cut -d . -f1`
         echo "cuda_major=$cuda_major" >> $GITHUB_OUTPUT
-        tensorrt_version="10.13.3.9"
+        # Map CUDA 12.6 to 12.9 for TensorRT filename
+        if [ "${{ matrix.cuda_version }}" == "12.6" ]; then
+          tensorrt_cuda_version="12.9"
+          tensorrt_cuda_major="12"
+        else
+          tensorrt_cuda_version="${{ matrix.cuda_version }}"
+          tensorrt_cuda_major="$cuda_major"
+        fi
+        echo "tensorrt_cuda_version=$tensorrt_cuda_version" >> $GITHUB_OUTPUT
+        echo "tensorrt_cuda_major=$tensorrt_cuda_major" >> $GITHUB_OUTPUT
+        tensorrt_major_version="10.13.3"
+        tensorrt_minor_version="9"
+        tensorrt_version="${tensorrt_major_version}.${tensorrt_minor_version}"
+        echo "tensorrt_major_version=$tensorrt_major_version" >> $GITHUB_OUTPUT
         echo "tensorrt_version=$tensorrt_version" >> $GITHUB_OUTPUT
 
-    - name: Install TensorRT (x86_64 only)
+    - name: Install TensorRT (amd64)
       shell: bash
       if: matrix.platform == 'amd64'
       run: |
         mkdir -p /trt_download
         pushd /trt_download
         pwd
-        wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.3/tars/TensorRT-${{ steps.config.outputs.tensorrt_version }}.Linux.x86_64-gnu.cuda-12.9.tar.gz
-        tar -zxvf TensorRT-${{ steps.config.outputs.tensorrt_version }}.Linux.x86_64-gnu.cuda-12.9.tar.gz
+        wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/${{ steps.config.outputs.tensorrt_major_version }}/tars/TensorRT-${{ steps.config.outputs.tensorrt_version }}.Linux.x86_64-gnu.cuda-${{ steps.config.outputs.tensorrt_cuda_version }}.tar.gz
+        tar -zxvf TensorRT-${{ steps.config.outputs.tensorrt_version }}.Linux.x86_64-gnu.cuda-${{ steps.config.outputs.tensorrt_cuda_version }}.tar.gz
+        pwd
+        popd
+        find /trt_download/TensorRT-${{ steps.config.outputs.tensorrt_version }} -name "NvInfer.h"
+        find /trt_download/TensorRT-${{ steps.config.outputs.tensorrt_version }} -name "NvInferRuntime.h"
+
+    - name: Install TensorRT (arm64)
+      shell: bash
+      if: matrix.platform == 'arm64'
+      run: |
+        mkdir -p /trt_download
+        pushd /trt_download
+        pwd
+        wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/${{ steps.config.outputs.tensorrt_major_version }}/tars/TensorRT-${{ steps.config.outputs.tensorrt_version }}.Linux.aarch64-gnu.cuda-13.0.tar.gz
+        tar -zxvf TensorRT-${{ steps.config.outputs.tensorrt_version }}.Linux.aarch64-gnu.cuda-13.0.tar.gz
         pwd
         popd
         find /trt_download/TensorRT-${{ steps.config.outputs.tensorrt_version }} -name "NvInfer.h"

@@ -360,7 +387,7 @@ jobs:
       with:
         set-safe-directory: true
         lfs: true # download assets file(s) for TRT tests
-
+
     - name: Configure
       id: config
      run: |
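
The reworked Configure step splits the TensorRT version into its components and maps the matrix CUDA version onto the CUDA version embedded in the tarball filename; per the diff's own comment, 12.6 maps to 12.9 (presumably because no cuda-12.6 tarball of TensorRT 10.13.3 is published). The mapping can be sanity-checked standalone (sketch; `cuda_version` stands in for the matrix value):

    #!/usr/bin/env bash
    cuda_version="12.6"   # assumption: stands in for ${{ matrix.cuda_version }}
    cuda_major=$(echo "$cuda_version" | cut -d . -f1)

    # Per the workflow comment: map CUDA 12.6 to 12.9 for the TensorRT filename.
    if [ "$cuda_version" == "12.6" ]; then
      tensorrt_cuda_version="12.9"
      tensorrt_cuda_major="12"
    else
      tensorrt_cuda_version="$cuda_version"
      tensorrt_cuda_major="$cuda_major"
    fi

    tensorrt_version="10.13.3.9"
    echo "TensorRT-${tensorrt_version}.Linux.x86_64-gnu.cuda-${tensorrt_cuda_version}.tar.gz"
    # -> TensorRT-10.13.3.9.Linux.x86_64-gnu.cuda-12.9.tar.gz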

.github/workflows/lib_qec.yaml

Lines changed: 9 additions & 4 deletions

@@ -65,12 +65,17 @@ jobs:
       run: |
         apt install -y --no-install-recommends gfortran libblas-dev wget
 
-    - name: Install TensorRT (x86_64 only)
+    - name: Install TensorRT (amd64)
       if: matrix.platform == 'amd64'
       run: |
-        wget https://developer.download.nvidia.com/compute/tensorrt/10.13.3/local_installers/nv-tensorrt-local-repo-ubuntu2404-10.13.3-cuda-12.9_1.0-1_amd64.deb
-        dpkg -i nv-tensorrt-local-repo-ubuntu2404-10.13.3-cuda-12.9_1.0-1_amd64.deb
-        cp /var/nv-tensorrt-local-repo-ubuntu2404-10.13.3-cuda-12.9/nv-tensorrt-local-4B177B4F-keyring.gpg /usr/share/keyrings/
+        apt-cache search tensorrt | awk '{print "Package: "$1"\nPin: version *+cuda${{ matrix.cuda_version }}\nPin-Priority: 1001\n"}' | tee /etc/apt/preferences.d/tensorrt-cuda${{ matrix.cuda_version }}.pref > /dev/null
+        apt update
+        apt install -y tensorrt-dev
+
+    - name: Install TensorRT (arm64)
+      if: matrix.platform == 'arm64'
+      run: |
+        apt-cache search tensorrt | awk '{print "Package: "$1"\nPin: version *+cuda13.0\nPin-Priority: 1001\n"}' | tee /etc/apt/preferences.d/tensorrt-cuda13.0.pref > /dev/null
         apt update
         apt install -y tensorrt-dev

libs/qec/lib/CMakeLists.txt

Lines changed: 1 addition & 4 deletions

@@ -25,10 +25,7 @@ add_library(${LIBRARY_NAME} SHARED
 )
 
 add_subdirectory(decoders/plugins/example)
-# TensorRT decoder is only built for x86 architectures
-if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)")
-  add_subdirectory(decoders/plugins/trt_decoder)
-endif()
+add_subdirectory(decoders/plugins/trt_decoder)
 add_subdirectory(codes)
 add_subdirectory(device)
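
With the `CMAKE_SYSTEM_PROCESSOR` guard removed, the trt_decoder plugin configures unconditionally, aarch64 included. A quick local check on an arm64 host (sketch; the source/build paths and the target name inferred from the plugin directory are assumptions, not taken from the repo):

    # Configure from the project root and confirm the plugin target now appears.
    cmake -S . -B build
    cmake --build build --target help | grep -i trt_decoder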

libs/qec/pyproject.toml.cu13

Lines changed: 1 addition & 1 deletion

@@ -66,5 +66,5 @@ all = [
     "opt_einsum",
     "torch>=2.9.0",
     "cuquantum-python-cu13==25.09",
-    "tensorrt-cu13; platform_machine == 'x86_64'"
+    "tensorrt-cu13"
 ]
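
The deleted PEP 508 environment marker is what kept pip from ever selecting `tensorrt-cu13` off x86_64; without it, the `all` extra pulls the package on aarch64 as well. The marker behavior is easy to observe directly (sketch; the exact message wording varies by pip version):

    # On an aarch64 host, pip skips a requirement whose marker doesn't match:
    pip install "tensorrt-cu13; platform_machine == 'x86_64'"   # ignored on aarch64
    pip install "tensorrt-cu13"                                 # unconditional, as in the new entry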

libs/qec/python/tests/test_trt_decoder.py

Lines changed: 25 additions & 63 deletions

@@ -22,7 +22,6 @@
 import cudaq_qec as qec
 import os
 import tempfile
-import platform
 
 # Test data constants
 NUM_TEST_SAMPLES = 200

@@ -44,14 +43,7 @@ def _is_cuda_available():
     return False
 
 
-def _is_arm_architecture():
-    """Check if the current architecture is ARM."""
-    machine = platform.machine().lower()
-    return any(arch in machine for arch in ['arm', 'aarch64'])
-
-
 CUDA_AVAILABLE = _is_cuda_available()
-IS_ARM = _is_arm_architecture()
 
 # Test inputs - 100 test cases with 24 detectors each
 TEST_INPUTS = [[

@@ -70,38 +62,6 @@ def _is_arm_architecture():
     0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0,
     0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
 ],
-[
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-],
-[
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-],
-[
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-],
-[
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-],
-[
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-],
-[
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-],
-[
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-],
-[
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
-],
 [
     0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0,
     0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0

@@ -456,26 +416,25 @@ def _is_arm_architecture():
 ]]
 
 # Expected outputs from PyTorch model
-TEST_OUTPUTS = [[0.527120], [0.546192], [0.546192], [0.555534], [0.546192],
+TEST_OUTPUTS = [[0.527120], [0.546192], [0.546192], [0.555534], [0.564633],
+                [0.546192], [0.546192], [0.546192], [0.546192], [0.546192],
+                [0.501822], [0.514081], [0.546192], [0.546192], [0.546192],
+                [0.546192], [0.546192], [0.546192], [0.504422], [0.546192],
                 [0.546192], [0.546192], [0.546192], [0.546192], [0.546192],
-                [0.546192], [0.546192], [0.564633], [0.546192], [0.546192],
-                [0.546192], [0.546192], [0.546192], [0.501822], [0.514081],
+                [0.546192], [0.546192], [0.546192], [0.546192], [0.574795],
+                [0.546192], [0.546192], [0.546192], [0.501297], [0.546192],
                 [0.546192], [0.546192], [0.546192], [0.546192], [0.546192],
-                [0.546192], [0.504422], [0.546192], [0.546192], [0.546192],
+                [0.567863], [0.539243], [0.546192], [0.567863], [0.498555],
+                [0.546192], [0.546192], [0.546192], [0.575040], [0.546192],
                 [0.546192], [0.546192], [0.546192], [0.546192], [0.546192],
-                [0.546192], [0.546192], [0.574795], [0.546192], [0.546192],
-                [0.546192], [0.501297], [0.546192], [0.546192], [0.546192],
-                [0.546192], [0.546192], [0.546192], [0.567863], [0.539243],
-                [0.546192], [0.567863], [0.498555], [0.546192], [0.546192],
-                [0.546192], [0.575040], [0.546192], [0.546192], [0.546192],
+                [0.546192], [0.546192], [0.546192], [0.544458], [0.546192],
                 [0.546192], [0.546192], [0.546192], [0.546192], [0.546192],
-                [0.546192], [0.544458], [0.546192], [0.546192], [0.546192],
                 [0.546192], [0.546192], [0.546192], [0.546192], [0.546192],
-                [0.546192], [0.546192], [0.546192], [0.546192], [0.565316],
-                [0.474930], [0.546192], [0.546192], [0.546192], [0.546192],
-                [0.546192], [0.527011], [0.546192], [0.546192], [0.546192],
+                [0.546192], [0.565316], [0.474930], [0.546192], [0.546192],
+                [0.546192], [0.546192], [0.546192], [0.527011], [0.546192],
                 [0.546192], [0.546192], [0.546192], [0.546192], [0.546192],
-                [0.546192], [0.499671], [0.546192], [0.546192], [0.546192]]
+                [0.546192], [0.546192], [0.546192], [0.499671], [0.546192],
+                [0.546192], [0.546192]]
 
 
 class TestTRTDecoderSetup:

@@ -504,7 +463,6 @@ def teardown_method(self):
             os.remove(self.test_file_path)
 
 
-@pytest.mark.skipif(IS_ARM, reason="ARM architecture not supported")
 class TestTRTDecoderParameterValidation(TestTRTDecoderSetup):
     """Tests for TRT decoder parameter validation."""
 

@@ -530,11 +488,18 @@ def test_validate_parameters_both_paths_provided(self):
     def test_validate_parameters_no_paths_provided(self):
         """Test that providing no paths creates decoder with warning."""
         # Decoder is created but logs a warning - it won't be usable for inference
-        decoder = qec.get_decoder('trt_decoder', self.H)
-        assert decoder is not None
+        # Create the TRT decoder
+        try:
+            decoder = qec.get_decoder('trt_decoder', self.H)
+            # If decoder is None or doesn't initialize properly, skip these tests
+            if decoder is None:
+                pytest.skip(
+                    "TRT decoder returned None - likely CUDA/GPU unavailable")
+        except (RuntimeError, SystemError, Exception) as e:
+            pytest.skip(
+                f"Failed to create TRT decoder (GPU may be unavailable): {e}")
 
 
-@pytest.mark.skipif(IS_ARM, reason="ARM architecture not supported")
 class TestTRTDecoderFileOperations(TestTRTDecoderSetup):
     """Tests for TRT decoder file loading operations."""
 

@@ -561,10 +526,8 @@ def test_load_file_non_existent_file(self):
 
 
 @pytest.mark.skipif(
-    not os.path.exists(ONNX_MODEL_PATH) or not CUDA_AVAILABLE or IS_ARM,
-    reason=
-    "ONNX model file not found, CUDA/GPU not available, or ARM architecture not supported"
-)
+    not os.path.exists(ONNX_MODEL_PATH) or not CUDA_AVAILABLE,
+    reason="ONNX model file not found or CUDA/GPU not available")
 class TestTRTDecoderInference(TestTRTDecoderSetup):
     """Tests for TRT decoder inference with actual model.
 

@@ -720,7 +683,6 @@ def test_decoder_batch_processing(self):
             assert error < TOLERANCE, f"Batch test case {i} failed with error {error}"
 
 
-@pytest.mark.skipif(IS_ARM, reason="ARM architecture not supported")
 class TestTRTDecoderEdgeCases(TestTRTDecoderSetup):
     """Tests for TRT decoder edge cases. Requires GPU access."""
 
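
Net effect on the suite: the `IS_ARM` gates are gone, decoder construction is wrapped in try/skip, and the inference tests now gate only on the ONNX model and CUDA availability. To see which tests skip and why on a given host (sketch; `-rs` adds pytest's skip-reason summary):

    # Path as shown in this diff; run from the repository root.
    python -m pytest libs/qec/python/tests/test_trt_decoder.py -rs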
