NVIDIA
diff --git a/‎.github/workflows/nightly.yaml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/nightly.yaml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎build.sh‎
Lines changed: 33 additions & 3 deletions b/‎build.sh‎
Lines changed: 33 additions & 3 deletions
diff --git a/‎cpp/CMakeLists.txt‎
Lines changed: 29 additions & 2 deletions b/‎cpp/CMakeLists.txt‎
Lines changed: 29 additions & 2 deletions
diff --git a/‎cpp/include/cuopt/error.hpp‎
Lines changed: 1 addition & 3 deletions b/‎cpp/include/cuopt/error.hpp‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎cpp/include/cuopt/linear_programming/utilities/internals.hpp‎
Lines changed: 2 additions & 1 deletion b/‎cpp/include/cuopt/linear_programming/utilities/internals.hpp‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎cpp/src/dual_simplex/barrier.cu‎
Lines changed: 45 additions & 13 deletions b/‎cpp/src/dual_simplex/barrier.cu‎
Lines changed: 45 additions & 13 deletions
diff --git a/‎cpp/src/linear_programming/pdlp.cu‎
Lines changed: 2 additions & 2 deletions b/‎cpp/src/linear_programming/pdlp.cu‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎cpp/src/linear_programming/utilities/cython_solve.cu‎
Lines changed: 2 additions & 3 deletions b/‎cpp/src/linear_programming/utilities/cython_solve.cu‎
Lines changed: 2 additions & 3 deletions
diff --git a/‎cpp/src/linear_programming/utils.cuh‎
Lines changed: 4 additions & 4 deletions b/‎cpp/src/linear_programming/utils.cuh‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎cpp/src/mip/diversity/lns/rins.cu‎
Lines changed: 2 additions & 2 deletions b/‎cpp/src/mip/diversity/lns/rins.cu‎
Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
 name: Trigger Nightly cuOpt Pipeline
@@ -17,7 +17,7 @@ jobs:
       matrix:
         cuopt_branch:
           - "main"
-          - "release/25.12"
+          - "release/26.02"
     steps:
       - uses: actions/checkout@v4
       - name: Trigger Pipeline
 
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
 
@@ -15,7 +15,7 @@ REPODIR=$(cd "$(dirname "$0")"; pwd)
 LIBCUOPT_BUILD_DIR=${LIBCUOPT_BUILD_DIR:=${REPODIR}/cpp/build}
 LIBMPS_PARSER_BUILD_DIR=${LIBMPS_PARSER_BUILD_DIR:=${REPODIR}/cpp/libmps_parser/build}
 
-VALIDARGS="clean libcuopt libmps_parser cuopt_mps_parser cuopt cuopt_server cuopt_sh_client docs deb -a -b -g -fsanitize -v -l= --verbose-pdlp --build-lp-only  --no-fetch-rapids --skip-c-python-adapters --skip-tests-build --skip-routing-build --skip-fatbin-write --host-lineinfo [--cmake-args=\\\"<args>\\\"] [--cache-tool=<tool>] -n --allgpuarch --ci-only-arch --show_depr_warn -h --help"
+VALIDARGS="clean libcuopt libmps_parser cuopt_mps_parser cuopt cuopt_server cuopt_sh_client docs deb -a -b -g -fsanitize -tsan -msan -v -l= --verbose-pdlp --build-lp-only  --no-fetch-rapids --skip-c-python-adapters --skip-tests-build --skip-routing-build --skip-fatbin-write --host-lineinfo [--cmake-args=\\\"<args>\\\"] [--cache-tool=<tool>] -n --allgpuarch --ci-only-arch --show_depr_warn -h --help"
 HELP="$0 [<target> ...] [<flag> ...]
  where <target> is:
    clean            - remove all existing build artifacts and configuration (start over)
@@ -32,7 +32,9 @@ HELP="$0 [<target> ...] [<flag> ...]
    -g               - build for debug
    -a               - Enable assertion (by default in debug mode)
    -b               - Build with benchmark settings
-   -fsanitize       - Build with sanitizer
+   -fsanitize       - Build with AddressSanitizer and UndefinedBehaviorSanitizer
+   -tsan            - Build with ThreadSanitizer (cannot be used with -fsanitize or -msan)
+   -msan            - Build with MemorySanitizer (cannot be used with -fsanitize or -tsan)
    -n               - no install step
    --no-fetch-rapids  - don't fetch rapids dependencies
    -l=              - log level. Options are: TRACE | DEBUG | INFO | WARN | ERROR | CRITICAL | OFF. Default=INFO
@@ -76,6 +78,8 @@ BUILD_ALL_GPU_ARCH=0
 BUILD_CI_ONLY=0
 BUILD_LP_ONLY=0
 BUILD_SANITIZER=0
+BUILD_TSAN=0
+BUILD_MSAN=0
 SKIP_C_PYTHON_ADAPTERS=0
 SKIP_TESTS_BUILD=0
 SKIP_ROUTING_BUILD=0
@@ -230,6 +234,12 @@ fi
 if hasArg -fsanitize; then
     BUILD_SANITIZER=1
 fi
+if hasArg -tsan; then
+    BUILD_TSAN=1
+fi
+if hasArg -msan; then
+    BUILD_MSAN=1
+fi
 if hasArg --skip-c-python-adapters; then
     SKIP_C_PYTHON_ADAPTERS=1
 fi
@@ -298,6 +308,24 @@ if [ ${BUILD_LP_ONLY} -eq 1 ] && [ ${SKIP_C_PYTHON_ADAPTERS} -eq 0 ]; then
     exit 1
 fi
 
+if [ ${BUILD_SANITIZER} -eq 1 ] && [ ${BUILD_TSAN} -eq 1 ]; then
+    echo "ERROR: -fsanitize and -tsan cannot be used together"
+    echo "AddressSanitizer and ThreadSanitizer are mutually exclusive"
+    exit 1
+fi
+
+if [ ${BUILD_SANITIZER} -eq 1 ] && [ ${BUILD_MSAN} -eq 1 ]; then
+    echo "ERROR: -fsanitize and -msan cannot be used together"
+    echo "AddressSanitizer and MemorySanitizer are mutually exclusive"
+    exit 1
+fi
+
+if [ ${BUILD_TSAN} -eq 1 ] && [ ${BUILD_MSAN} -eq 1 ]; then
+    echo "ERROR: -tsan and -msan cannot be used together"
+    echo "ThreadSanitizer and MemorySanitizer are mutually exclusive"
+    exit 1
+fi
+
 if  [ ${BUILD_ALL_GPU_ARCH} -eq 1 ]; then
     CUOPT_CMAKE_CUDA_ARCHITECTURES="RAPIDS"
     echo "Building for *ALL* supported GPU architectures..."
@@ -344,6 +372,8 @@ if buildAll || hasArg libcuopt; then
           -DFETCH_RAPIDS=${FETCH_RAPIDS} \
           -DBUILD_LP_ONLY=${BUILD_LP_ONLY} \
           -DBUILD_SANITIZER=${BUILD_SANITIZER} \
+          -DBUILD_TSAN=${BUILD_TSAN} \
+          -DBUILD_MSAN=${BUILD_MSAN} \
           -DSKIP_C_PYTHON_ADAPTERS=${SKIP_C_PYTHON_ADAPTERS} \
           -DBUILD_TESTS=$((1 - ${SKIP_TESTS_BUILD})) \
           -DSKIP_ROUTING_BUILD=${SKIP_ROUTING_BUILD} \
 
@@ -79,10 +79,33 @@ endif(CMAKE_COMPILER_IS_GNUCXX)
 # 1. Run the binary with env var set: LD_PRELOAD="$(gcc -print-file-name=libasan.so)" ASAN_OPTIONS='protect_shadow_gap=0:replace_intrin=0'
 # 2. (Optional) To run with a debugger (gdb or cuda-gdb) use the additional ASAN option alloc_dealloc_mismatch=0
 if(BUILD_SANITIZER)
-  list(APPEND CUOPT_CXX_FLAGS -fsanitize=address,undefined -fno-omit-frame-pointer -g -Wno-error=maybe-uninitialized)
+  list(APPEND CUOPT_CXX_FLAGS -fsanitize=address,undefined -fno-omit-frame-pointer -g)
+  if(NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+    list(APPEND CUOPT_CXX_FLAGS -Wno-error=maybe-uninitialized)
+  endif()
   add_link_options(-fsanitize=address,undefined)
 endif(BUILD_SANITIZER)
 
+# To use ThreadSanitizer:
+# 1. Build with clang and the -tsan flag
+# 2. Run the binary with env var set: OMP_TOOL_LIBRARIES=/usr/lib/llvm-17/lib/libarcher.so ARCHER_OPTIONS='verbose=1' TSAN_OPTIONS='suppressions=cpp/utilities/tsan_suppressions.txt:ignore_noninstrumented_modules=1:halt_on_error=1'
+#     Replace with local llvm install path. libarcher.so must be present
+if(BUILD_TSAN)
+  message(STATUS "Building with ThreadSanitizer enabled")
+  list(APPEND CUOPT_CXX_FLAGS -fsanitize=thread -fno-omit-frame-pointer -g)
+  add_link_options(-fsanitize=thread)
+endif(BUILD_TSAN)
+
+# To use MemorySanitizer:
+# 1. Build with clang and the -msan flag (MemorySanitizer requires clang)
+# 2. Run the binary with env var set: MSAN_OPTIONS='halt_on_error=1'
+# Note: MemorySanitizer requires all code (including libraries) to be instrumented for accurate results
+if(BUILD_MSAN)
+  message(STATUS "Building with MemorySanitizer enabled")
+  list(APPEND CUOPT_CXX_FLAGS -fsanitize=memory -fno-omit-frame-pointer -g -fsanitize-memory-track-origins=1)
+  add_link_options(-fsanitize=memory)
+endif(BUILD_MSAN)
+
 if(DEFINE_ASSERT)
   add_definitions(-DASSERT_MODE)
 endif(DEFINE_ASSERT)
@@ -116,7 +139,11 @@ if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.9)
   set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -static-global-template-stub=false")
 endif()
 list(APPEND CUOPT_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xcompiler=-Werror --default-stream=per-thread)
-list(APPEND CUOPT_CUDA_FLAGS -Xcompiler=-Wall -Wno-error=non-template-friend)
+if("${CMAKE_CUDA_HOST_COMPILER}" MATCHES "clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+  list(APPEND CUOPT_CUDA_FLAGS -Xcompiler=-Wall)
+else()
+  list(APPEND CUOPT_CUDA_FLAGS -Xcompiler=-Wall -Wno-error=non-template-friend)
+endif()
 list(APPEND CUOPT_CUDA_FLAGS -Xfatbin=-compress-all)
 if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.9 AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 13.0)
   list(APPEND CUOPT_CUDA_FLAGS -Xfatbin=--compress-level=3)
 
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -33,8 +33,6 @@ enum class error_type_t {
  */
 
 struct logic_error : public std::logic_error {
-  explicit logic_error() = default;
-
   logic_error(const logic_error& exception) = default;
 
   // Move constructor
 
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -62,6 +62,7 @@ namespace linear_programming {
 
 class base_solution_t {
  public:
+  virtual ~base_solution_t()  = default;
   virtual bool is_mip() const = 0;
 };
 
 
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -44,6 +44,48 @@ namespace cuopt::linear_programming::dual_simplex {
 
 auto constexpr use_gpu = true;
 
+// Non-template float/double wrappers around cub::DeviceTransform::Transform to work around a clang compiler bug
+[[maybe_unused]] static void pairwise_multiply(
+  float* a, float* b, float* out, int size, rmm::cuda_stream_view stream)
+{
+  cub::DeviceTransform::Transform(
+    cuda::std::make_tuple(a, b), out, size, cuda::std::multiplies<>{}, stream);
+}
+
+[[maybe_unused]] static void pairwise_multiply(
+  double* a, double* b, double* out, int size, rmm::cuda_stream_view stream)
+{
+  cub::DeviceTransform::Transform(
+    cuda::std::make_tuple(a, b), out, size, cuda::std::multiplies<>{}, stream);
+}
+
+[[maybe_unused]] static void axpy(
+  float alpha, float* x, float beta, float* y, float* out, int size, rmm::cuda_stream_view stream)
+{
+  cub::DeviceTransform::Transform(
+    cuda::std::make_tuple(x, y),
+    out,
+    size,
+    [alpha, beta] __host__ __device__(float a, float b) { return alpha * a + beta * b; },
+    stream);
+}
+
+[[maybe_unused]] static void axpy(double alpha,
+                                  double* x,
+                                  double beta,
+                                  double* y,
+                                  double* out,
+                                  int size,
+                                  rmm::cuda_stream_view stream)
+{
+  cub::DeviceTransform::Transform(
+    cuda::std::make_tuple(x, y),
+    out,
+    size,
+    [alpha, beta] __host__ __device__(double a, double b) { return alpha * a + beta * b; },
+    stream);
+}
+
 template <typename i_t, typename f_t>
 class iteration_data_t {
  public:
@@ -1404,12 +1446,7 @@ class iteration_data_t {
 
     // diag.pairwise_product(x1, r1);
     // r1 <- D * x_1
-    thrust::transform(handle_ptr->get_thrust_policy(),
-                      d_x1.data(),
-                      d_x1.data() + n,
-                      d_diag_.data(),
-                      d_r1.data(),
-                      thrust::multiplies<f_t>());
+    pairwise_multiply(d_x1.data(), d_diag_.data(), d_r1.data(), n, stream_view_);
 
     // r1 <- Q x1 + D x1
     if (Q.n > 0) {
@@ -1419,12 +1456,7 @@ class iteration_data_t {
 
     // y1 <- - alpha * r1 + beta * y1
     // y1.axpy(-alpha, r1, beta);
-    thrust::transform(handle_ptr->get_thrust_policy(),
-                      d_r1.data(),
-                      d_r1.data() + n,
-                      d_y1.data(),
-                      d_y1.data(),
-                      axpy_op<f_t>{-alpha, beta});
+    axpy(-alpha, d_r1.data(), beta, d_y1.data(), d_y1.data(), n, stream_view_);
 
     // matrix_transpose_vector_multiply(A, alpha, x2, 1.0, y1);
     cusparse_view_.transpose_spmv(alpha, d_x2, 1.0, d_y1);
 
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -1511,7 +1511,7 @@ void pdlp_solver_t<i_t, f_t>::compute_initial_step_size()
 
     const auto& cusparse_view_ = pdhg_solver_.get_cusparse_view();
 
-    int sing_iters = 0;
+    [[maybe_unused]] int sing_iters = 0;
     for (int i = 0; i < max_iterations; ++i) {
       ++sing_iters;
       // d_q = d_z
 
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -309,8 +309,7 @@ std::pair<std::vector<std::unique_ptr<solver_ret_t>>, double> call_batch_solve(
 
 #pragma omp parallel for num_threads(max_thread)
   for (std::size_t i = 0; i < size; ++i)
-    list[i] =
-      std::move(call_solve(data_models[i], solver_settings, cudaStreamNonBlocking, is_batch_mode));
+    list[i] = call_solve(data_models[i], solver_settings, cudaStreamNonBlocking, is_batch_mode);
 
   auto end      = std::chrono::high_resolution_clock::now();
   auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start_solver);
 
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -62,9 +62,9 @@ struct max_abs_value {
 template <typename i_t>
 i_t conditional_major(uint64_t total_pdlp_iterations)
 {
-  uint64_t step      = 10;
-  uint64_t threshold = 1000;
-  uint64_t iteration = 0;
+  uint64_t step                       = 10;
+  uint64_t threshold                  = 1000;
+  [[maybe_unused]] uint64_t iteration = 0;
 
   [[maybe_unused]] constexpr uint64_t max_u64 = std::numeric_limits<uint64_t>::max();
 
 
@@ -262,8 +262,8 @@ void rins_t<i_t, f_t>::run_rins()
   branch_and_bound_settings.num_diving_threads = 1;
   branch_and_bound_settings.log.log            = false;
   branch_and_bound_settings.log.log_prefix     = "[RINS] ";
-  branch_and_bound_settings.solution_callback  = [this, &rins_solution_queue](
-                                                  std::vector<f_t>& solution, f_t objective) {
+  branch_and_bound_settings.solution_callback  = [&rins_solution_queue](std::vector<f_t>& solution,
+                                                                       f_t objective) {
     rins_solution_queue.push_back(solution);
   };
   dual_simplex::branch_and_bound_t<i_t, f_t> branch_and_bound(branch_and_bound_problem,