Fix sample_async noise model lifetime and isolation (#3857)

huaweil-nv · 1tnguyen · khalatepradnya · web-flow · commit d9703ea23439 · 2026-02-24T04:54:59.000Z
### Description

## Summary

This PR fixes `cudaq.sample_async(..., noise_model=...)` for local simulators by ensuring the noise model is applied correctly during asynchronous execution and does not leak into subsequent calls.

## Root cause

- The async implementation could reference a noise model whose lifetime ended before the queued task executed.
- Noise configuration was not scoped to the async task lifetime, which could cause state pollution.
- Noise set/reset needed to be applied per-QPU (using the provided `qpu_id`).

## Fix

- Extend `details::runSamplingAsync` to accept an optional noise model and capture it by value inside the async task.
- Set the noise model at the start of the async task and reset it on completion (including exception paths) to prevent state leakage.
- Apply set/reset per-QPU using `qpu_id`.
- Reject non-empty noise models on remote platforms with a clear error.
- Update Python binding-side remote checks to respect the provided `qpu_id`.

## Tests

Added regression tests in `python/tests/builder/test_NoiseModel.py`:

- `test_sample_async_with_noise`
- `test_sample_async_noise_isolation`

## How to repro

1. Noise must be applied to the async result and must not leak into later calls:

```python
import cudaq

cudaq.set_target("density-matrix-cpu")
cudaq.set_random_seed(42)

k = cudaq.make_kernel()
q = k.qalloc()
k.x(q)
k.mz(q)

noise = cudaq.NoiseModel()
noise.add_channel("x", [0], cudaq.DepolarizationChannel(1.0))

# Noise should be visible in async result.
noisy = cudaq.sample_async(k, shots_count=1000, noise_model=noise).get()
print("async noisy:", noisy)

# Subsequent calls without noise must remain clean (no state pollution).
clean = cudaq.sample(k, shots_count=200)
print("after clean:", clean)
assert clean.count("1") == 200
```

2. Noise model going out of scope before the queued task runs (intermittent; the failure does not always occur):

```python
import cudaq, gc

cudaq.set_target("density-matrix-cpu")
cudaq.set_random_seed(42)

k = cudaq.make_kernel()
q = k.qalloc()
k.x(q)
k.mz(q)

def launch_once():
    # Noise model is intentionally scoped to this function.
    noise = cudaq.NoiseModel()
    noise.add_channel("x", [0], cudaq.DepolarizationChannel(1.0))
    fut = cudaq.sample_async(k, shots_count=200, noise_model=noise)
    return fut

futs = [launch_once() for _ in range(200)]
gc.collect()  # Try to encourage destruction of temporaries
for f in futs:
    _ = f.get()
print("done")
```

---------

Signed-off-by: huaweil <huaweil@nvidia.com>
Co-authored-by: Thien Nguyen <58006629+1tnguyen@users.noreply.github.com>
Co-authored-by: Pradnya Khalate <148914294+khalatepradnya@users.noreply.github.com>
diff --git a/python/runtime/cudaq/algorithms/py_sample_async.cpp b/python/runtime/cudaq/algorithms/py_sample_async.cpp
@@ -31,17 +31,22 @@ static async_sample_result sample_async_impl(
   std::string kernelName = shortName;
   auto retTy = unwrap(returnTy);
   auto &platform = get_platform();
-  if (noise_model.has_value()) {
-    if (platform.is_remote())
-      throw std::runtime_error(
-          "Noise model is not supported on remote platforms.");
-    platform.set_noise(&noise_model.value());
-  }
+
+  // Check remote platform restriction for noise model.
+  if (noise_model.has_value() && platform.is_remote(qpu_id))
+    throw std::runtime_error(
+        "Noise model is not supported on remote platforms.");
+
   auto fnOp = getKernelFuncOp(mod, shortName);
   auto opaques = marshal_arguments_for_module_launch(mod, runtimeArgs, fnOp);
 
   // Should only have C++ going on here, safe to release the GIL
   py::gil_scoped_release release;
+
+  // Use runSamplingAsync with noise model support.
+  // The noise_model is passed by value to runSamplingAsync, which captures
+  // it in the async task to ensure proper lifetime and handles setting/
+  // resetting it to avoid dangling pointers and global state pollution.
   return details::runSamplingAsync(
       // Notes:
       // (1) no Python data access is allowed in this lambda body.
@@ -52,7 +57,8 @@ static async_sample_result sample_async_impl(
         [[maybe_unused]] auto result =
             clean_launch_module(kernelName, mod, retTy, opaques);
       }),
-      platform, kernelName, shots_count, explicit_measurements, qpu_id);
+      platform, kernelName, shots_count, explicit_measurements, qpu_id,
+      std::move(noise_model));
 }
 
 void cudaq::bindSampleAsync(py::module &mod) {
diff --git a/python/tests/builder/test_NoiseModel.py b/python/tests/builder/test_NoiseModel.py
@@ -1073,6 +1073,112 @@ def kraus_mats(error_probability):
     cudaq.reset_target()
 
 
+@pytest.mark.parametrize('target', ['density-matrix-cpu', 'stim'])
+def test_sample_async_with_noise(target: str):
+    """
+    Tests that `cudaq.sample_async` correctly applies the noise model
+    and does not pollute subsequent calls.
+    
+    This test verifies the fix for the bug where:
+    1. Noise model was set but never reset (state pollution)
+    2. Noise model pointer became dangling after function return
+    3. Noise model was not correctly applied in async execution
+    """
+    cudaq.set_target(target)
+    cudaq.set_random_seed(42)
+
+    # Single-qubit kernel: X then measure; the noiseless outcome is always |1>.
+    kernel = cudaq.make_kernel()
+    qubit = kernel.qalloc()
+    kernel.x(qubit)
+    kernel.mz(qubit)
+
+    # Depolarizing noise attached to the "x" gate on qubit 0, with p = 0.9.
+    noise = cudaq.NoiseModel()
+    depol = cudaq.DepolarizationChannel(0.9)  # 90% depolarization
+    noise.add_channel("x", [0], depol)
+
+    # Step 1: Baseline - synchronous sample without noise must be 100% |1>.
+    clean_result = cudaq.sample(kernel, shots_count=100)
+    assert clean_result.count('1') == 100, "Baseline should be 100% |1>"
+
+    # Step 2: sample_async WITH noise must actually apply the noise model.
+    future = cudaq.sample_async(kernel, shots_count=1000, noise_model=noise)
+    noisy_result = future.get()
+    # Only the presence of both outcomes is checked, not their distribution.
+    assert noisy_result.count(
+        '0') > 0, "Noisy sample_async should have some |0>"
+    assert noisy_result.count(
+        '1') > 0, "Noisy sample_async should have some |1>"
+
+    # Step 3: Synchronous sample after the async call must see no leftover noise.
+    clean_after = cudaq.sample(kernel, shots_count=100)
+    assert clean_after.count('1') == 100, \
+        "Sample after sample_async should not be polluted by noise model"
+
+    # Step 4: A later sample_async WITHOUT noise_model must also be clean.
+    future_clean = cudaq.sample_async(kernel, shots_count=100)
+    clean_async_result = future_clean.get()
+    assert clean_async_result.count('1') == 100, \
+        "sample_async without noise should be 100% |1>"
+    # NOTE(review): not reached if an assertion above fails.
+    cudaq.reset_target()
+
+
+@pytest.mark.parametrize('target', ['density-matrix-cpu'])
+def test_sample_async_noise_isolation(target: str):
+    """
+    Tests that multiple sample_async calls with different noise models
+    are properly isolated from each other.
+    """
+    cudaq.set_target(target)
+    cudaq.set_random_seed(13)
+
+    kernel = cudaq.make_kernel()
+    qubit = kernel.qalloc()
+    kernel.x(qubit)
+    kernel.mz(qubit)
+
+    # Two models on the same gate and qubit that differ only in probability p.
+    noise_high = cudaq.NoiseModel()
+    noise_high.add_channel("x", [0], cudaq.DepolarizationChannel(1.0))
+
+    noise_low = cudaq.NoiseModel()
+    noise_low.add_channel("x", [0], cudaq.DepolarizationChannel(0.1))
+
+    # Launch all three async calls before collecting any of the results.
+    future_high = cudaq.sample_async(kernel,
+                                     shots_count=1000,
+                                     noise_model=noise_high)
+    future_low = cudaq.sample_async(kernel,
+                                    shots_count=1000,
+                                    noise_model=noise_low)
+    future_none = cudaq.sample_async(kernel, shots_count=100)
+
+    # Collect the results in launch order.
+    result_high = future_high.get()
+    result_low = future_low.get()
+    result_none = future_none.get()
+
+    # DepolarizationChannel(p=1.0) after the X gate maps rho to
+    # (1-p) rho + p/3 (X rho X + Y rho Y + Z rho Z); from |1>, P(|0>) = 2/3.
+    # Allow a generous tolerance to avoid flakiness from finite-shot sampling.
+    high_zero_prob = result_high.probability('0')
+    assert 0.55 < high_zero_prob < 0.80, \
+        f"High noise should give P(|0>) ~ 2/3, got {high_zero_prob}"
+
+    # Low noise (p = 0.1) should leave mostly |1>.
+    low_one_prob = result_low.probability('1')
+    assert low_one_prob > 0.8, \
+        f"Low noise should give >80% |1>, got {low_one_prob}"
+
+    # The call without noise_model must be unaffected: exactly 100% |1>.
+    assert result_none.count('1') == 100, \
+        "No noise should give 100% |1>"
+    # NOTE(review): not reached if an assertion above fails.
+    cudaq.reset_target()
+
+
 INVALID_PROBABILITY_MSG = (r"probability must be in the range|"
                            r"not completely positive|trace preserving")
 
diff --git a/runtime/cudaq/algorithms/sample.h b/runtime/cudaq/algorithms/sample.h
@@ -88,7 +88,7 @@ runSampling(KernelFunctor &&wrappedKernel, quantum_platform &platform,
   }
 #endif
 
-  // Indicate that this is an async exec
+  // Indicate that this is an asynchronous execution.
   ctx.asyncExec = futureResult != nullptr;
 
   auto isRemoteSimulator = platform.get_remote_capabilities().isRemoteSimulator;
@@ -133,35 +133,77 @@ runSampling(KernelFunctor &&wrappedKernel, quantum_platform &platform,
 /// arguments and invokes the quantum kernel) and invoke the sampling process
 /// asynchronously. Return an `async_sample_result`, clients can retrieve the
 /// results at a later time via the `get()` call.
+///
+/// @param wrappedKernel The kernel functor to execute.
+/// @param platform The quantum platform to use.
+/// @param kernelName The name of the kernel.
+/// @param shots The number of shots to run.
+/// @param explicitMeasurements Whether to use explicit measurements.
+/// @param qpu_id The QPU ID to use.
+/// @param noise The optional noise model to apply during execution. The noise
+///              model is copied into the asynchronous task to ensure proper
+///              lifetime.
 template <typename KernelFunctor>
 auto runSamplingAsync(KernelFunctor &&wrappedKernel, quantum_platform &platform,
                       const std::string &kernelName, int shots,
-                      bool explicitMeasurements = false,
-                      std::size_t qpu_id = 0) {
+                      bool explicitMeasurements = false, std::size_t qpu_id = 0,
+                      std::optional<noise_model> noise = std::nullopt) {
   if (qpu_id >= platform.num_qpus()) {
     throw std::invalid_argument("Provided qpu_id " + std::to_string(qpu_id) +
                                 " is invalid (must be < " +
                                 std::to_string(platform.num_qpus()) +
                                 " i.e. platform.num_qpus())");
   }
 
+  // Treat an empty noise model as "no noise".
+  const bool hasNoise = noise.has_value() && !noise->empty();
+
   // If we are remote, then create the sampling executor with `cudaq::future`
-  // provided
+  // provided. Note: noise model is not supported on remote platforms.
   if (platform.is_remote(qpu_id)) {
+    if (hasNoise)
+      throw std::runtime_error(
+          "Noise model is not supported on remote platforms.");
     details::future futureResult;
     details::runSampling(std::forward<KernelFunctor>(wrappedKernel), platform,
                          kernelName, shots, explicitMeasurements, qpu_id,
                          &futureResult);
     return async_sample_result(std::move(futureResult));
   }
 
-  // Otherwise we'll create our own future/promise and return it
+  // For local platforms, create an asynchronous task that properly handles the
+  // noise model lifecycle:
+  // 1. Capture noise model BY VALUE in the task (extends lifetime)
+  // 2. Set noise model at the START of the task (before
+  // configureExecutionContext)
+  // 3. Reset noise model at the END of the task (including on exception)
+  // This avoids dangling pointers and global state pollution.
   KernelExecutionTask task(
       [qpu_id, explicitMeasurements, shots, kernelName, &platform,
+       noise = std::move(noise),
        kernel = std::forward<KernelFunctor>(wrappedKernel)]() mutable {
-        return details::runSampling(kernel, platform, kernelName, shots,
-                                    explicitMeasurements, qpu_id)
-            .value();
+        const bool hasNoise = noise.has_value() && !noise->empty();
+
+        // Set noise model before execution if provided.
+        if (hasNoise)
+          platform.set_noise(&noise.value(), qpu_id);
+
+        std::optional<sample_result> result;
+        try {
+          result = details::runSampling(kernel, platform, kernelName, shots,
+                                        explicitMeasurements, qpu_id);
+        } catch (...) {
+          // Ensure noise model is reset even on exception.
+          if (hasNoise)
+            platform.reset_noise(qpu_id);
+          throw;
+        }
+
+        // Reset noise model after execution.
+        if (hasNoise)
+          platform.reset_noise(qpu_id);
+
+        return result.value();
       });
 
   return async_sample_result(
@@ -380,17 +422,16 @@ async_sample_result sample_async(const sample_options &options,
   }
   auto &platform = cudaq::get_platform();
   auto kernelName = cudaq::getKernelName(kernel);
-  if (!options.noise.empty())
-    platform.set_noise(&options.noise);
 
-  auto ret = details::runSamplingAsync(
+  // Pass the noise model (copied by value) to runSamplingAsync, which will
+  // set/reset it within the asynchronous task to avoid dangling pointers and
+  // state pollution.
+  return details::runSamplingAsync(
       [&kernel, ... args = std::forward<Args>(args)]() mutable {
         kernel(std::forward<Args>(args)...);
       },
       platform, kernelName, options.shots, options.explicit_measurements,
-      qpu_id);
-  platform.reset_noise();
-  return ret;
+      qpu_id, options.noise);
 }
 
 /// @brief Sample the given kernel expression asynchronously and return