diff --git a/.github/workflows/config/spelling_allowlist.txt b/.github/workflows/config/spelling_allowlist.txt
index e5d7558dcc5..54f94cfaaa3 100644
--- a/.github/workflows/config/spelling_allowlist.txt
+++ b/.github/workflows/config/spelling_allowlist.txt
@@ -74,7 +74,6 @@ MyST
 NGC
 NVIDIA
 NVLink
-NVQC
 NVQIR
 OPX
 OQC
diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml
index ef2a8451349..f6c80b4412d 100644
--- a/.github/workflows/integration_tests.yml
+++ b/.github/workflows/integration_tests.yml
@@ -26,7 +26,6 @@ on:
           - pasqal
           - qci
           - quantinuum
-          - nvqc
       single_test_name:
         type: string
         required: false
@@ -44,11 +43,6 @@ on:
         type: string
         required: false
         description: 'Commit SHA to pull the code (examples/tests) for testing. Default to the commit associated with the CUDA Quantum docker image if left blank'
-      cudaq_nvqc_deploy_image:
-        type: string
-        required: false
-        default: '' # same as cudaq_test_image if not provided
-        description: 'CUDA Quantum image to use for NVQC deployment to NVCF. Default to the latest CUDA Quantum nightly image'
       workflow_id:
         type: string
         required: false
@@ -66,11 +60,6 @@ on:
     - cron: 0 3 * * *
 
 env:
-  NGC_QUANTUM_ORG: pnyjrcojiblh
-  NGC_QUANTUM_TEAM: cuda-quantum
-  NVQC_FUNCTION_ID: 3bfa0342-7d2a-4f1b-8e81-b6608d28ca7d
-  # <Backend>:<GPU Type>:<Instance Type>:<Min Instances>:<Max Instances>
-  NGC_NVQC_DEPLOYMENT_SPEC: GFN:L40S:gl40s_1.br25_2xlarge:1:1
   python_version: '3.12'
 
 jobs:
@@ -88,7 +77,6 @@ jobs:
 
     outputs:
       cudaq_test_image: ${{ steps.vars.outputs.cudaq_nightly_image }}@${{ steps.test_image.outputs.digest }}
-      cudaq_nvqc_deploy_image: ${{ inputs.cudaq_nvqc_deploy_image || format('{0}@{1}', steps.vars.outputs.cudaq_nightly_image, steps.test_image.outputs.digest) }}
 
     steps:
       - name: Set variables
@@ -174,135 +162,6 @@ jobs:
             echo "sha=$(cat $CUDA_QUANTUM_PATH/build_info.txt | grep -o 'source-sha: \S*' | cut -d ' ' -f 2)" >> $GITHUB_OUTPUT
           fi
 
-  build_nvqc_image:
-    name: Build NVQC deployment image
-    runs-on: ubuntu-latest
-    if: (inputs.target == 'nvqc' || github.event_name == 'schedule' || inputs.target == 'nightly')
-    needs: [setup, metadata]
-    permissions:
-      contents: read
-      packages: write
-
-    environment: ghcr-deployment
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ needs.metadata.outputs.cudaq_commit }}
-          fetch-depth: 1
-
-      - name: Set up context for buildx
-        run: |
-          docker context create builder_context
-
-      - name: Set up buildx runner
-        uses: docker/setup-buildx-action@v3
-        with:
-          endpoint: builder_context
-          version: v0.19.0
-          driver-opts: |
-            image=moby/buildkit:v0.19.0
-
-      - name: Login to NGC container registry
-        uses: docker/login-action@v3
-        with:
-          registry: nvcr.io
-          username: '$oauthtoken'
-          password: ${{ secrets.NGC_CREDENTIALS }}
-
-      # Log in to GHCR (in case the base image is a local one)
-      - name: Log in to the GitHub container registry
-        uses: docker/login-action@v3
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ github.token }}
-
-      - name: Build NVQC image
-        id: docker_build
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          file: ./docker/release/cudaq.nvqc.Dockerfile
-          build-args: |
-            base_image=${{ needs.setup.outputs.cudaq_nvqc_deploy_image }}
-          tags: nvcr.io/${{ env.NGC_QUANTUM_ORG }}/${{ env.NGC_QUANTUM_TEAM }}/${{ vars.packages_prefix }}cuda-quantum:nightly
-          platforms: linux/amd64
-          provenance: false
-          push: true
-
-  deploy_nvqc_test_function:
-    name: Deploy NVQC function
-    runs-on: ubuntu-latest
-    needs: [metadata, build_nvqc_image]
-    if: (inputs.target == 'nvqc' || github.event_name == 'schedule' || inputs.target == 'nightly')
-    permissions:
-      contents: read
-
-    # Must have environment protection
-    environment: ghcr-deployment
-
-    outputs:
-      nvqc_function_version_id: ${{ steps.deploy.outputs.nvqc_function_version_id }}
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ needs.metadata.outputs.cudaq_commit }}
-          fetch-depth: 1
-
-      - name: Install NGC CLI
-        uses: ./.github/actions/install-ngc-cli
-        with:
-          version: 3.38.0
-          checksum: 427c67684d792b673b63882a6d0cbb8777815095c0f2f31559c1570a91187388
-
-      - name: Deploy NVQC Function
-        id: deploy
-        env:
-          NGC_CLI_API_KEY: ${{ secrets.NGC_CREDENTIALS }}
-          NGC_CLI_ORG: ${{ env.NGC_QUANTUM_ORG }}
-          NGC_CLI_TEAM: cuda-quantum
-        # When a new REST version is introduced, NVQC_REST_PAYLOAD_VERSION needs to be updated in lockstep with the new nightly CUDA Quantum image.
-        # Otherwise, deployment of the test function will fail.
-        run: |
-          # We run with CUDAQ_SER_CODE_EXEC set. The final NVQC deployment may
-          # or may not have this set, but since we run the client with
-          # CUDAQ_CLIENT_REMOTE_CAPABILITY_OVERRIDE=1 (below), we need to run
-          # the CI with CUDAQ_SER_CODE_EXEC=1. If we ever remove
-          # CUDAQ_CLIENT_REMOTE_CAPABILITY_OVERRIDE=1 below, we can consider
-          # removing CUDAQ_SER_CODE_EXEC=1.
-          create_function_result=$(ngc-cli/ngc cloud-function function create \
-            --container-image nvcr.io/${{ env.NGC_QUANTUM_ORG }}/${{ env.NGC_QUANTUM_TEAM }}/cuda-quantum:nightly \
-            --container-environment-variable NUM_GPUS:1 \
-            --container-environment-variable NVQC_REST_PAYLOAD_VERSION:1.1 \
-            --container-environment-variable RUN_AS_NOBODY:1 \
-            --container-environment-variable CUDAQ_SER_CODE_EXEC:1 \
-            --api-body-format CUSTOM \
-            --inference-port 3030 \
-            --health-uri / \
-            --inference-url /job \
-            --name cudaq-nightly-integration-test \
-            $NVQC_FUNCTION_ID)
-          version_id=$(echo "$create_function_result" | grep 'Version: \S*' | head -1 | cut -d ':' -f 2 | tr -d ' ')
-          echo "Create version Id: $version_id"
-          echo "nvqc_function_version_id=$version_id" >> $GITHUB_OUTPUT
-          # Deploy it
-          ngc-cli/ngc cloud-function function deploy create --deployment-specification $NGC_NVQC_DEPLOYMENT_SPEC $NVQC_FUNCTION_ID:$version_id
-          function_status=DEPLOYING
-          while [ "$function_status" = "DEPLOYING" ]; do
-            echo "Waiting for deploying NVQC function version $version_id ..."
-            sleep 120
-            function_info=$(ngc-cli/ngc cloud-function function info $NVQC_FUNCTION_ID:$version_id)
-            function_status=$(echo "$function_info" | grep 'Status: \S*' | head -1 | cut -d ':' -f 2 | tr -d ' ')
-          done
-          if [ "$function_status" != "ACTIVE" ]; then
-            echo "::error:: Failed to deploy NVQC Test Function"
-            exit 1
-          fi
-
   # Setup job to determine which providers to test
   provider_matrix_setup:
     name: Setup provider matrix
@@ -813,348 +672,3 @@ jobs:
           # Clean up
           rm -f "$HOME/.anyon_config" "$HOME/.quantinuum_config"
         shell: bash
-
-  nvqc_integration_docker_test:
-    name: NVQC integration test using Docker image
-    runs-on: ubuntu-latest
-    if: (inputs.target == 'nvqc' || github.event_name == 'schedule' || inputs.target == 'nightly')
-    needs: [setup, metadata, build_nvqc_image, deploy_nvqc_test_function]
-    permissions:
-      contents: read
-      packages: read
-
-    # Must have environment protection
-    environment:
-      name: ghcr-deployment
-      url: ${{ vars.deployment_url }}
-
-    container:
-      image: ${{ needs.setup.outputs.cudaq_test_image }}
-      options: --user root
-      credentials:
-        username: ${{ github.actor }}
-        password: ${{ github.token }}
-
-    steps:
-      - name: Skip NVQC Docker tests (temporary)
-        id: skip_check
-        run: |
-          echo "### Submit to NVQC" >> $GITHUB_STEP_SUMMARY
-          echo ":warning: NVQC Docker integration tests are temporarily skipped" >> $GITHUB_STEP_SUMMARY
-          echo "::warning::NVQC Docker integration tests are temporarily skipped"
-          echo "skipped=true" >> $GITHUB_OUTPUT
-        shell: bash
-
-      - name: Get code
-        if: steps.skip_check.outputs.skipped != 'true'
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ needs.metadata.outputs.cudaq_commit }}
-          fetch-depth: 1
-
-      - name: Submit to NVQC
-        run: |
-          echo "### Submit to NVQC" >> $GITHUB_STEP_SUMMARY
-          export NVQC_API_KEY="${{ secrets.NVQC_SERVICE_KEY }}"
-          export NVQC_FUNCTION_ID="$NVQC_FUNCTION_ID"
-          export NVQC_FUNCTION_VERSION_ID="${{ needs.deploy_nvqc_test_function.outputs.nvqc_function_version_id }}"
-          # When overriding the NVQC_FUNCTION_ID to a function that doesn't
-          # follow the production naming convenvtions, we need to set the
-          # following environment variable to tell the client that the server
-          # has all the remote capabilities.
-          export CUDAQ_CLIENT_REMOTE_CAPABILITY_OVERRIDE=1
-          set +e # Allow script to keep going through errors
-          test_err_sum=0
-          # Test all NVQPP execution tests
-          for filename in `find targettests/execution/ -name '*.cpp'`; do
-            echo "$filename"
-            # Only run tests that require execution (not a syntax-only check)
-            if grep -q "ifndef SYNTAX_CHECK" "$filename"; then
-              nvq++ -v $filename --target nvqc
-              test_status=$?
-              if [ $test_status -eq 0 ]; then
-                ./a.out
-                test_status=$?
-                if [ $test_status -eq 0 ]; then
-                  echo ":white_check_mark: Successfully ran test: $filename" >> $GITHUB_STEP_SUMMARY
-                else
-                  echo ":x: Test failed (failed to execute): $filename" >> $GITHUB_STEP_SUMMARY
-                  test_err_sum=$((test_err_sum+1))
-                fi
-              else
-                echo ":x: Test failed (failed to compile): $filename" >> $GITHUB_STEP_SUMMARY
-                test_err_sum=$((test_err_sum+1))
-              fi
-            fi
-          done
-
-          # Test all remote-sim tests
-          for filename in `find targettests/Remote-Sim -name '*.cpp'`; do
-            # unsupport_args is compile error test
-            # qvector_init_from_state, qvector_init_from_state_lazy, test_trotter: New argument synthesis is not executed for nvqc (https://github.com/NVIDIA/cuda-quantum/issues/2146)
-            if [[ "$filename" != *"unsupport_args"* ]] && [[ "$filename" != *"qvector_init_from_state"* ]] && [[ "$filename" != *"qvector_init_from_state_lazy"* ]] && [[ "$filename" != *"test_trotter"* ]]; then
-              echo "$filename"
-              nvqc_config=""
-              # Look for a --remote-mqpu-auto-launch to determine the number of QPUs
-              num_qpus=`cat $filename | grep -oP -m 1 '^//\s*RUN:\s*nvq++.+--remote-mqpu-auto-launch\s+\K\S+'`
-              if [ -n "$num_qpus" ]; then
-                echo "Intended to run on '$num_qpus' QPUs."
-                nvqc_config="$nvqc_config --nvqc-nqpus $num_qpus"
-              fi
-              nvq++ -v $filename --target nvqc $nvqc_config
-              test_status=$?
-              if [ $test_status -eq 0 ]; then
-                ./a.out
-                test_status=$?
-                if [ $test_status -eq 0 ]; then
-                  echo ":white_check_mark: Successfully ran test: $filename" >> $GITHUB_STEP_SUMMARY
-                else
-                  echo ":x: Test failed (failed to execute): $filename" >> $GITHUB_STEP_SUMMARY
-                  test_err_sum=$((test_err_sum+1))
-                fi
-              else
-                  echo ":x: Test failed (failed to compile): $filename" >> $GITHUB_STEP_SUMMARY
-                  test_err_sum=$((test_err_sum+1))
-              fi
-            fi
-          done
-
-          # Test C++ examples with NVQC
-          for filename in `find examples/cpp/ applications/cpp/ targets/cpp/ -name '*.cpp'`; do
-            if [[ "$filename" == *"nvqc"* ]]; then
-              echo "$filename"
-              nvqc_config=""
-              # Look for a --nvqc-backend flag to nvq++ in the comment block
-              nvqc_backend=`sed -e '/^$/,$d' $filename | grep -oP -m 1 '^//\s*nvq++.+--nvqc-backend\s+\K\S+'`
-              if [ -n "$nvqc_backend" ]; then
-                echo "Intended for execution on '$nvqc_backend' backend."
-                nvqc_config="$nvqc_config --nvqc-backend $nvqc_backend"
-              fi
-              # Look for a --nvqc-nqpus flag to nvq++ in the comment block
-              num_qpus=`sed -e '/^$/,$d' $filename | grep -oP -m 1 '^//\s*nvq++.+--nvqc-nqpus\s+\K\S+'`
-              if [ -n "$num_qpus" ]; then
-                echo "Intended to run on '$num_qpus' QPUs."
-                nvqc_config="$nvqc_config --nvqc-nqpus $num_qpus"
-              fi
-              nvq++ -v $filename --target nvqc $nvqc_config
-              test_status=$?
-              if [ $test_status -eq 0 ]; then
-                ./a.out
-                test_status=$?
-                if [ $test_status -eq 0 ]; then
-                  echo ":white_check_mark: Successfully ran test: $filename" >> $GITHUB_STEP_SUMMARY
-                else
-                  echo ":x: Test failed (failed to execute): $filename" >> $GITHUB_STEP_SUMMARY
-                  test_err_sum=$((test_err_sum+1))
-                fi
-              else
-                  echo ":x: Test failed (failed to compile): $filename" >> $GITHUB_STEP_SUMMARY
-                  test_err_sum=$((test_err_sum+1))
-              fi
-            fi
-          done
-
-          # Test NVQC Python examples + Python MLIR execution tests (not IR tests)
-          python3 -m pip install pytest
-          for ex in `find examples/python python/tests/mlir/target -name '*.py'`; do
-            filename=$(basename -- "$ex")
-            filename="${filename%.*}"
-            echo "Testing $filename:"
-            if [[ "$ex" == *"nvqc"* ]]; then
-              # This is an NVQC example
-              python3 $ex 1> /dev/null
-              test_status=$?
-              if [ $test_status -eq 0 ]; then
-                echo ":white_check_mark: Successfully ran test: $ex" >> $GITHUB_STEP_SUMMARY
-              else
-                echo ":x: Test failed (failed to execute): $ex" >> $GITHUB_STEP_SUMMARY
-                test_err_sum=$((test_err_sum+1))
-              fi
-            # building_kernels.py is disabled due to https://github.com/NVIDIA/cuda-quantum/issues/2299.
-            elif [[ "$ex" != *"building_kernels"* ]]; then
-              # Only run examples that are not target-specific (e.g., ionq, iqm)
-              if ! grep -q "set_target" "$ex"; then
-                # Use --target command line option to run these examples with nvqc
-                python3 $ex --target nvqc 1> /dev/null
-                test_status=$?
-                if [ $test_status -eq 0 ]; then
-                  echo ":white_check_mark: Successfully ran test: $ex" >> $GITHUB_STEP_SUMMARY
-                else
-                  echo ":x: Test failed (failed to execute): $ex" >> $GITHUB_STEP_SUMMARY
-                  test_err_sum=$((test_err_sum+1))
-                fi
-              fi
-            fi
-          done
-
-          set -e # Re-enable exit code error checking
-          if [ ! $test_err_sum -eq 0 ]; then
-            echo "::error::${test_err_sum} tests failed. See step summary for a list of failures"
-            exit 1
-          fi
-        shell: bash
-
-  nvqc_integration_wheel_test:
-    name: NVQC integration test using Python wheels
-    runs-on: ubuntu-latest
-    if: inputs.target == 'nvqc' || github.event_name == 'schedule' || inputs.target == 'nightly'
-    needs: [metadata, build_nvqc_image, deploy_nvqc_test_function]
-    permissions:
-      contents: read
-
-    # Must have environment protection
-    environment: ghcr-deployment
-
-    steps:
-      - name: Skip NVQC wheel tests (temporary)
-        id: skip_check
-        run: |
-          echo "### Submit to NVQC from Python wheels" >> $GITHUB_STEP_SUMMARY
-          echo ":warning: NVQC Python wheel integration tests are temporarily skipped" >> $GITHUB_STEP_SUMMARY
-          echo "::warning::NVQC Python wheel integration tests are temporarily skipped"
-          echo "skipped=true" >> $GITHUB_OUTPUT
-        shell: bash
-
-      - name: Get code
-        if: steps.skip_check.outputs.skipped != 'true'
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ needs.metadata.outputs.cudaq_commit }}
-          fetch-depth: 1
-
-      - name: Install wheel
-        if: steps.skip_check.outputs.skipped != 'true'
-        id: install_wheel
-        run: |
-          python_version=${{ inputs.python_version || env.python_version }}
-          workflow_id=${{ inputs.workflow_id }}
-          # Helper to get the *valid* Publishing run Id for a commit hash
-          # Notes: runs that have 'CUDA-Q Python wheels' jobs skipped are not considered.
-          function get_publishing_run_id {
-            # Find all Publishing runs, we'll look into its jobs' status later
-            if [[ -z "$1" ]]; then
-              publishing_run_ids=$(gh run list --workflow Publishing --json databaseId --jq .[].databaseId)
-            else
-              publishing_run_ids=$(gh run list --commit $1 --workflow Publishing --json databaseId --jq .[].databaseId)
-            fi
-            for run_id in $publishing_run_ids ; do
-                # Look into its jobs: if "CUDA-Q Python wheels" matrix build was performed,
-                # then we have multiple jobs, like "CUDA-Q Python wheels (python_arm64....")
-                cuda_wheel_build_jobs=$(gh run view $run_id --jq '.jobs.[] | select(.name | startswith("CUDA-Q Python wheels (python_")).name' --json jobs)
-                if [ ! -z "$cuda_wheel_build_jobs" ]; then
-                  # This is a valid run that produces wheel artifacts
-                  echo $run_id
-                  break
-                fi
-            done
-          }
-
-          if [ -z "${workflow_id}" ]; then
-            workflow_id=$(get_publishing_run_id ${{ needs.metadata.outputs.cudaq_commit }})
-          fi
-          if [ ! -z "$workflow_id" ]; then
-            echo "Using artifacts from workflow id $workflow_id"
-            # Allow error when trying to download wheel artifacts since they might be expired.
-            set +e
-            gh run download $workflow_id --name "x86_64-cu12-py$python_version-wheels"
-            retVal=$?
-            set -e
-            if [ $retVal -ne 0 ]; then
-              echo "Failed to download wheels artifact from Publishing workflow run Id $workflow_id. Perhaps the artifacts have been expired."
-              # This is allowed since there might be a period where no Publishing workflow is run (e.g., no PR merged to main).
-              echo "skipped=true" >> $GITHUB_OUTPUT
-              exit 0
-            fi
-            python_version_filename=$(echo "${python_version//.}")
-            # Install Python and the wheel
-            apt-get update && apt-get install -y --no-install-recommends python$python_version python3-pip
-            wheelfile=$(find . -name "cuda_quantum_cu12*cp$python_version_filename*x86_64.whl")
-            python$python_version -m pip install $wheelfile
-            echo "skipped=false" >> $GITHUB_OUTPUT
-          else
-            echo "Failed to retrieve Publishing workflow run Id for commit ${{ needs.metadata.outputs.cudaq_commit }}"
-            exit 1
-          fi
-        env:
-          GH_TOKEN: ${{ github.token }}
-
-      - name: Test NVQC
-        if: ${{ steps.skip_check.outputs.skipped != 'true' || steps.install_wheel.outputs.skipped != 'true' }}
-        run: |
-          echo "### Submit to NVQC from Python wheels" >> $GITHUB_STEP_SUMMARY
-          python_version=${{ inputs.python_version || env.python_version }}
-          export NVQC_API_KEY="${{ secrets.NVQC_SERVICE_KEY }}"
-          export NVQC_FUNCTION_ID="$NVQC_FUNCTION_ID"
-          export NVQC_FUNCTION_VERSION_ID="${{ needs.deploy_nvqc_test_function.outputs.nvqc_function_version_id }}"
-          set +e # Allow script to keep going through errors
-          python$python_version -m pip install pytest
-          test_err_sum=0
-          for ex in `find examples/python python/tests/mlir/target -name '*.py'`; do
-            filename=$(basename -- "$ex")
-            filename="${filename%.*}"
-            echo "Testing $filename:"
-            if [[ "$ex" == *"nvqc"* ]]; then
-              python$python_version $ex 1> /dev/null
-              test_status=$?
-              if [ $test_status -eq 0 ]; then
-                echo ":white_check_mark: Successfully ran test: $ex" >> $GITHUB_STEP_SUMMARY
-              else
-                echo ":x: Test failed (failed to execute): $ex" >> $GITHUB_STEP_SUMMARY
-                test_err_sum=$((test_err_sum+1))
-              fi
-            # building_kernels.py is disabled due to https://github.com/NVIDIA/cuda-quantum/issues/2299.
-            elif [[ "$ex" != *"building_kernels"* ]]; then
-              # Only run examples that are not target-specific (e.g., ionq, iqm)
-              if ! grep -q "set_target" "$ex"; then
-                # Use --target command line option to run these examples with nvqc
-                python$python_version $ex --target nvqc 1> /dev/null
-                test_status=$?
-                if [ $test_status -eq 0 ]; then
-                  echo ":white_check_mark: Successfully ran test: $ex" >> $GITHUB_STEP_SUMMARY
-                else
-                  echo ":x: Test failed (failed to execute): $ex" >> $GITHUB_STEP_SUMMARY
-                  test_err_sum=$((test_err_sum+1))
-                fi
-              fi
-            fi
-          done
-          set -e # Re-enable exit code error checking
-          if [ ! $test_err_sum -eq 0 ]; then
-            echo "::error::${test_err_sum} tests failed. See step summary for a list of failures"
-            exit 1
-          fi
-
-  cleanup_nvqc_resources:
-    name: Cleanup NVQC resources
-    runs-on: ubuntu-latest
-    if: (success() || failure()) && (inputs.target == 'nvqc' || github.event_name == 'schedule' || inputs.target == 'nightly')
-    needs: [build_nvqc_image, deploy_nvqc_test_function, nvqc_integration_docker_test, nvqc_integration_wheel_test]
-    permissions:
-      contents: read
-
-    # Must have environment protection
-    environment: ghcr-deployment
-
-    steps:
-      - name: Get code
-        uses: actions/checkout@v4
-
-      - name: Install NGC CLI
-        uses: ./.github/actions/install-ngc-cli
-        with:
-          version: 3.38.0
-          checksum: 427c67684d792b673b63882a6d0cbb8777815095c0f2f31559c1570a91187388
-
-      - name: Cleanup
-        env:
-          NGC_CLI_API_KEY: ${{ secrets.NGC_CREDENTIALS }}
-          NGC_CLI_ORG: ${{ env.NGC_QUANTUM_ORG }}
-          NGC_CLI_TEAM: cuda-quantum
-        run: |
-          echo "Version Id: ${{ needs.deploy_nvqc_test_function.outputs.nvqc_function_version_id }}"
-          # Remove deployment (make it inactive)
-          ngc-cli/ngc cloud-function function deploy remove $NVQC_FUNCTION_ID:${{ needs.deploy_nvqc_test_function.outputs.nvqc_function_version_id }}
-          # Remove the function version
-          ngc-cli/ngc cloud-function function remove $NVQC_FUNCTION_ID:${{ needs.deploy_nvqc_test_function.outputs.nvqc_function_version_id }}
-          # Remove the docker image
-          ngc-cli/ngc registry image remove -y nvcr.io/${{ env.NGC_QUANTUM_ORG }}/${{ env.NGC_QUANTUM_TEAM }}/cuda-quantum:nightly
diff --git a/.github/workflows/nvqc_regression_tests.yml b/.github/workflows/nvqc_regression_tests.yml
deleted file mode 100644
index 055eeb2e723..00000000000
--- a/.github/workflows/nvqc_regression_tests.yml
+++ /dev/null
@@ -1,356 +0,0 @@
-name: Nvqc regression tests
-
-concurrency:
-  group: ${{ github.workflow }}${{ github.event.workflow_run.name }}
-  cancel-in-progress: false
-
-# Run on request and every day at 3 AM UTC
-on:
-  workflow_dispatch:
-    inputs:
-      cudaq_test_image:
-        type: string
-        required: false
-        default: '' # picked up from repo variable if not provided
-        description: 'CUDA Quantum image to run the tests in. Default to the latest CUDA Quantum nightly image'
-      commit_sha:
-        type: string
-        required: false
-        description: 'Commit SHA to pull the code (examples/tests) for testing. Default to the commit associated with the CUDA Quantum docker image if left blank'
-      workflow_id:
-        type: string
-        required: false
-        description: 'Workflow Id to retrieve the Python wheel for testing. Default to the wheels produced by the Publishing workflow associated with the latest nightly CUDA Quantum Docker image if left blank'
-      python_version:
-        type: choice
-        required: true
-        description: 'Python version to run wheel test'
-        options:
-        - '3.11'
-        - '3.12'
-        - '3.13'
-
-  schedule:
-    - cron: 0 3 * * *
-
-env:
-  python_version: '3.12'
-
-jobs:
-  # We need this job purely to choose the container image values because the
-  # `env` context is unavailable outside of "steps" contexts.
-  setup:
-    name: Set variables
-    runs-on: ubuntu-latest
-    permissions: {}
-
-    outputs:
-      cudaq_test_image: ${{ steps.vars.outputs.cudaq_test_image }}
-
-    steps:
-      - name: Set variables
-        id: vars
-        run: |
-          echo "cudaq_test_image=${{ inputs.cudaq_test_image || vars.cudaq_test_image }}" >> $GITHUB_OUTPUT
-
-  metadata:
-    name: Retrieve commit info
-    runs-on: ubuntu-latest
-    needs: setup
-    permissions:
-      contents: read
-      packages: read
-
-    environment: backend-validation
-    container:
-      image: ${{ needs.setup.outputs.cudaq_test_image }}
-      options: --user root
-
-    outputs:
-      cudaq_commit: ${{ steps.commit-sha.outputs.sha }}
-
-    steps:
-      - name: Get commit SHA
-        id: commit-sha
-        run: |
-          if [ -n "${{ inputs.commit_sha }}" ]; then
-            echo "sha=${{ inputs.commit_sha }}" >> $GITHUB_OUTPUT
-          else
-            echo "sha=$(cat $CUDA_QUANTUM_PATH/build_info.txt | grep -o 'source-sha: \S*' | cut -d ' ' -f 2)" >> $GITHUB_OUTPUT
-          fi
-
-  nvqc_integration_docker_test:
-    name: NVQC integration test using Docker image
-    runs-on: ubuntu-latest
-    needs: [setup, metadata]
-    permissions:
-      contents: read
-      packages: read
-
-    # Must have environment protection
-    environment: backend-validation
-    container:
-      image: ${{ needs.setup.outputs.cudaq_test_image }}
-      options: --user root
-
-    steps:
-      - name: Get code
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ needs.metadata.outputs.cudaq_commit }}
-          fetch-depth: 1
-
-      - name: Submit to NVQC
-        run: |
-          echo "### Submit to NVQC" >> $GITHUB_STEP_SUMMARY
-          export NVQC_API_KEY="${{ secrets.NVQC_PROD_SERVICE_KEY }}"
-          set +e # Allow script to keep going through errors
-          test_err_sum=0
-          # Test all NVQPP execution tests
-          for filename in `find targettests/execution/ -name '*.cpp'`; do
-            echo "$filename"
-            # Only run tests that require execution (not a syntax-only check)
-            if grep -q "ifndef SYNTAX_CHECK" "$filename"; then
-              nvq++ -v $filename --target nvqc
-              test_status=$?
-              if [ $test_status -eq 0 ]; then
-                ./a.out
-                test_status=$?
-                if [ $test_status -eq 0 ]; then
-                  echo ":white_check_mark: Successfully ran test: $filename" >> $GITHUB_STEP_SUMMARY
-                else
-                  echo ":x: Test failed (failed to execute): $filename" >> $GITHUB_STEP_SUMMARY
-                  test_err_sum=$((test_err_sum+1))
-                fi
-              else
-                echo ":x: Test failed (failed to compile): $filename" >> $GITHUB_STEP_SUMMARY
-                test_err_sum=$((test_err_sum+1))
-              fi
-            fi
-          done
-
-          # Test all remote-sim tests
-          for filename in `find targettests/Remote-Sim -name '*.cpp'`; do
-            # unsupport_args is compile error test
-            # pauli_word: https://github.com/NVIDIA/cuda-quantum/issues/1957
-            # custom_operation: https://github.com/NVIDIA/cuda-quantum/issues/1985
-            # return_values: only supported in 0.8 NVQC service.
-            # qvector_init_from_vector: only supported in 0.8 NVQC service.
-            # qvector_init_from_state, qvector_init_from_state_lazy, test_trotter: not supported yet on nvqc: https://github.com/NVIDIA/cuda-quantum/issues/2146
-            if [[ "$filename" != *"unsupport_args"* ]] && [[ "$filename" != *"state_overlap"* ]] && [[ "$filename" != *"pauli_word"* ]] && [[ "$filename" != *"custom_operation"* ]] && [[ "$filename" != *"return_values"* ]] && [[ "$filename" != *"qvector_init_from_state"* ]] && [[ "$filename" != *"qvector_init_from_state_lazy"* ]] && [[ "$filename" != *"qvector_init_from_vector"* ]] && [[ "$filename" != *"test_trotter"* ]]; then
-              echo "$filename"
-              nvqc_config=""
-              # Look for a --remote-mqpu-auto-launch to determine the number of QPUs
-              num_qpus=`cat $filename | grep -oP -m 1 '^//\s*RUN:\s*nvq++.+--remote-mqpu-auto-launch\s+\K\S+'`
-              if [ -n "$num_qpus" ]; then
-                echo "Intended to run on '$num_qpus' QPUs."
-                nvqc_config="$nvqc_config --nvqc-nqpus $num_qpus"
-              fi
-              nvq++ -v $filename --target nvqc $nvqc_config
-              test_status=$?
-              if [ $test_status -eq 0 ]; then
-                ./a.out
-                test_status=$?
-                if [ $test_status -eq 0 ]; then
-                  echo ":white_check_mark: Successfully ran test: $filename" >> $GITHUB_STEP_SUMMARY
-                else
-                  echo ":x: Test failed (failed to execute): $filename" >> $GITHUB_STEP_SUMMARY
-                  test_err_sum=$((test_err_sum+1))
-                fi
-              else
-                  echo ":x: Test failed (failed to compile): $filename" >> $GITHUB_STEP_SUMMARY
-                  test_err_sum=$((test_err_sum+1))
-              fi
-            fi
-          done
-
-          # Test C++ examples with NVQC
-          for filename in `find examples/cpp/ applications/cpp/ targets/cpp/ -name '*.cpp'`; do
-            if [[ "$filename" == *"nvqc"* ]]; then
-              echo "$filename"
-              nvqc_config=""
-              # Look for a --nvqc-backend flag to nvq++ in the comment block
-              nvqc_backend=`sed -e '/^$/,$d' $filename | grep -oP -m 1 '^//\s*nvq++.+--nvqc-backend\s+\K\S+'`
-              if [ -n "$nvqc_backend" ]; then
-                echo "Intended for execution on '$nvqc_backend' backend."
-                nvqc_config="$nvqc_config --nvqc-backend $nvqc_backend"
-              fi
-              # Look for a --nvqc-nqpus flag to nvq++ in the comment block
-              num_qpus=`sed -e '/^$/,$d' $filename | grep -oP -m 1 '^//\s*nvq++.+--nvqc-nqpus\s+\K\S+'`
-              if [ -n "$num_qpus" ]; then
-                echo "Intended to run on '$num_qpus' QPUs."
-                nvqc_config="$nvqc_config --nvqc-nqpus $num_qpus"
-              fi
-              nvq++ -v $filename --target nvqc $nvqc_config
-              test_status=$?
-              if [ $test_status -eq 0 ]; then
-                ./a.out
-                test_status=$?
-                if [ $test_status -eq 0 ]; then
-                  echo ":white_check_mark: Successfully ran test: $filename" >> $GITHUB_STEP_SUMMARY
-                else
-                  echo ":x: Test failed (failed to execute): $filename" >> $GITHUB_STEP_SUMMARY
-                  test_err_sum=$((test_err_sum+1))
-                fi
-              else
-                  echo ":x: Test failed (failed to compile): $filename" >> $GITHUB_STEP_SUMMARY
-                  test_err_sum=$((test_err_sum+1))
-              fi
-            fi
-          done
-
-          # Test NVQC Python examples + Python MLIR execution tests (not IR tests)
-          python3 -m pip install pytest
-          # Disabling building_kernels as the state is not yet supported on NVQC
-          for ex in `find examples/python python/tests/mlir/target -name '*.py' ! -name '*building_kernels*'`; do
-            filename=$(basename -- "$ex")
-            filename="${filename%.*}"
-            echo "Testing $filename:"
-            if [[ "$ex" == *"nvqc"* ]]; then
-              # This is an NVQC example
-              python3 $ex 1> /dev/null
-              test_status=$?
-              if [ $test_status -eq 0 ]; then
-                echo ":white_check_mark: Successfully ran test: $ex" >> $GITHUB_STEP_SUMMARY
-              else
-                echo ":x: Test failed (failed to execute): $ex" >> $GITHUB_STEP_SUMMARY
-                test_err_sum=$((test_err_sum+1))
-              fi
-            else
-              # Only run examples that are not target-specific (e.g., ionq, iqm)
-              if ! grep -q "set_target" "$ex"; then
-                # Use --target command line option to run these examples with nvqc
-                python3 $ex --target nvqc 1> /dev/null
-                test_status=$?
-                if [ $test_status -eq 0 ]; then
-                  echo ":white_check_mark: Successfully ran test: $ex" >> $GITHUB_STEP_SUMMARY
-                else
-                  echo ":x: Test failed (failed to execute): $ex" >> $GITHUB_STEP_SUMMARY
-                  test_err_sum=$((test_err_sum+1))
-                fi
-              fi
-            fi
-          done
-
-          set -e # Re-enable exit code error checking
-          if [ ! $test_err_sum -eq 0 ]; then
-            echo "::error::${test_err_sum} tests failed. See step summary for a list of failures"
-            exit 1
-          fi
-        shell: bash
-
-  nvqc_integration_wheel_test:
-    name: NVQC integration test using Python wheels
-    runs-on: ubuntu-latest
-    needs: [metadata]
-    permissions:
-      contents: read
-
-    # Must have environment protection
-    environment: backend-validation
-
-    steps:
-      - name: Get code
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ needs.metadata.outputs.cudaq_commit }}
-          fetch-depth: 1
-
-      - name: Install wheel
-        id: install_wheel
-        run: |
-          python_version=${{ inputs.python_version || env.python_version }}
-          workflow_id=${{ inputs.workflow_id }}
-          # Helper to get the *valid* Publishing run Id for a commit hash
-          # Notes: runs that have 'CUDA-Q Python wheels' jobs skipped are not considered.
-          function get_publishing_run_id {
-            # Find all Publishing runs, we'll look into its jobs' status later
-            if [[ -z "$1" ]]; then
-              publishing_run_ids=$(gh run -R NVIDIA/cuda-quantum list --workflow Publishing --json databaseId --jq .[].databaseId)
-            else
-              publishing_run_ids=$(gh run -R NVIDIA/cuda-quantum list --commit $1 --workflow Publishing --json databaseId --jq .[].databaseId)
-            fi
-            for run_id in $publishing_run_ids ; do
-                # Look into its jobs: if "CUDA-Q Python wheels" matrix build was performed,
-                # then we have multiple jobs, like "CUDA-Q Python wheels (python_arm64....")
-                cuda_wheel_build_jobs=$(gh run -R NVIDIA/cuda-quantum view $run_id --jq '.jobs.[] | select(.name | startswith("CUDA-Q Python wheels (python_")).name' --json jobs)
-                if [ ! -z "$cuda_wheel_build_jobs" ]; then
-                  # This is a valid run that produces wheel artifacts
-                  echo $run_id
-                  break
-                fi
-            done
-          }
-
-          if [ -z "${workflow_id}" ]; then
-            workflow_id=$(get_publishing_run_id ${{ needs.metadata.outputs.cudaq_commit }})
-          fi
-          if [ ! -z "$workflow_id" ]; then
-            echo "Using artifacts from workflow id $workflow_id"
-            # Allow error when trying to download wheel artifacts since they might be expired.
-            set +e
-            gh run -R NVIDIA/cuda-quantum download $workflow_id --name "x86_64-py$python_version-wheels"
-            retVal=$?
-            set -e
-            if [ $retVal -ne 0 ]; then
-              echo "Failed to download wheels artifact from Publishing workflow run Id $workflow_id. Perhaps the artifacts have been expired."
-              # This is allowed since there might be a period where no Publishing workflow is run (e.g., no PR merged to main).
-              echo "skipped=true" >> $GITHUB_OUTPUT
-              exit 0
-            fi
-            python_version_filename=$(echo "${python_version//.}")
-            # Install Python and the wheel
-            apt-get update && apt-get install -y --no-install-recommends python$python_version python3-pip
-            wheelfile=$(find . -name "cuda_quantum_cu12*cp$python_version_filename*x86_64.whl")
-            python$python_version -m pip install $wheelfile
-            echo "skipped=false" >> $GITHUB_OUTPUT
-          else
-            echo "Failed to retrieve Publishing workflow run Id for commit ${{ needs.metadata.outputs.cudaq_commit }}"
-            exit 1
-          fi
-        env:
-          GH_TOKEN: ${{ github.token }}
-
-      - name: Test NVQC
-        if: ${{ steps.install_wheel.outputs.skipped != 'true' }}
-        run: |
-          echo "### Submit to NVQC from Python wheels" >> $GITHUB_STEP_SUMMARY
-          python_version=${{ inputs.python_version || env.python_version }}
-          export NVQC_API_KEY="${{ secrets.NVQC_PROD_SERVICE_KEY }}"
-          set +e # Allow script to keep going through errors
-          python$python_version -m pip install pytest
-          test_err_sum=0
-          for ex in `find examples/python python/tests/mlir/target -name '*.py'`; do
-            filename=$(basename -- "$ex")
-            filename="${filename%.*}"
-            echo "Testing $filename:"
-            if [[ "$ex" == *"nvqc"* ]]; then
-              python$python_version $ex 1> /dev/null
-              test_status=$?
-              if [ $test_status -eq 0 ]; then
-                echo ":white_check_mark: Successfully ran test: $ex" >> $GITHUB_STEP_SUMMARY
-              else
-                echo ":x: Test failed (failed to execute): $ex" >> $GITHUB_STEP_SUMMARY
-                test_err_sum=$((test_err_sum+1))
-              fi
-            # building_kernels.py is disabled due to https://github.com/NVIDIA/cuda-quantum/issues/2299.
-            elif [[ "$ex" != *"building_kernels"* ]]; then
-              # Only run examples that are not target-specific (e.g., ionq, iqm)
-              if ! grep -q "set_target" "$ex"; then
-                # Use --target command line option to run these examples with nvqc
-                python$python_version $ex --target nvqc 1> /dev/null
-                test_status=$?
-                if [ $test_status -eq 0 ]; then
-                  echo ":white_check_mark: Successfully ran test: $ex" >> $GITHUB_STEP_SUMMARY
-                else
-                  echo ":x: Test failed (failed to execute): $ex" >> $GITHUB_STEP_SUMMARY
-                  test_err_sum=$((test_err_sum+1))
-                fi
-              fi
-            fi
-          done
-          set -e # Re-enable exit code error checking
-          if [ ! $test_err_sum -eq 0 ]; then
-            echo "::error::${test_err_sum} tests failed. See step summary for a list of failures"
-            exit 1
-          fi
diff --git a/.github/workflows/python_wheels.yml b/.github/workflows/python_wheels.yml
index 3c1bed2e4b0..e21930b6e68 100644
--- a/.github/workflows/python_wheels.yml
+++ b/.github/workflows/python_wheels.yml
@@ -323,7 +323,7 @@ jobs:
           docker run --rm -dit --name wheel-validation-snippets wheel_validation:local
           status_sum=0
 
-          for ex in `find docs/sphinx/snippets/python -name '*.py' -not -path '*/platform/*' -not -path '*/nvqc/*' -not -path '*/backends/*'`; do
+          for ex in `find docs/sphinx/snippets/python -name '*.py' -not -path '*/platform/*' -not -path '*/backends/*'`; do
             file="${ex#docs/sphinx/snippets/python/}"
             echo "__Snippet ${file}:__" >> /tmp/validation.out
             (docker exec wheel-validation-snippets bash -c "python${{ inputs.python_version }} /tmp/snippets/$file" >> /tmp/validation.out) && success=true || success=false
diff --git a/docker/release/cudaq.nvqc.Dockerfile b/docker/release/cudaq.nvqc.Dockerfile
deleted file mode 100644
index 069d3c15b84..00000000000
--- a/docker/release/cudaq.nvqc.Dockerfile
+++ /dev/null
@@ -1,53 +0,0 @@
-# ============================================================================ #
-# Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                   #
-# All rights reserved.                                                         #
-#                                                                              #
-# This source code and the accompanying materials are made available under     #
-# the terms of the Apache License 2.0 which accompanies this distribution.     #
-# ============================================================================ #
-
-# This file is used to build CUDA-Q NVQC service container to be deployed to NVCF.
-#
-# Usage:
-# Must be built from the repo root with:
-#   # to skip prerequisites (default)
-#   docker build --target=without_tpls -f docker/release/cudaq.nvqc.Dockerfile .
-#
-#   # to install and clone prerequisites
-#   docker build --target=with_tpls \
-#       -f docker/release/cudaq.nvqc.Dockerfile .
-
-# Base image is CUDA-Q image 
-ARG base_image=nvcr.io/nvidia/nightly/cuda-quantum:cu12-latest
-FROM $base_image AS nvcf_image
-
-# With prerequisites
-FROM $base_image AS with_tpls
-RUN echo "Build with prerequisites"
-# COPY install_prerequisites into the image
-RUN sudo mkdir -p /tmp
-COPY --chmod=0755 scripts/install_prerequisites.sh /tmp/install_prerequisites.sh
-COPY .gitmodules /tmp/.gitmodules
-# Manually run this command locally to create tpls_commits.lock file
-# git config --file .gitmodules --get-regexp '^submodule\..*\.path$' \
-#         | awk '{print $2}' \
-#         | while read p; do printf "%s %s\n" "$(git rev-parse HEAD:$p)" "$p"; done \
-#         > tpls_commits.lock
-COPY tpls_commits.lock /tmp/tpls_commits.lock
-RUN sudo bash /tmp/install_prerequisites.sh -l /tmp/tpls_commits.lock
-
-# Without prerequisites
-FROM $base_image AS without_tpls
-RUN echo "Default build without prerequisites"
-
-# Run the tar command and then uncomment ADD cudaq.tar.gz ... in order to
-# override the installation.
-# tar czvf /workspaces/cuda-quantum/cudaq.tar.gz -C /usr/local/cudaq .
-# ADD cudaq.tar.gz /opt/nvidia/cudaq
-
-RUN sudo mkdir /nvqc_scripts
-ADD tools/cudaq-qpud/nvqc_proxy.py /nvqc_scripts
-ADD tools/cudaq-qpud/json_request_runner.py /nvqc_scripts
-ADD scripts/nvqc_launch.sh /nvqc_scripts
-
-ENTRYPOINT ["bash", "-l", "/nvqc_scripts/nvqc_launch.sh"]
diff --git a/docs/sphinx/api/languages/cpp_api.rst b/docs/sphinx/api/languages/cpp_api.rst
index bc63e68d2d1..36b0088138c 100644
--- a/docs/sphinx/api/languages/cpp_api.rst
+++ b/docs/sphinx/api/languages/cpp_api.rst
@@ -326,8 +326,6 @@ Platform
 
 .. doxygenclass:: cudaq::BaseRemoteSimulatorQPU
 
-.. doxygenclass:: cudaq::BaseNvcfSimulatorQPU
-
 .. doxygenclass:: cudaq::AnalogRemoteRESTQPU    
 
 .. doxygenclass:: cudaq::FermioniqBaseQPU
@@ -340,8 +338,6 @@ Platform
 .. doxygenstruct:: cudaq::RemoteCapabilities
     :members:
 
-.. doxygenclass:: cudaq::SerializedCodeExecutionContext
-
 .. doxygentypedef:: cudaq::QuantumTask
 
 .. doxygentypedef:: cudaq::QubitConnectivity
diff --git a/docs/sphinx/releases.rst b/docs/sphinx/releases.rst
index 1a2ac96b33a..952250cfb47 100644
--- a/docs/sphinx/releases.rst
+++ b/docs/sphinx/releases.rst
@@ -147,7 +147,7 @@ The full change log can be found `here <https://github.com/NVIDIA/cuda-quantum/r
 
 **0.7.0**
 
-The 0.7.0 release adds support for using :doc:`NVIDIA Quantum Cloud <using/backends/cloud/nvqc>`,
+The 0.7.0 release adds support for using NVIDIA Quantum Cloud,
 giving you access to our most powerful GPU-accelerated simulators even if you don't have an NVIDIA GPU.
 With 0.7.0, we have furthermore greatly increased expressiveness of the Python and C++ language frontends. 
 Check out our `documentation <https://nvidia.github.io/cuda-quantum/0.7.0/using/quick_start.html>`__ 
diff --git a/docs/sphinx/snippets/cpp/using/cudaq/nvqc/nvqc_intro.cpp b/docs/sphinx/snippets/cpp/using/cudaq/nvqc/nvqc_intro.cpp
deleted file mode 100644
index 5427c4fd27a..00000000000
--- a/docs/sphinx/snippets/cpp/using/cudaq/nvqc/nvqc_intro.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                  *
- * All rights reserved.                                                        *
- *                                                                             *
- * This source code and the accompanying materials are made available under    *
- * the terms of the Apache License 2.0 which accompanies this distribution.    *
- ******************************************************************************/
-
-// [Begin Documentation]
-#include <cudaq.h>
-
-// Define a simple quantum kernel to execute on NVQC.
-struct ghz {
-  // Maximally entangled state between 25 qubits.
-  auto operator()() __qpu__ {
-    constexpr int NUM_QUBITS = 25;
-    cudaq::qvector q(NUM_QUBITS);
-    h(q[0]);
-    for (int i = 0; i < NUM_QUBITS - 1; i++) {
-      x<cudaq::ctrl>(q[i], q[i + 1]);
-    }
-    auto result = mz(q);
-  }
-};
-
-int main() {
-  auto counts = cudaq::sample(ghz{});
-  counts.dump();
-}
diff --git a/docs/sphinx/snippets/cpp/using/cudaq/nvqc/nvqc_mqpu.cpp b/docs/sphinx/snippets/cpp/using/cudaq/nvqc/nvqc_mqpu.cpp
deleted file mode 100644
index 5480ff5600e..00000000000
--- a/docs/sphinx/snippets/cpp/using/cudaq/nvqc/nvqc_mqpu.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                  *
- * All rights reserved.                                                        *
- *                                                                             *
- * This source code and the accompanying materials are made available under    *
- * the terms of the Apache License 2.0 which accompanies this distribution.    *
- ******************************************************************************/
-
-// [Begin Documentation]
-#include <cudaq.h>
-#include <cudaq/algorithm.h>
-#include <cudaq/gradients.h>
-#include <cudaq/optimizers.h>
-#include <iostream>
-
-int main() {
-  cudaq::spin_op h =
-      5.907 - 2.1433 * cudaq::spin_op::x(0) * cudaq::spin_op::x(1) -
-      2.1433 * cudaq::spin_op::y(0) * cudaq::spin_op::y(1) +
-      .21829 * cudaq::spin_op::z(0) - 6.125 * cudaq::spin_op::z(1);
-
-  auto [ansatz, theta] = cudaq::make_kernel<double>();
-  auto q = ansatz.qalloc();
-  auto r = ansatz.qalloc();
-  ansatz.x(q);
-  ansatz.ry(theta, r);
-  ansatz.x<cudaq::ctrl>(r, q);
-
-  // Run VQE with a gradient-based optimizer.
-  // Delegate cost function and gradient computation across different NVQC-based
-  // QPUs.
-  // Note: this needs to be compiled with `--nvqc-nqpus 3` create 3 virtual
-  // QPUs.
-  cudaq::optimizers::lbfgs optimizer;
-  auto [opt_val, opt_params] = optimizer.optimize(
-      /*dim=*/1, /*opt_function*/ [&](const std::vector<double> &params,
-                                      std::vector<double> &grads) {
-        // Queue asynchronous jobs to do energy evaluations across multiple QPUs
-        auto energy_future =
-            cudaq::observe_async(/*qpu_id=*/0, ansatz, h, params[0]);
-        const double paramShift = M_PI_2;
-        auto plus_future = cudaq::observe_async(/*qpu_id=*/1, ansatz, h,
-                                                params[0] + paramShift);
-        auto minus_future = cudaq::observe_async(/*qpu_id=*/2, ansatz, h,
-                                                 params[0] - paramShift);
-        grads[0] = (plus_future.get().expectation() -
-                    minus_future.get().expectation()) /
-                   2.0;
-        return energy_future.get().expectation();
-      });
-  std::cout << "Minimum energy = " << opt_val << " (expected -1.74886).\n";
-}
diff --git a/docs/sphinx/snippets/python/using/cudaq/nvqc/nvqc_intro.py b/docs/sphinx/snippets/python/using/cudaq/nvqc/nvqc_intro.py
deleted file mode 100644
index ead26814f23..00000000000
--- a/docs/sphinx/snippets/python/using/cudaq/nvqc/nvqc_intro.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# ============================================================================ #
-# Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                   #
-# All rights reserved.                                                         #
-#                                                                              #
-# This source code and the accompanying materials are made available under     #
-# the terms of the Apache License 2.0 which accompanies this distribution.     #
-# ============================================================================ #
-# [Begin Documentation]
-import cudaq
-
-cudaq.set_target("nvqc")
-num_qubits = 25
-# Define a simple quantum kernel to execute on NVQC.
-kernel = cudaq.make_kernel()
-qubits = kernel.qalloc(num_qubits)
-# Maximally entangled state between 25 qubits.
-kernel.h(qubits[0])
-for i in range(num_qubits - 1):
-    kernel.cx(qubits[i], qubits[i + 1])
-kernel.mz(qubits)
-
-counts = cudaq.sample(kernel)
-print(counts)
diff --git a/docs/sphinx/snippets/python/using/cudaq/nvqc/nvqc_mqpu.py b/docs/sphinx/snippets/python/using/cudaq/nvqc/nvqc_mqpu.py
deleted file mode 100644
index 2055be2659c..00000000000
--- a/docs/sphinx/snippets/python/using/cudaq/nvqc/nvqc_mqpu.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# ============================================================================ #
-# Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                   #
-# All rights reserved.                                                         #
-#                                                                              #
-# This source code and the accompanying materials are made available under     #
-# the terms of the Apache License 2.0 which accompanies this distribution.     #
-# ============================================================================ #
-# [Begin Documentation]
-import cudaq
-from cudaq import spin
-import math
-
-# Use NVQC with 3 virtual QPUs
-cudaq.set_target("nvqc", nqpus=3)
-
-print("Number of QPUs:", cudaq.get_target().num_qpus())
-# Create the parameterized ansatz
-kernel, theta = cudaq.make_kernel(float)
-qreg = kernel.qalloc(2)
-kernel.x(qreg[0])
-kernel.ry(theta, qreg[1])
-kernel.cx(qreg[1], qreg[0])
-
-# Define its spin Hamiltonian.
-hamiltonian = (5.907 - 2.1433 * spin.x(0) * spin.x(1) -
-               2.1433 * spin.y(0) * spin.y(1) + 0.21829 * spin.z(0) -
-               6.125 * spin.z(1))
-
-
-def opt_gradient(parameter_vector):
-    # Evaluate energy and gradient on different remote QPUs
-    # (i.e., concurrent job submissions to NVQC)
-    energy_future = cudaq.observe_async(kernel,
-                                        hamiltonian,
-                                        parameter_vector[0],
-                                        qpu_id=0)
-    plus_future = cudaq.observe_async(kernel,
-                                      hamiltonian,
-                                      parameter_vector[0] + 0.5 * math.pi,
-                                      qpu_id=1)
-    minus_future = cudaq.observe_async(kernel,
-                                       hamiltonian,
-                                       parameter_vector[0] - 0.5 * math.pi,
-                                       qpu_id=2)
-    return (energy_future.get().expectation(), [
-        (plus_future.get().expectation() - minus_future.get().expectation()) /
-        2.0
-    ])
-
-
-optimizer = cudaq.optimizers.LBFGS()
-optimal_value, optimal_parameters = optimizer.optimize(1, opt_gradient)
-print("Ground state energy =", optimal_value)
-print("Optimal parameters =", optimal_parameters)
diff --git a/docs/sphinx/targets/cpp/nvqc_qml.cpp b/docs/sphinx/targets/cpp/nvqc_qml.cpp
deleted file mode 100644
index c6a12d958b7..00000000000
--- a/docs/sphinx/targets/cpp/nvqc_qml.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-// Compile and run with:
-// ```
-// nvq++ --target nvqc nvqc_qml.cpp -o out.x
-// ./out.x
-// ```
-// Assumes a valid NVQC API key has been set in the `NVQC_API_KEY` environment
-// variable. Please refer to the documentations for information about how to
-// attain NVQC API key.
-
-// This example demonstrates the simulation of large parameterized quantum
-// circuits with NVQC. These parameterized circuits can be applied to quantum
-// machine learning to classify data points, e.g. see
-// https://arxiv.org/pdf/1906.07682.pdf.
-
-#include <ctime>
-#include <cudaq.h>
-#include <cudaq/algorithm.h>
-#include <cudaq/optimizers.h>
-#include <random>
-
-struct kernel {
-
-  auto operator()(const int n_qubits,
-                  const std::vector<double> parameters) __qpu__ {
-
-    cudaq::qvector qubits(n_qubits);
-    h(qubits);
-
-    for (size_t i = 0; i < n_qubits; i++) {
-      rx(parameters[i], qubits[i]);
-    }
-
-    for (size_t i = 0; i < n_qubits; i++) {
-      ry(parameters[i + n_qubits], qubits[i]);
-    }
-
-    h(qubits);
-
-    for (size_t i = 0; i < n_qubits; i++) {
-      rz(parameters[i + n_qubits * 2], qubits[i]);
-    }
-
-    for (size_t i = 0; i < n_qubits; i += 2) {
-      cx(qubits[i], qubits[i + 1]);
-    }
-
-    for (size_t i = 0; i < n_qubits; i++) {
-      rz(parameters[i + n_qubits * 2], qubits[i]);
-    }
-
-    for (size_t i = 0; i < n_qubits; i += 2) {
-      cy(qubits[i], qubits[i + 1]);
-    }
-
-    for (size_t i = 0; i < n_qubits; i++) {
-      ry(parameters[i + n_qubits], qubits[i]);
-    }
-
-    for (size_t i = 0; i < n_qubits; i += 2) {
-      cz(qubits[i], qubits[i + 1]);
-    }
-
-    x(qubits);
-    y(qubits);
-    h(qubits);
-  }
-};
-
-std::vector<double> initial_parameters(int n_parameters, int seed) {
-
-  std::default_random_engine generator(seed);
-  std::uniform_real_distribution<float> distribution(0.0, 1.0);
-  std::vector<double> parameters(n_parameters);
-
-  for (size_t i = 0; i < n_parameters; i++) {
-    parameters[i] = distribution(generator);
-  }
-  return parameters;
-}
-
-int main() {
-
-  const int n_qubits = 26;
-  const int n_parameters = 3 * n_qubits;
-  std::vector<double> parameters = initial_parameters(n_parameters, 13);
-  auto h = cudaq::spin::z(0);
-
-  auto start = clock();
-  auto exp_val = cudaq::observe(kernel{}, h, n_qubits, parameters);
-  auto end = clock();
-
-  printf("Expectation Value: %f \n", exp_val.expectation());
-  printf("Runtime: %fs \n", float(end - start) / float(CLOCKS_PER_SEC));
-
-  return 0;
-}
diff --git a/docs/sphinx/targets/cpp/nvqc_sample.cpp b/docs/sphinx/targets/cpp/nvqc_sample.cpp
deleted file mode 100644
index 4e2aa69b36b..00000000000
--- a/docs/sphinx/targets/cpp/nvqc_sample.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-// Compile and run with:
-// ```
-// nvq++ --target nvqc nvqc_sample.cpp -o out.x
-// ./out.x
-// ```
-// Assumes a valid NVQC API key has been set in the `NVQC_API_KEY` environment
-// variable. Please refer to the documentations for information about how to
-// attain NVQC API key.
-
-#include <cudaq.h>
-#include <iostream>
-
-// Define a simple quantum kernel to execute on NVQC.
-struct ghz {
-  // Maximally entangled state between 25 qubits.
-  auto operator()() __qpu__ {
-    constexpr int NUM_QUBITS = 25;
-    cudaq::qvector q(NUM_QUBITS);
-    h(q[0]);
-    for (int i = 0; i < NUM_QUBITS - 1; i++) {
-      x<cudaq::ctrl>(q[i], q[i + 1]);
-    }
-    auto result = mz(q);
-  }
-};
-
-int main() {
-  // Submit to NVQC asynchronously (e.g., continue executing
-  // code in the file until the job has been returned).
-  auto async_counts_handle = cudaq::sample_async(ghz{});
-  // ... classical code to execute in the meantime ...
-  std::cout << "Waiting for NVQC result...\n";
-
-  // Calling .get() on the handle to synchronize the result.
-  auto async_counts = async_counts_handle.get();
-  async_counts.dump();
-
-  // OR: Submit to NVQC synchronously (e.g., wait for the job
-  // result to be returned before proceeding).
-  auto counts = cudaq::sample(ghz{});
-  counts.dump();
-}
diff --git a/docs/sphinx/targets/cpp/nvqc_state.cpp b/docs/sphinx/targets/cpp/nvqc_state.cpp
deleted file mode 100644
index d7cb6317f80..00000000000
--- a/docs/sphinx/targets/cpp/nvqc_state.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-// Compile and run with:
-// ```
-// nvq++ --target nvqc --nvqc-backend tensornet nvqc_state.cpp -o out.x
-// ./out.x
-// ```
-// Assumes a valid NVQC API key has been set in the `NVQC_API_KEY` environment
-// variable. Please refer to the documentations for information about how to
-// attain NVQC API key.
-
-#include "cudaq/algorithms/get_state.h"
-#include <cudaq.h>
-#include <iostream>
-
-int main() {
-  auto kernel = cudaq::make_kernel();
-  const std::size_t NUM_QUBITS = 20;
-  auto q = kernel.qalloc(NUM_QUBITS);
-  kernel.h(q[0]);
-  for (std::size_t qId = 0; qId < NUM_QUBITS - 1; ++qId)
-    kernel.x<cudaq::ctrl>(q[qId], q[qId + 1]);
-  auto state = cudaq::get_state(kernel);
-  std::cout << "Amplitude(00..00) = " << state[0] << "\n";
-  std::cout << "Amplitude(11..11) = " << state[(1ULL << NUM_QUBITS) - 1]
-            << "\n";
-}
diff --git a/docs/sphinx/targets/cpp/nvqc_vqe.cpp b/docs/sphinx/targets/cpp/nvqc_vqe.cpp
deleted file mode 100644
index 72b70767843..00000000000
--- a/docs/sphinx/targets/cpp/nvqc_vqe.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-// Compile and run with:
-// ```
-// nvq++ --target nvqc --nvqc-nqpus 3 nvqc_vqe.cpp -o out.x
-// ./out.x
-// ```
-// Note: we set `nqpus` to 3 to establish 3 concurrent NVQC job submission
-// pipes.
-// Assumes a valid NVQC API key has been set in the `NVQC_API_KEY` environment
-// variable. Please refer to the documentations for information about how to
-// attain NVQC API key.
-
-#include <cudaq.h>
-#include <cudaq/algorithm.h>
-#include <cudaq/gradients.h>
-#include <cudaq/optimizers.h>
-#include <iostream>
-
-int main() {
-  cudaq::spin_op h =
-      5.907 - 2.1433 * cudaq::spin_op::x(0) * cudaq::spin_op::x(1) -
-      2.1433 * cudaq::spin_op::y(0) * cudaq::spin_op::y(1) +
-      .21829 * cudaq::spin_op::z(0) - 6.125 * cudaq::spin_op::z(1);
-
-  auto [ansatz, theta] = cudaq::make_kernel<double>();
-  auto q = ansatz.qalloc();
-  auto r = ansatz.qalloc();
-  ansatz.x(q);
-  ansatz.ry(theta, r);
-  ansatz.x<cudaq::ctrl>(r, q);
-
-  // Run VQE with a gradient-based optimizer.
-  // Delegate cost function and gradient computation across different NVQC-based
-  // QPUs. Note: depending on the user's account, there might be different
-  // number of NVQC worker instances available. Hence, although we're making
-  // concurrent job submissions across multiple QPUs, the speedup would be
-  // determined by the number of NVQC worker instances.
-  cudaq::optimizers::lbfgs optimizer;
-  auto [opt_val, opt_params] = optimizer.optimize(
-      /*dim=*/1, /*opt_function*/ [&](const std::vector<double> &params,
-                                      std::vector<double> &grads) {
-        // Queue asynchronous jobs to do energy evaluations across multiple QPUs
-        auto energy_future =
-            cudaq::observe_async(/*qpu_id=*/0, ansatz, h, params[0]);
-        const double paramShift = M_PI_2;
-        auto plus_future = cudaq::observe_async(/*qpu_id=*/1, ansatz, h,
-                                                params[0] + paramShift);
-        auto minus_future = cudaq::observe_async(/*qpu_id=*/2, ansatz, h,
-                                                 params[0] - paramShift);
-        grads[0] = (plus_future.get().expectation() -
-                    minus_future.get().expectation()) /
-                   2.0;
-        return energy_future.get().expectation();
-      });
-  std::cout << "Minimum energy = " << opt_val << " (expected -1.74886).\n";
-}
diff --git a/docs/sphinx/targets/python/nvqc_mgpu.py b/docs/sphinx/targets/python/nvqc_mgpu.py
deleted file mode 100644
index 00b7d806e5e..00000000000
--- a/docs/sphinx/targets/python/nvqc_mgpu.py
+++ /dev/null
@@ -1,114 +0,0 @@
-import argparse
-import cudaq
-import random
-
-# This example assumes the NVQC API key has been set in the `NVQC_API_KEY` environment variable.
-# If not, you can set the API Key environment variable in the Python script with:
-# ```
-# os.environ["NVQC_API_KEY"] = "<YOUR NVQC API KEY>"`
-# ```
-
-
-def random_bitstring(length: int):
-    bitstring = ""
-    for bit in range(length):
-        bitstring += str(random.randint(0, 1))
-    return bitstring
-
-
-def oracle(kernel: cudaq.Kernel, register: cudaq.QuakeValue,
-           auxillary_qubit: cudaq.QuakeValue, hidden_bitstring: str):
-    """
-    The inner-product oracle for the Bernstein-Vazirani algorithm.
-    """
-    for index, bit in enumerate(hidden_bitstring):
-        if bit == "0":
-            # Apply identity operation to the qubit if it's
-            # in the 0-state.
-            # In this case, we do nothing.
-            pass
-        else:
-            # Otherwise, apply a `cx` gate with the current qubit as
-            # the control and the auxillary qubit as the target.
-            kernel.cx(control=register[index], target=auxillary_qubit)
-
-
-def bernstein_vazirani(qubit_count: int):
-    """
-    Returns a kernel implementing the Bernstein-Vazirani algorithm
-    for a random, hidden bitstring.
-    """
-    kernel = cudaq.make_kernel()
-    # Allocate the specified number of qubits - this
-    # corresponds to the length of the hidden bitstring.
-    qubits = kernel.qalloc(qubit_count)
-    # Allocate an extra auxillary qubit.
-    auxillary_qubit = kernel.qalloc()
-
-    # Prepare the auxillary qubit.
-    kernel.h(auxillary_qubit)
-    kernel.z(auxillary_qubit)
-
-    # Place the rest of the register in a superposition state.
-    kernel.h(qubits)
-
-    # Generate a random, hidden bitstring for the oracle
-    # to encode. Note: we define the bitstring here so
-    # as to be able to return it for verification.
-    hidden_bitstring = random_bitstring(qubit_count)
-
-    # Query the oracle.
-    oracle(kernel, qubits, auxillary_qubit, hidden_bitstring)
-
-    # Apply another set of Hadamards to the register.
-    kernel.h(qubits)
-
-    # Apply measurement gates to just the `qubits`
-    # (excludes the auxillary qubit).
-    kernel.mz(qubits)
-    return kernel, hidden_bitstring
-
-
-# This example demonstrated GPU-accelerated simulator backends on NVQC can easily handle a large number of qubits.
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(
-        prog='python',
-        description='Run a Bernstein-Vazirani algorithm using NVQC.',
-        epilog=
-        'For more information about CUDA-Q, see https://nvidia.github.io/cuda-quantum'
-    )
-    parser.add_argument('--size',
-                        type=int,
-                        required=False,
-                        default=30,
-                        help='The number of bits in the secret string.')
-    parser.add_argument('--ngpus',
-                        type=int,
-                        required=False,
-                        default=1,
-                        help='The number of NVQC GPUs to run the simulation.')
-    parser.add_argument('--seed',
-                        type=int,
-                        required=False,
-                        default=0,
-                        help='The random seed to generate the secret string.')
-    args = parser.parse_args()
-
-    # Depending on the number of GPUs requested, you can
-    # set the size of the secret string to around 31-34 (total qubit count = string length + 1) when
-    # you pass the `--ngpus` as a command line argument.
-    qubit_count = args.size
-    if args.seed != 0:
-        random.seed(args.seed)
-
-    cudaq.set_target("nvqc", backend="nvidia-mgpu", ngpus=args.ngpus)
-
-    print(
-        f"Running on NVQC using 'nvidia-mgpu' simulator backend with {args.ngpus} GPU(s) ..."
-    )
-    kernel, hidden_bitstring = bernstein_vazirani(qubit_count)
-    result = cudaq.sample(kernel)
-
-    print(f"encoded bitstring = {hidden_bitstring}")
-    print(f"measured state = {result.most_probable()}")
-    print(f"Were we successful? {hidden_bitstring == result.most_probable()}")
diff --git a/docs/sphinx/targets/python/nvqc_sample.py b/docs/sphinx/targets/python/nvqc_sample.py
deleted file mode 100644
index 55c11b4f1f3..00000000000
--- a/docs/sphinx/targets/python/nvqc_sample.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import cudaq
-
-# This example assumes the NVQC API key has been set in the `NVQC_API_KEY` environment variable.
-# If not, you can set the API Key environment variable in the Python script with:
-# ```
-# os.environ["NVQC_API_KEY"] = "<YOUR NVQC API KEY>"`
-# ```
-cudaq.set_target("nvqc", backend="tensornet")
-
-# Note: The `tensornet` simulator is capable of distributing tensor contraction operations across multiple GPUs.
-# User can use the `ngpus` option to target a multi-GPU NVQC endpoint.
-# For example, to use the `tensornet` simulator with 8 GPUs, we can do
-# `cudaq.set_target("nvqc", backend="tensornet", ngpus=8)`
-# Please refer to your NVQC dashboard for the list of available multi-GPU configurations.
-num_qubits = 50
-kernel = cudaq.make_kernel()
-qubits = kernel.qalloc(num_qubits)
-# Place qubits in superposition state.
-kernel.h(qubits[0])
-for i in range(num_qubits - 1):
-    kernel.cx(qubits[i], qubits[i + 1])
-# Measure.
-kernel.mz(qubits)
-
-counts = cudaq.sample(kernel, shots_count=100)
-print(counts)
diff --git a/docs/sphinx/targets/python/nvqc_state.py b/docs/sphinx/targets/python/nvqc_state.py
deleted file mode 100644
index eae5c423395..00000000000
--- a/docs/sphinx/targets/python/nvqc_state.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import cudaq
-
-# This example assumes the NVQC API key has been set in the `NVQC_API_KEY` environment variable.
-# If not, you can set the API Key environment variable in the Python script with:
-# ```
-# os.environ["NVQC_API_KEY"] = "<YOUR NVQC API KEY>"`
-# ```
-
-cudaq.set_target("nvqc")
-
-num_qubits = 20
-kernel = cudaq.make_kernel()
-qubits = kernel.qalloc(num_qubits)
-# Place qubits in GHZ state.
-kernel.h(qubits[0])
-for i in range(num_qubits - 1):
-    kernel.cx(qubits[i], qubits[i + 1])
-
-state = cudaq.get_state(kernel)
-print("Amplitude(00..00) =", state[0])
-print("Amplitude(11..11) =", state[2**num_qubits - 1])
diff --git a/docs/sphinx/targets/python/nvqc_vqe.py b/docs/sphinx/targets/python/nvqc_vqe.py
deleted file mode 100644
index da90150a6e6..00000000000
--- a/docs/sphinx/targets/python/nvqc_vqe.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import cudaq
-from cudaq import spin
-import math
-
-# This example assumes the NVQC API key has been set in the `NVQC_API_KEY` environment variable.
-# If not, you can set the API Key environment variable in the Python script with:
-# ```
-# os.environ["NVQC_API_KEY"] = "<YOUR NVQC API KEY>"`
-# ```
-
-cudaq.set_target("nvqc", nqpus=3)
-
-print("Number of QPUs:", cudaq.get_target().num_qpus())
-
-
-# Note: depending on the user's account, there might be different
-# number of NVQC worker instances available. Hence, although we're making
-# concurrent job submissions across multiple QPUs, the speedup would be
-# determined by the number of NVQC worker instances.
-# Create the parameterized ansatz
-@cudaq.kernel
-def ansatz(theta: float):
-    qvector = cudaq.qvector(2)
-    x(qvector[0])
-    ry(theta, qvector[1])
-    x.ctrl(qvector[1], qvector[0])
-
-
-# Define its spin Hamiltonian.
-hamiltonian = (5.907 - 2.1433 * spin.x(0) * spin.x(1) -
-               2.1433 * spin.y(0) * spin.y(1) + 0.21829 * spin.z(0) -
-               6.125 * spin.z(1))
-
-
-def opt_gradient(parameter_vector):
-    # Evaluate energy and gradient on different remote QPUs
-    # (i.e., concurrent job submissions to NVQC)
-    energy_future = cudaq.observe_async(ansatz,
-                                        hamiltonian,
-                                        parameter_vector[0],
-                                        qpu_id=0)
-    plus_future = cudaq.observe_async(ansatz,
-                                      hamiltonian,
-                                      parameter_vector[0] + 0.5 * math.pi,
-                                      qpu_id=1)
-    minus_future = cudaq.observe_async(ansatz,
-                                       hamiltonian,
-                                       parameter_vector[0] - 0.5 * math.pi,
-                                       qpu_id=2)
-    return (energy_future.get().expectation(), [
-        (plus_future.get().expectation() - minus_future.get().expectation()) /
-        2.0
-    ])
-
-
-optimizer = cudaq.optimizers.LBFGS()
-optimal_value, optimal_parameters = optimizer.optimize(1, opt_gradient)
-print("Ground state energy =", optimal_value)
-print("Optimal parameters =", optimal_parameters)
diff --git a/docs/sphinx/using/backends/cloud.rst b/docs/sphinx/using/backends/cloud.rst
index 20ef1d74117..4601b1e2da2 100644
--- a/docs/sphinx/using/backends/cloud.rst
+++ b/docs/sphinx/using/backends/cloud.rst
@@ -7,5 +7,4 @@ CUDA-Q provides a number of options to access hardware resources (GPUs and QPUs)
    :maxdepth: 1
       
         Amazon Braket (braket) <cloud/braket.rst>
-        NVIDIA Quantum Cloud (nvqc) <cloud/nvqc.rst>
 
diff --git a/docs/sphinx/using/backends/cloud/nvqc.rst b/docs/sphinx/using/backends/cloud/nvqc.rst
deleted file mode 100644
index ba69faef432..00000000000
--- a/docs/sphinx/using/backends/cloud/nvqc.rst
+++ /dev/null
@@ -1,257 +0,0 @@
-NVIDIA Quantum Cloud
-+++++++++++++++++++++
-
-NVIDIA Quantum Cloud (NVQC) offers universal access to the world’s most powerful computing platform, 
-for every quantum researcher to do their life’s work.
-To learn more about NVQC visit this `link <https://www.nvidia.com/en-us/solutions/quantum-computing/cloud>`__. 
-
-Apply for early access `here <https://developer.nvidia.com/quantum-cloud-early-access-join>`__. 
-Access to the Quantum Cloud early access program requires an NVIDIA Developer account.
-
-Quick Start
-^^^^^^^^^^^
-Once you have been approved for an early access to NVQC, you will be able to follow these instructions to use it.
-
-1. Follow the instructions in your NVQC Early Access welcome email to obtain an API Key for NVQC. 
-You can also find the instructions `here <https://developer.nvidia.com/quantum-cloud-early-access-members>`__ (link available only for approved users)
-
-2. Set the environment variable `NVQC_API_KEY` to the API Key obtained above.
-
- .. code-block:: console
-
-    export NVQC_API_KEY=<your NVQC API key>
-
-You may wish to persist that environment variable between bash sessions, e.g., by adding it to your `$HOME/.bashrc` file.
-
-3. Run your first NVQC example
-
-The following is a typical CUDA-Q kernel example. 
-By selecting the `nvqc` target, the quantum circuit simulation will run on NVQC in the cloud, rather than running locally.
-
-
-.. tab:: Python
-    
-    .. literalinclude:: ../../../snippets/python/using/cudaq/nvqc/nvqc_intro.py
-        :language: python
-        :start-after: [Begin Documentation]
-
-    .. code-block:: console
-        
-        [2024-03-14 19:26:31.438] Submitting jobs to NVQC service with 1 GPU(s). Max execution time: 3600 seconds (excluding queue wait time).
-
-        ================ NVQC Device Info ================
-        GPU Device Name: "NVIDIA H100 80GB HBM3"
-        CUDA Driver Version / Runtime Version: 12.2 / 12.0
-        Total global memory (GB): 79.1
-        Memory Clock Rate (MHz): 2619.000
-        GPU Clock Rate (MHz): 1980.000
-        ==================================================
-        { 1111111111111111111111111:486 0000000000000000000000000:514 }
-
-.. tab:: C++
-
-    .. literalinclude:: ../../../snippets/cpp/using/cudaq/nvqc/nvqc_intro.cpp
-        :language: cpp
-        :start-after: [Begin Documentation]
-
-    The code above is saved in `nvqc_intro.cpp` and compiled with the following command, targeting the :code:`nvqc` platform
-
-    .. code-block:: console
-
-        nvq++ nvqc_intro.cpp -o nvqc_intro.x --target nvqc 
-        ./nvqc_intro.x
-
-        [2024-03-14 19:25:05.545] Submitting jobs to NVQC service with 1 GPU(s). Max execution time: 3600 seconds (excluding queue wait time).
-
-        ================ NVQC Device Info ================
-        GPU Device Name: "NVIDIA H100 80GB HBM3"
-        CUDA Driver Version / Runtime Version: 12.2 / 12.0
-        Total global memory (GB): 79.1
-        Memory Clock Rate (MHz): 2619.000
-        GPU Clock Rate (MHz): 1980.000
-        ==================================================
-        { 
-        __global__ : { 1111111111111111111111111:487 0000000000000000000000000:513 }
-        result : { 1111111111111111111111111:487 0000000000000000000000000:513 }
-        }
-
-
-Simulator Backend Selection
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-NVQC hosts all CUDA-Q simulator backends (see :ref:`simulators <simulators>`). 
-You may use the NVQC `backend` (Python) or `--nvqc-backend` (C++) option to select the simulator to be used by the service.
-
-For example, to request the `tensornet` simulator backend, the user can do the following for C++ or Python.
-
-.. tab:: Python
-
-    .. code-block:: python
-
-        cudaq.set_target("nvqc", backend="tensornet")
-
-.. tab:: C++
-    
-    .. code-block:: console
-
-        nvq++ nvqc_sample.cpp -o nvqc_sample.x --target nvqc --nvqc-backend tensornet
-
-
-.. note::
-
-  By default, the single-GPU single-precision `custatevec-fp32` simulator backend will be selected if backend information is not specified.
-
-Multiple GPUs
-^^^^^^^^^^^^^^
-
-Some CUDA-Q simulator backends are capable of multi-GPU distribution as detailed in :ref:`simulators <simulators>`.
-For example, the `nvidia-mgpu` backend can partition and distribute state vector simulation to multiple GPUs to simulate 
-a larger number of qubits, whose state vector size grows beyond the memory size of a single GPU.
-
-To select a specific number of GPUs on the NVQC managed service, the following `ngpus` (Python) or `--nvqc-ngpus` (C++) option can be used.
-
-
-.. tab:: Python
-
-    .. code-block:: python
-
-        cudaq.set_target("nvqc", backend="nvidia-mgpu", ngpus=4)
-
-.. tab:: C++
-
-    .. code-block:: console
-
-        nvq++ nvqc_sample.cpp -o nvqc_sample.x --target nvqc --nvqc-backend nvidia-mgpu --nvqc-ngpus 4
-
-
-.. note::
-
-    If your NVQC subscription does not contain service instances that have the specified number of GPUs, 
-    you may encounter the following error.
-
-    .. code-block:: console
-        
-        Unable to find NVQC deployment with 16 GPUs.
-        Available deployments have {1, 2, 4, 8} GPUs.
-        Please check your `ngpus` value (Python) or `--nvqc-ngpus` value (C++).
-
-.. note::
-
-    Not all simulator backends are capable of utilizing multiple GPUs. 
-    When requesting a multiple-GPU service with a single-GPU simulator backend, 
-    you might encounter the following log message:
-
-    .. code-block:: console
-        
-        The requested backend simulator (custatevec-fp32) is not capable of using all 4 GPUs requested.
-        Only one GPU will be used for simulation.
-        Please refer to CUDA-Q documentation for a list of multi-GPU capable simulator backends.
-
-    Consider removing the `ngpus` value (Python) or `--nvqc-ngpus` value (C++) to use the default of 1 GPU 
-    if you don't need to use a multi-GPU backend to better utilize NVQC resources.
-
-    Please refer to the table below for a list of backend simulator names along with its multi-GPU capability.
-
-    .. list-table:: Simulator Backends
-        :widths: 20 50 10 10
-        :header-rows: 1
-
-        *   - Name
-            - Description
-            - GPU Accelerated 
-            - Multi-GPU 
-        *   - `qpp`
-            - CPU-only state vector simulator
-            - no
-            - no
-        *   - `dm`
-            - CPU-only density matrix simulator
-            - no
-            - no
-        *   - `custatevec-fp32`
-            - Single-precision `cuStateVec` simulator
-            - yes
-            - no
-        *   - `custatevec-fp64`
-            - Double-precision `cuStateVec` simulator
-            - yes
-            - no
-        *   - `tensornet`
-            - Double-precision `cuTensorNet` full tensor network contraction simulator
-            - yes
-            - yes
-        *   - `tensornet-mps`
-            - Double-precision `cuTensorNet` matrix-product state simulator
-            - yes
-            - no
-        *   - `nvidia-mgpu`
-            - Double-precision `cuStateVec` multi-GPU simulator
-            - yes
-            - yes
-    
-
-Multiple QPUs Asynchronous Execution
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-NVQC provides scalable QPU virtualization services, whereby clients
-can submit asynchronous jobs simultaneously to NVQC. These jobs are
-handled by a pool of service worker instances.
-
-For example, in the following code snippet, using the `nqpus` (Python) or `--nvqc-nqpus` (C++) configuration option,
-the user instantiates 3 virtual QPU instances to submit simulation jobs to NVQC
-calculating the expectation value along with parameter-shift gradients simultaneously.
-
-.. tab:: Python
-
-    .. literalinclude:: ../../../snippets/python/using/cudaq/nvqc/nvqc_mqpu.py
-        :language: python
-        :start-after: [Begin Documentation]
-
-.. tab:: C++
-
-    .. literalinclude:: ../../../snippets/cpp/using/cudaq/nvqc/nvqc_mqpu.cpp
-        :language: cpp
-        :start-after: [Begin Documentation]
-
-    The code above is saved in `nvqc_vqe.cpp` and compiled with the following command, targeting the :code:`nvqc` platform with 3 virtual QPUs.
-
-    .. code-block:: console
-
-        nvq++ nvqc_vqe.cpp -o nvqc_vqe.x --target nvqc --nvqc-nqpus 3 
-        ./nvqc_vqe.x
-
-
-.. note::
-
-    The NVQC managed-service has a pool of worker instances processing incoming requests on a 
-    first-come-first-serve basis. Thus, the attainable speedup using multiple virtual QPUs vs. 
-    sequential execution on a single QPU depends on the NVQC service load. For example, 
-    if the number of free workers is greater than the number of requested virtual QPUs, a linear
-    (ideal) speedup could be achieved. On the other hand, if all the service workers are busy, 
-    multi-QPU distribution may not deliver any substantial speedup.  
-
-FAQ
-^^^^^
-
-1. How do I get more information about my NVQC API submission?
-
-The environment variable `NVQC_LOG_LEVEL` can be used to turn on and off
-certain logs. There are three levels:
-
-- Info (`info`): basic information about NVQC is logged to the console. This is the default.
-
-- Off (`off` or `0`): disable all NVQC logging.
-
-- Trace: (`trace`): log additional information for each NVQC job execution (including timing)
-
-2. I want to persist my API key to a configuration file.
-
-You may persist your NVQC API Key to a credential configuration file in lieu of 
-using the `NVQC_API_KEY` environment variable. 
-The configuration file can be generated as follows, replacing
-the `api_key` value with your NVQC API Key.
-
-.. code:: bash
-
-    echo "key: <api_key>" >> $HOME/.nvqc_config
-
diff --git a/python/runtime/cudaq/algorithms/py_optimizer.cpp b/python/runtime/cudaq/algorithms/py_optimizer.cpp
index 8cbbae1e770..f83ed438d77 100644
--- a/python/runtime/cudaq/algorithms/py_optimizer.cpp
+++ b/python/runtime/cudaq/algorithms/py_optimizer.cpp
@@ -9,7 +9,6 @@
 #include <pybind11/stl.h>
 
 #include "common/JsonConvert.h"
-#include "common/SerializedCodeExecutionContext.h"
 #include "cudaq/algorithms/gradients/central_difference.h"
 #include "cudaq/algorithms/gradients/forward_difference.h"
 #include "cudaq/algorithms/gradients/parameter_shift.h"
@@ -20,43 +19,6 @@
 
 namespace cudaq {
 
-/// Form the SerializedCodeExecutionContext
-static SerializedCodeExecutionContext
-get_serialized_code(std::string &source_code) {
-  SerializedCodeExecutionContext ctx;
-  try {
-    py::object json = py::module_::import("json");
-    auto var_dict = get_serializable_var_dict();
-    ctx.scoped_var_dict = py::str(json.attr("dumps")(var_dict));
-    ctx.source_code = source_code;
-  } catch (py::error_already_set &e) {
-    throw std::runtime_error("Failed to serialized data: " +
-                             std::string(e.what()));
-  }
-  return ctx;
-}
-
-static std::string
-get_required_raw_source_code(const int dim, const py::function &func,
-                             const std::string &optimizer_var_name) {
-  // Get source code and remove the leading whitespace
-  std::string source_code = get_source_code(func);
-
-  // Form the Python call to optimizer.optimize
-  std::ostringstream os;
-  auto obj_func_name = func.attr("__name__").cast<std::string>();
-  os << "energy, params_at_energy = " << optimizer_var_name << ".optimize("
-     << dim << ", " << obj_func_name << ")\n";
-  // The _json_request_result dictionary is a special dictionary where outputs
-  // are saved. Must be serializable to JSON using the JSON structures.
-  os << "_json_request_result['executionContext']['optResult'] = [energy, "
-        "params_at_energy]\n";
-  auto function_call = os.str();
-
-  // Return the combined code
-  return source_code + "\n" + function_call;
-}
-
 /// @brief Bind the `cudaq::optimization_result` typedef.
 void bindOptimizationResult(py::module &mod) {
   py::class_<optimization_result>(mod, "OptimizationResult");
@@ -188,36 +150,6 @@ py::class_<OptimizerT> addPyOptimizer(py::module &mod, std::string &&name) {
       .def(
           "optimize",
           [](OptimizerT &opt, const int dim, py::function &func) {
-            auto &platform = cudaq::get_platform();
-            if (platform.get_remote_capabilities().serializedCodeExec &&
-                platform.num_qpus() == 1) {
-              std::string optimizer_var_name =
-                  cudaq::get_var_name_for_handle(py::cast(&opt));
-              if (optimizer_var_name.empty())
-                throw std::runtime_error(
-                    "Unable to find desired optimizer object in any "
-                    "namespace. Aborting.");
-
-              auto ctx = std::make_unique<cudaq::ExecutionContext>("sample", 0);
-              platform.set_exec_ctx(ctx.get());
-
-              std::string combined_code =
-                  get_required_raw_source_code(dim, func, optimizer_var_name);
-
-              SerializedCodeExecutionContext serialized_code_execution_object =
-                  get_serialized_code(combined_code);
-
-              platform.launchSerializedCodeExecution(
-                  func.attr("__name__").cast<std::string>(),
-                  serialized_code_execution_object);
-
-              platform.reset_exec_ctx();
-              auto result = cudaq::optimization_result{};
-              if (ctx->optResult)
-                result = std::move(*ctx->optResult);
-              return result;
-            }
-
             return opt.optimize(dim, [&](std::vector<double> x,
                                          std::vector<double> &grad) {
               // Call the function.
diff --git a/python/runtime/cudaq/algorithms/py_state.cpp b/python/runtime/cudaq/algorithms/py_state.cpp
index 966a4fa3c1c..b89de2f2f52 100644
--- a/python/runtime/cudaq/algorithms/py_state.cpp
+++ b/python/runtime/cudaq/algorithms/py_state.cpp
@@ -783,8 +783,7 @@ index pair.
   mod.def(
       "get_state",
       [&](py::object kernel, py::args args) {
-        if (holder.getTarget().name == "remote-mqpu" ||
-            holder.getTarget().name == "nvqc")
+        if (holder.getTarget().name == "remote-mqpu")
           return pyGetStateRemote(kernel, args);
         if (holder.getTarget().name == "orca-photonics")
           return pyGetStateLibraryMode(kernel, args);
diff --git a/python/runtime/cudaq/algorithms/py_utils.h b/python/runtime/cudaq/algorithms/py_utils.h
index 227f615a9d1..070a2922ae5 100644
--- a/python/runtime/cudaq/algorithms/py_utils.h
+++ b/python/runtime/cudaq/algorithms/py_utils.h
@@ -16,13 +16,6 @@ namespace py = pybind11;
 
 namespace cudaq {
 
-/// @brief Get a JSON-encoded dictionary of a combination of all local
-/// and global variables that are JSON compatible
-py::dict get_serializable_var_dict();
-
-/// @brief Fetch the Python source code from a `py::function`
-std::string get_source_code(const py::function &func);
-
 /// @brief Find the variable name for a given Python object handle. It searches
 /// locally first, walks up the call stack, and finally checks the global
 /// namespace. If not found, it returns an empty string.
diff --git a/python/runtime/cudaq/algorithms/py_vqe.cpp b/python/runtime/cudaq/algorithms/py_vqe.cpp
index cf793046d14..d030ffd4a3b 100644
--- a/python/runtime/cudaq/algorithms/py_vqe.cpp
+++ b/python/runtime/cudaq/algorithms/py_vqe.cpp
@@ -10,12 +10,9 @@
 #include <pybind11/stl.h>
 
 #include "common/ArgumentWrapper.h"
-#include "common/JsonConvert.h"
-#include "common/SerializedCodeExecutionContext.h"
 #include "cudaq/Optimizer/Dialect/CC/CCTypes.h"
 #include "cudaq/algorithms/gradient.h"
 #include "cudaq/algorithms/optimizer.h"
-#include "py_utils.h"
 #include "py_vqe.h"
 #include "runtime/cudaq/platform/py_alt_launch_kernel.h"
 #include "utils/OpaqueArguments.h"
@@ -173,95 +170,6 @@ pyVQE_remote_cpp(cudaq::quantum_platform &platform, py::object &kernel,
   return ctx->optResult.value_or(optimization_result{});
 }
 
-/// @brief Perform VQE on a remote platform. This function is used for many of
-/// the pyVQE variants below, so some of the parameters may be nullptr.
-static optimization_result
-pyVQE_remote(cudaq::quantum_platform &platform, py::object &kernel,
-             spin_op &hamiltonian, cudaq::optimizer &optimizer,
-             cudaq::gradient *gradient, py::function *argumentMapper,
-             const int n_params, const int shots) {
-  py::object json = py::module_::import("json");
-  py::object inspect = py::module_::import("inspect");
-
-  // Form scoped_vars_str. This is needed for a) capturing user variables when
-  // an argumentMapper is provided, and b) automatically capturing all nested
-  // cudaq.kernels.
-  py::dict scoped_vars = get_serializable_var_dict();
-
-// This macro loads a JSON-like object into scoped_vars[] as
-// scoped_vars["__varname"] = varname. This roughly corresponds to the
-// following Python code:
-//  scoped_vars["__varname/module.name"] = json.loads(varname.to_json())
-#define LOAD_VAR(VAR_NAME)                                                     \
-  do {                                                                         \
-    py::object val = py::cast(VAR_NAME);                                       \
-    scoped_vars[py::str(                                                       \
-        std::string("__" #VAR_NAME "/") +                                      \
-        val.get_type().attr("__module__").cast<std::string>() + "." +          \
-        val.get_type().attr("__name__").cast<std::string>())] =                \
-        json.attr("loads")(val.attr("to_json")());                             \
-  } while (0)
-#define LOAD_VAR_NO_CAST(VAR_NAME)                                             \
-  do {                                                                         \
-    scoped_vars[py::str(                                                       \
-        std::string("__" #VAR_NAME "/") +                                      \
-        VAR_NAME.get_type().attr("__module__").cast<std::string>() + "." +     \
-        VAR_NAME.get_type().attr("__name__").cast<std::string>())] =           \
-        json.attr("loads")(VAR_NAME.attr("to_json")());                        \
-  } while (0)
-
-  auto spin = cudaq::spin_op::canonicalize(hamiltonian);
-  LOAD_VAR(spin);
-  LOAD_VAR(optimizer);
-  LOAD_VAR_NO_CAST(kernel);
-  if (gradient)
-    LOAD_VAR(gradient);
-
-  // Get a string representation of the scoped_vars dictionary. This is
-  // guaranteed to be a JSON-friendly dictionary, so the conversion should occur
-  // cleanly.
-  auto scoped_vars_str = json.attr("dumps")(scoped_vars).cast<std::string>();
-
-  // Form SerializedCodeExecutionContext.source_code
-  std::ostringstream os;
-  if (argumentMapper) {
-    std::string source_code = cudaq::get_source_code(*argumentMapper);
-    // If it is a lambda function and it is used inline with a function call, it
-    // can sometimes include the trailing comma. Remove that here.
-    auto end = source_code.find_last_not_of(", \t\r\n");
-    if (end != std::string::npos)
-      source_code.erase(end + 1);
-    os << "__arg_mapper = " << source_code << '\n';
-  }
-  os << "energy, params_at_energy = cudaq.vqe(";
-  os << "kernel=__kernel, ";
-  if (gradient)
-    os << "gradient_strategy=__gradient, ";
-  os << "spin_operator=__spin, ";
-  os << "optimizer=__optimizer, ";
-  os << "parameter_count=" << n_params << ", ";
-  if (argumentMapper)
-    os << "argument_mapper=__arg_mapper, ";
-  os << "shots=" << shots << ")\n";
-  os << "_json_request_result['executionContext']['optResult'] = [energy, "
-        "params_at_energy]\n";
-  auto function_call = os.str();
-
-  SerializedCodeExecutionContext scCtx;
-  scCtx.scoped_var_dict = std::move(scoped_vars_str);
-  scCtx.source_code = std::move(function_call);
-
-  auto ctx = std::make_unique<cudaq::ExecutionContext>("sample", 0);
-  platform.set_exec_ctx(ctx.get());
-  platform.launchSerializedCodeExecution(
-      kernel.attr("name").cast<std::string>(), scCtx);
-  platform.reset_exec_ctx();
-  auto result = cudaq::optimization_result{};
-  if (ctx->optResult)
-    result = std::move(*ctx->optResult);
-  return result;
-}
-
 /// @brief Throw an exception instructing the user how to achieve optimal
 /// performance
 static void throwPerformanceError() {
@@ -284,10 +192,6 @@ optimization_result pyVQE(py::object &kernel, spin_op &hamiltonian,
                               n_params, shots);
     throwPerformanceError();
   }
-  if (platform.get_remote_capabilities().serializedCodeExec)
-    return pyVQE_remote(platform, kernel, hamiltonian, optimizer,
-                        /*gradient=*/nullptr, /*argumentMapper=*/nullptr,
-                        n_params, shots);
   return optimizer.optimize(n_params, [&](const std::vector<double> &x,
                                           std::vector<double> &grad_vec) {
     py::args params = py::make_tuple(x);
@@ -310,9 +214,6 @@ optimization_result pyVQE(py::object &kernel, spin_op &hamiltonian,
                               shots);
     throwPerformanceError();
   }
-  if (platform.get_remote_capabilities().serializedCodeExec)
-    return pyVQE_remote(platform, kernel, hamiltonian, optimizer,
-                        /*gradient=*/nullptr, &argumentMapper, n_params, shots);
   return optimizer.optimize(n_params, [&](const std::vector<double> &x,
                                           std::vector<double> &grad_vec) {
     py::args params;
@@ -343,9 +244,6 @@ optimization_result pyVQE(py::object &kernel, cudaq::gradient &gradient,
                               /*argumentMapper=*/nullptr, n_params, shots);
     throwPerformanceError();
   }
-  if (platform.get_remote_capabilities().serializedCodeExec)
-    return pyVQE_remote(platform, kernel, hamiltonian, optimizer, &gradient,
-                        /*argumentMapper=*/nullptr, n_params, shots);
   std::function<double(std::vector<double>)> get_expected_value =
       [&](std::vector<double> x) {
         py::args params = py::make_tuple(x);
@@ -381,9 +279,6 @@ optimization_result pyVQE(py::object &kernel, cudaq::gradient &gradient,
                               &gradient, &argumentMapper, n_params, shots);
     throwPerformanceError();
   }
-  if (platform.get_remote_capabilities().serializedCodeExec)
-    return pyVQE_remote(platform, kernel, hamiltonian, optimizer, &gradient,
-                        &argumentMapper, n_params, shots);
   std::function<double(std::vector<double>)> get_expected_value =
       [&](std::vector<double> x) {
         py::args params;
diff --git a/python/runtime/utils/PyRemoteSimulatorQPU.cpp b/python/runtime/utils/PyRemoteSimulatorQPU.cpp
index c0008c9f51f..bbcaee66b48 100644
--- a/python/runtime/utils/PyRemoteSimulatorQPU.cpp
+++ b/python/runtime/utils/PyRemoteSimulatorQPU.cpp
@@ -15,7 +15,7 @@ using namespace mlir;
 namespace {
 
 // This is a helper function to help reduce duplicated code across
-// PyRemoteSimulatorQPU and PyNvcfSimulatorQPU.
+// PyRemoteSimulatorQPU.
 static void launchVqeImpl(cudaq::ExecutionContext *executionContextPtr,
                           std::unique_ptr<cudaq::RemoteRuntimeClient> &m_client,
                           const std::string &m_simName, const std::string &name,
@@ -37,15 +37,15 @@ static void launchVqeImpl(cudaq::ExecutionContext *executionContextPtr,
 
   std::string errorMsg;
   const bool requestOkay = m_client->sendRequest(
-      *mlirContext, *executionContextPtr, /*serializedCodeContext=*/nullptr,
-      gradient, &optimizer, n_params, m_simName, name, /*kernelFunc=*/nullptr,
-      wrapper->rawArgs, /*argSize=*/0, &errorMsg);
+      *mlirContext, *executionContextPtr, gradient, &optimizer, n_params,
+      m_simName, name, /*kernelFunc=*/nullptr, wrapper->rawArgs, /*argSize=*/0,
+      &errorMsg);
   if (!requestOkay)
     throw std::runtime_error("Failed to launch VQE. Error: " + errorMsg);
 }
 
 // This is a helper function to help reduce duplicated code across
-// PyRemoteSimulatorQPU and PyNvcfSimulatorQPU.
+// PyRemoteSimulatorQPU.
 static void
 launchKernelImpl(cudaq::ExecutionContext *executionContextPtr,
                  std::unique_ptr<cudaq::RemoteRuntimeClient> &m_client,
@@ -68,7 +68,7 @@ launchKernelImpl(cudaq::ExecutionContext *executionContextPtr,
       executionContextPtr ? *executionContextPtr : defaultContext;
   std::string errorMsg;
   const bool requestOkay = m_client->sendRequest(
-      *mlirContext, executionContext, /*serializedCodeContext=*/nullptr,
+      *mlirContext, executionContext,
       /*vqe_gradient=*/nullptr, /*vqe_optimizer=*/nullptr, /*vqe_n_params=*/0,
       m_simName, name, kernelFunc, wrapper->rawArgs, voidStarSize, &errorMsg);
   if (!requestOkay)
@@ -103,7 +103,7 @@ static void launchKernelStreamlineImpl(
   actualArgs.erase(actualArgs.begin());
 
   const bool requestOkay = m_client->sendRequest(
-      *mlirContext, executionContext, /*serializedCodeContext=*/nullptr,
+      *mlirContext, executionContext,
       /*vqe_gradient=*/nullptr, /*vqe_optimizer=*/nullptr, /*vqe_n_params=*/0,
       m_simName, name, nullptr, nullptr, 0, &errorMsg, &actualArgs);
   if (!requestOkay)
@@ -159,57 +159,6 @@ class PyRemoteSimulatorQPU : public cudaq::BaseRemoteSimulatorQPU {
   virtual ~PyRemoteSimulatorQPU() = default;
 };
 
-/// Implementation of QPU subtype that submits simulation request to NVCF.
-/// NOTE: This class duplicates the `isEmulated` and `launchKernel` methods from
-/// `PyRemoteSimulatorQPU` class above; tried using multiple inheritance, but,
-/// got errors from the functionality to register type.
-class PyNvcfSimulatorQPU : public cudaq::BaseNvcfSimulatorQPU {
-public:
-  PyNvcfSimulatorQPU() : BaseNvcfSimulatorQPU() {}
-
-  virtual bool isEmulated() override { return true; }
-
-  void launchVQE(const std::string &name, const void *kernelArgs,
-                 cudaq::gradient *gradient, const cudaq::spin_op &H,
-                 cudaq::optimizer &optimizer, const int n_params,
-                 const std::size_t shots) override {
-    CUDAQ_INFO("PyNvcfSimulatorQPU: Launch VQE kernel named '{}' remote QPU {} "
-               "(simulator = {})",
-               name, qpu_id, m_simName);
-    ::launchVqeImpl(getExecutionContextForMyThread(), m_client, m_simName, name,
-                    kernelArgs, gradient, H, optimizer, n_params, shots);
-  }
-
-  cudaq::KernelThunkResultType
-  launchKernel(const std::string &name, cudaq::KernelThunkType kernelFunc,
-               void *args, std::uint64_t voidStarSize,
-               std::uint64_t resultOffset,
-               const std::vector<void *> &rawArgs) override {
-    CUDAQ_INFO("PyNvcfSimulatorQPU: Launch kernel named '{}' remote QPU {} "
-               "(simulator = {})",
-               name, qpu_id, m_simName);
-    ::launchKernelImpl(getExecutionContextForMyThread(), m_client, m_simName,
-                       name, make_degenerate_kernel_type(kernelFunc), args,
-                       voidStarSize, resultOffset, rawArgs);
-    // TODO: Python should probably support return values too.
-    return {};
-  }
-
-  void launchKernel(const std::string &name,
-                    const std::vector<void *> &rawArgs) override {
-    CUDAQ_INFO("PyNvcfSimulatorQPU: Streamline launch kernel named '{}' "
-               "remote QPU {} "
-               "(simulator = {})",
-               name, qpu_id, m_simName);
-    ::launchKernelStreamlineImpl(getExecutionContextForMyThread(), m_client,
-                                 m_simName, name, rawArgs);
-  }
-
-  PyNvcfSimulatorQPU(PyNvcfSimulatorQPU &&) = delete;
-  virtual ~PyNvcfSimulatorQPU() = default;
-};
-
 } // namespace
 
 CUDAQ_REGISTER_TYPE(cudaq::QPU, PyRemoteSimulatorQPU, RemoteSimulatorQPU)
-CUDAQ_REGISTER_TYPE(cudaq::QPU, PyNvcfSimulatorQPU, NvcfSimulatorQPU)
diff --git a/python/runtime/utils/PyRestRemoteClient.cpp b/python/runtime/utils/PyRestRemoteClient.cpp
index 852daf4f6c0..5dd5988feb2 100644
--- a/python/runtime/utils/PyRestRemoteClient.cpp
+++ b/python/runtime/utils/PyRestRemoteClient.cpp
@@ -17,15 +17,6 @@ class PyRestRemoteClient : public cudaq::BaseRemoteRestRuntimeClient {
   PyRestRemoteClient() : BaseRemoteRestRuntimeClient() {}
 };
 
-/// Implementation of QPU subtype that submits simulation request to NVCF.
-/// REST client submitting jobs to NVCF-hosted `cudaq-qpud` service.
-class PyNvcfRuntimeClient : public cudaq::BaseNvcfRuntimeClient {
-public:
-  /// @brief The constructor
-  PyNvcfRuntimeClient() : BaseNvcfRuntimeClient() {}
-};
-
 } // namespace
 
 CUDAQ_REGISTER_TYPE(cudaq::RemoteRuntimeClient, PyRestRemoteClient, rest)
-CUDAQ_REGISTER_TYPE(cudaq::RemoteRuntimeClient, PyNvcfRuntimeClient, NVCF)
diff --git a/python/tests/remote/test_remote_code_exec.py b/python/tests/remote/test_remote_code_exec.py
deleted file mode 100644
index 8ff617f15e4..00000000000
--- a/python/tests/remote/test_remote_code_exec.py
+++ /dev/null
@@ -1,444 +0,0 @@
-# ============================================================================ #
-# Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                   #
-# All rights reserved.                                                         #
-#                                                                              #
-# This source code and the accompanying materials are made available under     #
-# the terms of the Apache License 2.0 which accompanies this distribution.     #
-# ============================================================================ #
-import pytest
-import os
-import sys
-import subprocess
-import time
-import numpy as np
-
-import cudaq
-from cudaq import spin
-import numpy as np
-
-try:
-    import requests
-    import psutil
-    have_requests = True
-except ImportError:
-    have_requests = False
-
-skipIfModulesNotInstalled = pytest.mark.skipif(
-    not have_requests,
-    reason="please install requests and/or psutil for these tests")
-
-
-def assert_close(want, got, tolerance=1.e-5) -> bool:
-    return abs(want - got) < tolerance
-
-
-def kill_proc_and_child_processes(parent_proc: subprocess.Popen):
-    try:
-        parent = psutil.Process(parent_proc.pid)
-    except psutil.NoSuchProcess:
-        return
-
-    # Try to kill the children processes, giving them 3 seconds for a graceful
-    # exit, and then a forceful kill after that.
-    children = parent.children(recursive=True)
-    for child in children:
-        try:
-            child.terminate()
-        except psutil.NoSuchProcess:
-            continue
-
-    _, still_alive = psutil.wait_procs(children, timeout=3)
-
-    for child in still_alive:
-        try:
-            child.kill()
-        except psutil.NoSuchProcess:
-            continue
-
-    # Now kill the parent process
-    parent.terminate()
-    _, still_alive = psutil.wait_procs([parent], timeout=3)
-    for p in still_alive:
-        try:
-            p.kill()
-        except psutil.NoSuchProcess:
-            continue
-
-
-def wait_until_port_active(port: int) -> bool:
-    port_up = False
-    retries = 0
-    port_url = 'http://localhost:' + str(port)
-    while (not port_up):
-        try:
-            ping_response = requests.get(port_url)
-            port_up = (ping_response.status_code == 200)
-        except:
-            port_up = False
-        if not port_up:
-            retries += 1
-            if retries > 100:
-                print("EXIT: TOO MANY RETRIES!")
-                return False
-            time.sleep(0.1)
-    return True
-
-
-@pytest.fixture(scope="session", autouse=True)
-def startUpMockServer():
-    os.environ['CUDAQ_SER_CODE_EXEC'] = '1'
-    cudaq_qpud = os.path.dirname(cudaq.__file__) + "/../bin/cudaq-qpud.py"
-    nvqc_proxy = os.path.dirname(cudaq.__file__) + "/../bin/nvqc_proxy.py"
-    p1 = subprocess.Popen([sys.executable, nvqc_proxy])
-    p2 = subprocess.Popen([sys.executable, cudaq_qpud, '--port', '3031'])
-    cudaq.set_target("remote-mqpu", url="localhost:3030")
-    proxy_up = wait_until_port_active(3030)
-    qpud_up = wait_until_port_active(3031)
-
-    # Shutdown servers if either one fails to come up. The tests will fail
-    # downstream.
-    if not proxy_up or not qpud_up:
-        kill_proc_and_child_processes(p1)
-        kill_proc_and_child_processes(p2)
-
-    yield
-    cudaq.reset_target()
-    kill_proc_and_child_processes(p1)
-    kill_proc_and_child_processes(p2)
-
-
-@pytest.fixture(autouse=True)
-def do_something():
-    yield
-    cudaq.__clearKernelRegistries()
-
-
-@skipIfModulesNotInstalled
-def test_setup():
-    target = cudaq.get_target()
-    numQpus = target.num_qpus()
-    assert numQpus == 1
-
-
-@skipIfModulesNotInstalled
-def test_optimizer():
-    hamiltonian = 5.907 - 2.1433 * spin.x(0) * spin.x(1) - 2.1433 * spin.y(
-        0) * spin.y(1) + .21829 * spin.z(0) - 6.125 * spin.z(1)
-
-    # Verify that variables can be captured by kernels
-    testVar = 0
-
-    @cudaq.kernel
-    def kernel(angles: list[float]):
-        qvector = cudaq.qvector(2)
-        x(qvector[0])
-        ry(angles[0] + testVar, qvector[1])
-        x.ctrl(qvector[1], qvector[0])
-
-    optimizer = cudaq.optimizers.Adam()
-    gradient = cudaq.gradients.CentralDifference()
-
-    def objective_function(parameter_vector: list[float],
-                           hamiltonian=hamiltonian,
-                           gradient_strategy=gradient,
-                           kernel=kernel) -> tuple[float, list[float]]:
-        get_result = lambda parameter_vector: cudaq.observe(
-            kernel, hamiltonian, parameter_vector).expectation()
-        cost = get_result(parameter_vector)
-        gradient_vector = gradient_strategy.compute(parameter_vector,
-                                                    get_result, cost)
-        return cost, gradient_vector
-
-    energy, parameter = optimizer.optimize(dimensions=1,
-                                           function=objective_function)
-    print(f"\nminimized <H> = {round(energy,16)}")
-    print(f"optimal theta = {round(parameter[0],16)}")
-    assert assert_close(energy, -1.7483830311526454, 1e-3)
-    assert assert_close(parameter[0], 0.5840908448487905, 1e-3)
-
-
-@skipIfModulesNotInstalled
-def test_optimizer_nested_kernels():
-    hamiltonian = 5.907 - 2.1433 * spin.x(0) * spin.x(1) - 2.1433 * spin.y(
-        0) * spin.y(1) + .21829 * spin.z(0) - 6.125 * spin.z(1)
-
-    @cudaq.kernel
-    def kernelA(qvector: cudaq.qview):
-        x(qvector[0])
-
-    @cudaq.kernel
-    def kernelB(angles: list[float]):
-        qvector = cudaq.qvector(2)
-        # This x() is done in a nested kernel
-        # x(qvector[0])
-        kernelA(qvector)
-        ry(angles[0], qvector[1])
-        x.ctrl(qvector[1], qvector[0])
-
-    optimizer = cudaq.optimizers.Adam()
-    gradient = cudaq.gradients.CentralDifference()
-
-    def nested_obj_func():
-        print('Calling nested_obj_func')
-
-    def objective_function(parameter_vector: list[float],
-                           hamiltonian=hamiltonian,
-                           gradient_strategy=gradient,
-                           kernel=kernelB) -> tuple[float, list[float]]:
-        nested_obj_func()  # gratuitous call to a nested function (for testing)
-
-        def another_nested_obj_func():
-            print('I am in another_nested_obj_func')
-
-        another_nested_obj_func()
-        get_result = lambda parameter_vector: cudaq.observe(
-            kernel, hamiltonian, parameter_vector).expectation()
-        cost = get_result(parameter_vector)
-        gradient_vector = gradient_strategy.compute(parameter_vector,
-                                                    get_result, cost)
-        return cost, gradient_vector
-
-    energy, parameter = optimizer.optimize(dimensions=1,
-                                           function=objective_function)
-    print(f"\nminimized <H> = {round(energy,16)}")
-    print(f"optimal theta = {round(parameter[0],16)}")
-    assert assert_close(energy, -1.7483830311526454, 1e-3)
-    assert assert_close(parameter[0], 0.5840908448487905, 1e-3)
-
-
-@skipIfModulesNotInstalled
-@pytest.mark.parametrize(
-    "optimizer", [cudaq.optimizers.COBYLA(),
-                  cudaq.optimizers.NelderMead()])
-def test_simple_vqe(optimizer):
-    hamiltonian = 5.907 - 2.1433 * spin.x(0) * spin.x(1) - 2.1433 * spin.y(
-        0) * spin.y(1) + .21829 * spin.z(0) - 6.125 * spin.z(1)
-
-    @cudaq.kernel
-    def kernel(angles: list[float]):
-        qvector = cudaq.qvector(2)
-        x(qvector[0])
-        ry(angles[0], qvector[1])
-        x.ctrl(qvector[1], qvector[0])
-
-    energy, parameter = cudaq.vqe(kernel=kernel,
-                                  spin_operator=hamiltonian,
-                                  optimizer=optimizer,
-                                  parameter_count=1)
-
-    print(f"\nminimized <H> = {round(energy,16)}")
-    print(f"optimal theta = {round(parameter[0],16)}")
-    want_expectation_value = -1.7487948611472093
-    want_optimal_parameters = [0.59]
-    assert assert_close(want_expectation_value, energy, tolerance=1e-2)
-    assert all(
-        assert_close(want_parameter, got_parameter, tolerance=1e-2)
-        for want_parameter, got_parameter in zip(want_optimal_parameters,
-                                                 parameter))
-
-
-@skipIfModulesNotInstalled
-@pytest.mark.parametrize(
-    "optimizer", [cudaq.optimizers.COBYLA(),
-                  cudaq.optimizers.NelderMead()])
-def test_simple_vqe_nested_kernels(optimizer):
-    hamiltonian = 5.907 - 2.1433 * spin.x(0) * spin.x(1) - 2.1433 * spin.y(
-        0) * spin.y(1) + .21829 * spin.z(0) - 6.125 * spin.z(1)
-
-    @cudaq.kernel
-    def kernelA(qvector: cudaq.qview):
-        x(qvector[0])
-
-    @cudaq.kernel
-    def kernelB(angles: list[float]):
-        qvector = cudaq.qvector(2)
-        # This x() is done in a nested kernel
-        # x(qvector[0])
-        kernelA(qvector)
-        ry(angles[0], qvector[1])
-        x.ctrl(qvector[1], qvector[0])
-
-    energy, parameter = cudaq.vqe(kernel=kernelB,
-                                  spin_operator=hamiltonian,
-                                  optimizer=optimizer,
-                                  parameter_count=1)
-
-    print(f"\nminimized <H> = {round(energy,16)}")
-    print(f"optimal theta = {round(parameter[0],16)}")
-    want_expectation_value = -1.7487948611472093
-    want_optimal_parameters = [0.59]
-    assert assert_close(want_expectation_value, energy, tolerance=1e-2)
-    assert all(
-        assert_close(want_parameter, got_parameter, tolerance=1e-2)
-        for want_parameter, got_parameter in zip(want_optimal_parameters,
-                                                 parameter))
-
-
-@skipIfModulesNotInstalled
-def test_complex_vqe_inline_lambda():
-    hamiltonian = 5.907 - 2.1433 * spin.x(0) * spin.x(1) - 2.1433 * spin.y(
-        0) * spin.y(1) + .21829 * spin.z(0) - 6.125 * spin.z(1)
-
-    @cudaq.kernel
-    def kernel(angles: list[float], num_qubits: int):
-        qvector = cudaq.qvector(num_qubits)
-        x(qvector[0])
-        ry(angles[0], qvector[1])
-        x.ctrl(qvector[1], qvector[0])
-
-    optimizer = cudaq.optimizers.Adam()
-    grad = cudaq.gradients.CentralDifference()
-
-    num_qubits = 2
-    energy, parameter = cudaq.vqe(kernel=kernel,
-                                  gradient_strategy=grad,
-                                  spin_operator=hamiltonian,
-                                  optimizer=optimizer,
-                                  argument_mapper=lambda x: (x, num_qubits),
-                                  parameter_count=1)
-
-    print(f"\nminimized <H> = {round(energy,16)}")
-    print(f"optimal theta = {round(parameter[0],16)}")
-    assert assert_close(energy, -1.7488648395275948, 1e-3)
-    assert assert_close(parameter[0], 0.5840908448487905, 1e-3)
-
-
-@skipIfModulesNotInstalled
-def test_vqe_perf_warning():
-    hamiltonian = 5.907 - 2.1433 * spin.x(0) * spin.x(1) - 2.1433 * spin.y(
-        0) * spin.y(1) + .21829 * spin.z(0) - 6.125 * spin.z(1)
-
-    @cudaq.kernel
-    def kernel(num_qubits: int, angles: list[float]):
-        qvector = cudaq.qvector(num_qubits)
-        x(qvector[0])
-        ry(angles[0], qvector[1])
-        x.ctrl(qvector[1], qvector[0])
-
-    optimizer = cudaq.optimizers.Adam()
-    grad = cudaq.gradients.CentralDifference()
-
-    num_qubits = 2
-    with pytest.raises(RuntimeError) as error:
-        energy, parameter = cudaq.vqe(kernel=kernel,
-                                      gradient_strategy=grad,
-                                      spin_operator=hamiltonian,
-                                      optimizer=optimizer,
-                                      argument_mapper=lambda x: (num_qubits, x),
-                                      parameter_count=1)
-
-
-# This is a helper function used by parameterized tests below.
-@skipIfModulesNotInstalled
-@pytest.mark.skip
-def test_complex_vqe_named_lambda(optimizer, gradient):
-    hamiltonian = 5.907 - 2.1433 * spin.x(0) * spin.x(1) - 2.1433 * spin.y(
-        0) * spin.y(1) + .21829 * spin.z(0) - 6.125 * spin.z(1)
-
-    @cudaq.kernel
-    def kernel(angles: list[float], num_qubits: int):
-        qvector = cudaq.qvector(num_qubits)
-        x(qvector[0])
-        ry(angles[0], qvector[1])
-        x.ctrl(qvector[1], qvector[0])
-
-    num_qubits = 2
-    arg_mapper = lambda x: (x, num_qubits)
-    energy, parameter = cudaq.vqe(kernel=kernel,
-                                  gradient_strategy=gradient,
-                                  spin_operator=hamiltonian,
-                                  optimizer=optimizer,
-                                  argument_mapper=arg_mapper,
-                                  parameter_count=1)
-
-    print(f"\nminimized <H> = {round(energy,16)}")
-    print(f"optimal theta = {round(parameter[0],16)}")
-    want_expectation_value = -1.7487948611472093
-    want_optimal_parameters = [0.59]
-    assert assert_close(want_expectation_value, energy, tolerance=1e-2)
-    assert all(
-        assert_close(want_parameter, got_parameter, tolerance=1e-2)
-        for want_parameter, got_parameter in zip(want_optimal_parameters,
-                                                 parameter))
-
-
-@skipIfModulesNotInstalled
-@pytest.mark.parametrize("optimizer", [
-    cudaq.optimizers.LBFGS(),
-    cudaq.optimizers.Adam(),
-    cudaq.optimizers.GradientDescent(),
-    cudaq.optimizers.SGD(),
-])
-def test_complex_vqe_named_lambda_sweep_opt(optimizer):
-    test_complex_vqe_named_lambda(optimizer,
-                                  cudaq.gradients.CentralDifference())
-
-
-@skipIfModulesNotInstalled
-@pytest.mark.parametrize("gradient", [
-    cudaq.gradients.CentralDifference(),
-    cudaq.gradients.ParameterShift(),
-    cudaq.gradients.ForwardDifference()
-])
-def test_complex_vqe_named_lambda_sweep_grad(gradient):
-    test_complex_vqe_named_lambda(cudaq.optimizers.Adam(), gradient)
-
-
-@skipIfModulesNotInstalled
-def test_state_preparation():
-
-    @cudaq.kernel
-    def kernel(vec: list[complex]):
-        qubits = cudaq.qvector(vec)
-
-    state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.]
-    counts = cudaq.sample(kernel, state)
-    assert '00' in counts
-    assert '10' in counts
-    assert not '01' in counts
-    assert not '11' in counts
-
-
-@skipIfModulesNotInstalled
-def test_state_preparation_builder():
-    kernel, state = cudaq.make_kernel(list[complex])
-    qubits = kernel.qalloc(state)
-
-    state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.]
-    counts = cudaq.sample(kernel, state)
-    assert '00' in counts
-    assert '10' in counts
-    assert not '01' in counts
-    assert not '11' in counts
-
-
-@skipIfModulesNotInstalled
-@pytest.mark.skip(reason="https://github.com/NVIDIA/cuda-quantum/issues/1924")
-def test_arbitrary_unitary_synthesis():
-    cudaq.register_operation("custom_h",
-                             1. / np.sqrt(2.) * np.array([1, 1, 1, -1]))
-    cudaq.register_operation("custom_x", np.array([0, 1, 1, 0]))
-
-    @cudaq.kernel
-    def bell(angles: list[float]):
-        qubits = cudaq.qvector(2)
-        custom_h(qubits[0])
-        custom_x.ctrl(qubits[0], qubits[1])
-        ry(angles[0], qubits[1])
-
-    hamiltonian = 5.907 - 2.1433 * spin.x(0) * spin.x(1) - 2.1433 * spin.y(
-        0) * spin.y(1) + .21829 * spin.z(0) - 6.125 * spin.z(1)
-
-    optimizer = cudaq.optimizers.Adam()
-    energy, parameter = cudaq.vqe(kernel=bell,
-                                  spin_operator=hamiltonian,
-                                  optimizer=optimizer,
-                                  parameter_count=1)
-    print(f"\nminimized <H> = {round(energy,16)}")
-    print(f"optimal theta = {round(parameter[0],16)}")
-
-
-# leave for gdb debugging
-if __name__ == "__main__":
-    loc = os.path.abspath(__file__)
-    pytest.main([loc, "-rP"])
diff --git a/runtime/common/BaseRemoteSimulatorQPU.h b/runtime/common/BaseRemoteSimulatorQPU.h
index 090d5a00f86..ce9a9fd9cf4 100644
--- a/runtime/common/BaseRemoteSimulatorQPU.h
+++ b/runtime/common/BaseRemoteSimulatorQPU.h
@@ -13,7 +13,6 @@
 #include "common/RemoteKernelExecutor.h"
 #include "common/Resources.h"
 #include "common/RuntimeMLIR.h"
-#include "common/SerializedCodeExecutionContext.h"
 #include "cudaq.h"
 #include "cudaq/Optimizer/Builder/Runtime.h"
 #include "cudaq/Optimizer/Transforms/Passes.h"
@@ -105,9 +104,9 @@ class BaseRemoteSimulatorQPU : public cudaq::QPU {
 
     std::string errorMsg;
     const bool requestOkay = m_client->sendRequest(
-        *m_mlirContext, *executionContextPtr, /*serializedCodeContext=*/nullptr,
-        gradient, &optimizer, n_params, m_simName, name, /*kernelFunc=*/nullptr,
-        kernelArgs, /*argSize=*/0, &errorMsg);
+        *m_mlirContext, *executionContextPtr, gradient, &optimizer, n_params,
+        m_simName, name, /*kernelFunc=*/nullptr, kernelArgs, /*argSize=*/0,
+        &errorMsg);
     if (!requestOkay)
       throw std::runtime_error("Failed to launch VQE. Error: " + errorMsg);
   }
@@ -188,7 +187,7 @@ class BaseRemoteSimulatorQPU : public cudaq::QPU {
 
     std::string errorMsg;
     const bool requestOkay = m_client->sendRequest(
-        *m_mlirContext, executionContext, /*serializedCodeContext=*/nullptr,
+        *m_mlirContext, executionContext,
         /*vqe_gradient=*/nullptr, /*vqe_optimizer=*/nullptr, /*vqe_n_params=*/0,
         m_simName, name, make_degenerate_kernel_type(kernelFunc), args,
         voidStarSize, &errorMsg, rawArgs);
@@ -204,8 +203,7 @@ class BaseRemoteSimulatorQPU : public cudaq::QPU {
             " bytes overflows the argument buffer.");
       // Currently, we only support result buffer serialization on LittleEndian
       // CPUs (x86, ARM, PPC64LE).
-      // Note: NVQC service will always be using LE. If
-      // the client (e.g., compiled from source) is built for big-endian, we
+      // If the client (e.g., compiled from source) is built for big-endian, we
       // will throw an error if result buffer data is returned.
       if (llvm::sys::IsBigEndianHost)
         throw std::runtime_error(
@@ -223,40 +221,6 @@ class BaseRemoteSimulatorQPU : public cudaq::QPU {
     return {};
   }
 
-  void
-  launchSerializedCodeExecution(const std::string &name,
-                                cudaq::SerializedCodeExecutionContext
-                                    &serializeCodeExecutionObject) override {
-    CUDAQ_INFO(
-        "BaseRemoteSimulatorQPU: Launch remote code named '{}' remote QPU {} "
-        "(simulator = {})",
-        name, qpu_id, m_simName);
-
-    cudaq::ExecutionContext *executionContextPtr =
-        getExecutionContextForMyThread();
-
-    if (executionContextPtr && executionContextPtr->name == "tracer") {
-      return;
-    }
-
-    // Default context for a 'fire-and-ignore' kernel launch; i.e., no context
-    // was set before launching the kernel. Use a static variable per thread to
-    // set up a single-shot execution context for this case.
-    static thread_local cudaq::ExecutionContext defaultContext("sample",
-                                                               /*shots=*/1);
-    cudaq::ExecutionContext &executionContext =
-        executionContextPtr ? *executionContextPtr : defaultContext;
-
-    std::string errorMsg;
-    const bool requestOkay = m_client->sendRequest(
-        *m_mlirContext, executionContext, &serializeCodeExecutionObject,
-        /*vqe_gradient=*/nullptr, /*vqe_optimizer=*/nullptr, /*vqe_n_params=*/0,
-        m_simName, name, /*kernelFunc=*/nullptr, /*args=*/nullptr,
-        /*voidStarSize=*/0, &errorMsg);
-    if (!requestOkay)
-      throw std::runtime_error("Failed to launch kernel. Error: " + errorMsg);
-  }
-
   void setExecutionContext(cudaq::ExecutionContext *context) override {
     CUDAQ_INFO("BaseRemoteSimulatorQPU::setExecutionContext QPU {}", qpu_id);
     std::scoped_lock<std::mutex> lock(m_contextMutex);
@@ -274,127 +238,4 @@ class BaseRemoteSimulatorQPU : public cudaq::QPU {
   }
 };
 
-/// Implementation of base QPU subtype that submits simulation request to
-/// NVCF.
-class BaseNvcfSimulatorQPU : public cudaq::BaseRemoteSimulatorQPU {
-public:
-  BaseNvcfSimulatorQPU() : BaseRemoteSimulatorQPU() {
-    m_client = cudaq::registry::get<cudaq::RemoteRuntimeClient>("NVCF");
-  }
-
-  // Encapsulates Nvcf configurations that we need.
-  // Empty strings mean no config available.
-  struct NvcfConfig {
-    std::string apiKey;
-    std::string functionId;
-    std::string versionId;
-  };
-
-  virtual void setTargetBackend(const std::string &backend) override {
-    auto parts = cudaq::split(backend, ';');
-    if (parts.size() % 2 != 0)
-      throw std::invalid_argument("Unexpected backend configuration string. "
-                                  "Expecting a ';'-separated key-value pairs.");
-    std::string apiKey, functionId, versionId, ngpus;
-
-    for (std::size_t i = 0; i < parts.size(); i += 2) {
-      if (parts[i] == "simulator")
-        m_simName = parts[i + 1];
-      // First, check if api key or function Id is provided as target options.
-      if (parts[i] == "function_id")
-        functionId = parts[i + 1];
-      if (parts[i] == "api_key")
-        apiKey = parts[i + 1];
-      if (parts[i] == "version_id")
-        versionId = parts[i + 1];
-      if (parts[i] == "ngpus")
-        ngpus = parts[i + 1];
-    }
-    // If none provided, look for them in environment variables or the config
-    // file.
-    const auto config = searchNvcfConfig();
-    if (apiKey.empty())
-      apiKey = config.apiKey;
-    if (functionId.empty())
-      functionId = config.functionId;
-    if (versionId.empty())
-      versionId = config.versionId;
-
-    // API key and function Id are required.
-    if (apiKey.empty())
-      throw std::runtime_error(
-          "Cannot find NVQC API key. Please refer to the documentation for "
-          "information about obtaining and using your NVQC API key.");
-
-    if (!apiKey.starts_with("nvapi-"))
-      std::runtime_error(
-          "An invalid NVQC API key is provided. Please check your settings.");
-    std::unordered_map<std::string, std::string> clientConfigs{
-        {"api-key", apiKey}};
-    if (!functionId.empty())
-      clientConfigs.emplace("function-id", functionId);
-    if (!versionId.empty())
-      clientConfigs.emplace("version-id", versionId);
-    if (!ngpus.empty())
-      clientConfigs.emplace("ngpus", ngpus);
-
-    m_client->setConfig(clientConfigs);
-  }
-
-  // The NVCF version of this function needs to dynamically fetch the remote
-  // capabilities from the currently deployed servers.
-  virtual RemoteCapabilities getRemoteCapabilities() const override {
-    return m_client->getRemoteCapabilities();
-  }
-
-protected:
-  // Helper to search NVQC config from environment variable or config file.
-  NvcfConfig searchNvcfConfig() {
-    NvcfConfig config;
-    // Search from environment variable
-    if (auto apiKey = std::getenv("NVQC_API_KEY"))
-      config.apiKey = std::string(apiKey);
-
-    if (auto funcIdEnv = std::getenv("NVQC_FUNCTION_ID"))
-      config.functionId = std::string(funcIdEnv);
-
-    if (auto versionIdEnv = std::getenv("NVQC_FUNCTION_VERSION_ID"))
-      config.versionId = std::string(versionIdEnv);
-
-    std::string nvqcConfig;
-    // Allow someone to tweak this with an environment variable
-    if (auto creds = std::getenv("CUDAQ_NVQC_CREDENTIALS"))
-      nvqcConfig = std::string(creds);
-    else
-      nvqcConfig = std::string(getenv("HOME")) + std::string("/.nvqc_config");
-    if (cudaq::fileExists(nvqcConfig)) {
-      std::ifstream stream(nvqcConfig);
-      std::string contents((std::istreambuf_iterator<char>(stream)),
-                           std::istreambuf_iterator<char>());
-      std::vector<std::string> lines;
-      lines = cudaq::split(contents, '\n');
-      for (const std::string &l : lines) {
-        std::vector<std::string> keyAndValue = cudaq::split(l, ':');
-        if (keyAndValue.size() != 2)
-          throw std::runtime_error("Ill-formed configuration file (" +
-                                   nvqcConfig +
-                                   "). Key-value pairs must be in `<key> : "
-                                   "<value>` format. (One per line)");
-        cudaq::trim(keyAndValue[0]);
-        cudaq::trim(keyAndValue[1]);
-        if (config.apiKey.empty() &&
-            (keyAndValue[0] == "key" || keyAndValue[0] == "apikey"))
-          config.apiKey = keyAndValue[1];
-        if (config.functionId.empty() && (keyAndValue[0] == "function-id" ||
-                                          keyAndValue[0] == "Function ID"))
-          config.functionId = keyAndValue[1];
-        if (config.versionId.empty() &&
-            (keyAndValue[0] == "version-id" || keyAndValue[0] == "Version ID"))
-          config.versionId = keyAndValue[1];
-      }
-    }
-    return config;
-  }
-};
-
 } // namespace cudaq
diff --git a/runtime/common/BaseRestRemoteClient.h b/runtime/common/BaseRestRemoteClient.h
index 70d483f665f..54821313251 100644
--- a/runtime/common/BaseRestRemoteClient.h
+++ b/runtime/common/BaseRestRemoteClient.h
@@ -12,7 +12,6 @@
 #include "common/Environment.h"
 #include "common/JsonConvert.h"
 #include "common/Logger.h"
-#include "common/NvqcConfig.h"
 #include "common/RemoteKernelExecutor.h"
 #include "common/RestClient.h"
 #include "common/RuntimeMLIR.h"
@@ -319,14 +318,11 @@ class BaseRemoteRestRuntimeClient : public RemoteRuntimeClient {
 
   cudaq::RestRequest constructJobRequest(
       mlir::MLIRContext &mlirContext, cudaq::ExecutionContext &io_context,
-      cudaq::SerializedCodeExecutionContext *serializedCodeContext,
       const std::string &backendSimName, const std::string &kernelName,
       void (*kernelFunc)(void *), const void *kernelArgs,
       std::uint64_t argsSize, const std::vector<void *> *rawArgs) {
 
     cudaq::RestRequest request(io_context, version());
-    if (serializedCodeContext)
-      request.serializedCodeExecutionContext = *serializedCodeContext;
     request.entryPoint = kernelName;
     request.passes = serverPasses;
     request.format = cudaq::CodeFormat::MLIR;
@@ -362,7 +358,7 @@ class BaseRemoteRestRuntimeClient : public RemoteRuntimeClient {
       request.entryPoint = stateIrPayload1.entryPoint;
       // Second kernel of the overlap calculation
       request.overlapKernel = stateIrPayload2;
-    } else if (serializedCodeContext == nullptr) {
+    } else {
       request.code =
           constructKernelPayload(mlirContext, kernelName, kernelArgs, argsSize,
                                  /*startingArgIdx=*/0, rawArgs);
@@ -389,7 +385,6 @@ class BaseRemoteRestRuntimeClient : public RemoteRuntimeClient {
   virtual bool
   sendRequest(mlir::MLIRContext &mlirContext,
               cudaq::ExecutionContext &io_context,
-              cudaq::SerializedCodeExecutionContext *serializedCodeContext,
               cudaq::gradient *vqe_gradient, cudaq::optimizer *vqe_optimizer,
               const int vqe_n_params, const std::string &backendSimName,
               const std::string &kernelName, void (*kernelFunc)(void *),
@@ -406,13 +401,12 @@ class BaseRemoteRestRuntimeClient : public RemoteRuntimeClient {
         return constructVQEJobRequest(mlirContext, io_context, backendSimName,
                                       kernelName, kernelArgs, vqe_gradient,
                                       *vqe_optimizer, vqe_n_params, rawArgs);
-      return constructJobRequest(mlirContext, io_context, serializedCodeContext,
-                                 backendSimName, kernelName, kernelFunc,
-                                 kernelArgs, argsSize, rawArgs);
+      return constructJobRequest(mlirContext, io_context, backendSimName,
+                                 kernelName, kernelFunc, kernelArgs, argsSize,
+                                 rawArgs);
     }();
 
-    if (request.code.empty() && (serializedCodeContext == nullptr ||
-                                 serializedCodeContext->source_code.empty())) {
+    if (request.code.empty()) {
       if (optionalErrorMsg)
         *optionalErrorMsg =
             std::string(
@@ -485,833 +479,4 @@ class BaseRemoteRestRuntimeClient : public RemoteRuntimeClient {
   }
 };
 
-/// Base class for the REST client submitting jobs to NVCF-hosted `cudaq-qpud`
-/// service.
-class BaseNvcfRuntimeClient : public cudaq::BaseRemoteRestRuntimeClient {
-protected:
-  // None: Don't log; Info: basic info; Trace: Timing data per invocation.
-  enum class LogLevel : int { None = 0, Info, Trace };
-  // NVQC logging level
-  // Enabled high-level info log by default (can be set by an environment
-  // variable)
-  LogLevel m_logLevel = LogLevel::Info;
-  // API key for authentication
-  std::string m_apiKey;
-  // Rest client to send HTTP request
-  cudaq::RestClient m_restClient;
-  // NVCF function Id to use
-  std::string m_functionId;
-  // NVCF version Id of that function to use
-  std::string m_functionVersionId;
-  // Information about function deployment from environment variable info.
-  struct FunctionEnvironments {
-    // These configs should be positive numbers.
-    int majorVersion{-1};
-    int minorVersion{-1};
-    int numGpus{-1};
-    int timeoutSecs{-1};
-    int hasSerializedCodeExec{-1}; // -1 means unknown; 0 = false, 1 = true
-    std::string name;
-  };
-  // Available functions: function Id to info mapping
-  using DeploymentInfo = std::unordered_map<std::string, FunctionEnvironments>;
-  DeploymentInfo m_availableFuncs;
-  const std::string CUDAQ_NCA_ID = cudaq::getNvqcNcaId();
-  // Base URL for NVCF APIs
-  static inline const std::string m_baseUrl = "api.nvcf.nvidia.com/v2";
-  // Return the URL to invoke the function specified in this client
-  std::string nvcfInvocationUrl() const {
-    return fmt::format("https://{}/nvcf/exec/functions/{}/versions/{}",
-                       m_baseUrl, m_functionId, m_functionVersionId);
-  }
-  // Return the URL to request an Asset upload link
-  std::string nvcfAssetUrl() const {
-    return fmt::format("https://{}/nvcf/assets", m_baseUrl);
-  }
-  // Return the URL to retrieve status/result of an NVCF request.
-  std::string
-  nvcfInvocationStatus(const std::string &invocationRequestId) const {
-    return fmt::format("https://{}/nvcf/exec/status/{}", m_baseUrl,
-                       invocationRequestId);
-  }
-  // Construct the REST headers for calling NVCF REST APIs
-  std::map<std::string, std::string> getHeaders() const {
-    std::map<std::string, std::string> header{
-        {"Authorization", fmt::format("Bearer {}", m_apiKey)},
-        {"Content-type", "application/json"}};
-    return header;
-  };
-  // Helper to retrieve the list of all available versions of the specified
-  // function Id.
-  std::vector<cudaq::NvcfFunctionVersionInfo> getFunctionVersions() {
-    auto headers = getHeaders();
-    auto versionDataJs = m_restClient.get(
-        fmt::format("https://{}/nvcf/functions/{}", m_baseUrl, m_functionId),
-        "/versions", headers, /*enableSsl=*/true);
-    CUDAQ_INFO("Version data: {}", versionDataJs.dump());
-    std::vector<cudaq::NvcfFunctionVersionInfo> versions;
-    versionDataJs["functions"].get_to(versions);
-    return versions;
-  }
-  DeploymentInfo
-  getAllAvailableDeployments(const std::string &functionOverride,
-                             const std::string &versionOverride) {
-    auto headers = getHeaders();
-    auto allVisibleFunctions =
-        m_restClient.get(fmt::format("https://{}/nvcf/functions", m_baseUrl),
-                         "", headers, /*enableSsl=*/true);
-    const std::string cudaqNvcfFuncNamePrefix = "cuda_quantum";
-    DeploymentInfo info;
-
-    // NCA ID Precedence order is:
-    // 1. CUDAQ_NCA_ID if it was specifically overriden
-    // 2. Dev org NCA ID if active dev functions are visible with selected key
-    // 3. Production NCA ID
-    const std::string ncaIdToSearch = [&]() {
-      // Check for override
-      if (isNvqcNcaIdOverridden())
-        return CUDAQ_NCA_ID;
-      // Check to see if dev NCA ID functions are available
-      for (auto funcInfo : allVisibleFunctions["functions"]) {
-        if (funcInfo["ncaId"].get<std::string>() ==
-                std::string(DEV_NVQC_NCA_ID) &&
-            funcInfo["status"].get<std::string>() == "ACTIVE" &&
-            funcInfo["name"].get<std::string>().starts_with(
-                cudaqNvcfFuncNamePrefix)) {
-          return std::string(DEV_NVQC_NCA_ID);
-        }
-      }
-      // Fallback on production NCA ID
-      return CUDAQ_NCA_ID;
-    }();
-
-    // Only add functions that are the latest minor version for the major
-    // version matched by the client.
-    // I.e. If client 1.x sees server 1.2 and 1.3, choose 1.3.
-    int highestMinorVersion = 0;
-    for (auto funcInfo : allVisibleFunctions["functions"]) {
-      bool matchesOverride =
-          funcInfo["id"].get<std::string>() == functionOverride ||
-          funcInfo["versionId"].get<std::string>() == versionOverride;
-      bool matchesWithoutOverride =
-          funcInfo["ncaId"].get<std::string>() == ncaIdToSearch &&
-          funcInfo["status"].get<std::string>() == "ACTIVE" &&
-          funcInfo["name"].get<std::string>().starts_with(
-              cudaqNvcfFuncNamePrefix);
-      if (matchesOverride || matchesWithoutOverride) {
-        const auto containerEnvs = [&]() -> FunctionEnvironments {
-          FunctionEnvironments envs;
-          // Function name convention:
-          // Example: cuda_quantum_v1_t3600_8x
-          //          ------------  -  ---- -
-          //            Prefix      |    |  |
-          //              Version __|    |  |
-          //           Timeout (secs)  __|  |
-          //              Number of GPUs  __|
-          // Also supported: cuda_quantum_v1-1_t3600_8x
-          // Also supported: cuda_quantum_suffix_v1-1_t3600_8x
-          const std::regex funcNameRegex(
-              R"(^cuda_quantum_.*v([\d\-]+)_t(\d+)_(\d+)x$)");
-          // The first match is the whole string.
-          constexpr std::size_t expectedNumMatches = 4;
-          std::smatch baseMatch;
-          const std::string fname = funcInfo["name"].get<std::string>();
-          auto getMajorMinorVersion = [](const std::string &versionStr) {
-            std::size_t pos = versionStr.find('-');
-            int majorVersion = 0;
-            int minorVersion = 0;
-            if (pos != std::string::npos) {
-              majorVersion = std::stoi(versionStr.substr(0, pos));
-              minorVersion = std::stoi(versionStr.substr(pos + 1));
-            } else {
-              // If it doesn't say x.y, then assume it is x.0
-              majorVersion = std::stoi(versionStr);
-              minorVersion = 0;
-            }
-            return std::make_pair(majorVersion, minorVersion);
-          };
-          // If the function name matches 'Production' naming convention,
-          // retrieve deployment information from the name.
-          envs.name = fname;
-          if (std::regex_match(fname, baseMatch, funcNameRegex) &&
-              baseMatch.size() == expectedNumMatches) {
-            std::tie(envs.majorVersion, envs.minorVersion) =
-                getMajorMinorVersion(baseMatch[1].str());
-            envs.timeoutSecs = std::stoi(baseMatch[2].str());
-            envs.numGpus = std::stoi(baseMatch[3].str());
-            envs.hasSerializedCodeExec =
-                fname.starts_with("cuda_quantum_remote_py") ? 1 : 0;
-          } else if (funcInfo.contains("containerEnvironment")) {
-            // Otherwise, retrieve the info from deployment configurations.
-            // TODO: at some point, we may want to consolidate these two paths
-            // (name vs. meta-data). We keep it here since function metadata
-            // (similar to `containerEnvironment`) will be supported in the near
-            // future.
-            // Convert to unordered_map
-            std::unordered_map<std::string, std::string> containerEnvironment;
-            for (auto it : funcInfo["containerEnvironment"])
-              containerEnvironment[it["key"].get<std::string>()] =
-                  it["value"].get<std::string>();
-            // Fetch values
-            const auto getIntIfFound = [&](const std::string &envKey,
-                                           int &varToSet) {
-              if (auto it = containerEnvironment.find(envKey);
-                  it != containerEnvironment.end())
-                varToSet = std::stoi(it->second);
-            };
-            getIntIfFound("NUM_GPUS", envs.numGpus);
-            getIntIfFound("WATCHDOG_TIMEOUT_SEC", envs.timeoutSecs);
-            getIntIfFound("CUDAQ_SER_CODE_EXEC", envs.hasSerializedCodeExec);
-            if (auto it =
-                    containerEnvironment.find("NVQC_REST_PAYLOAD_VERSION");
-                it != containerEnvironment.end())
-              std::tie(envs.majorVersion, envs.minorVersion) =
-                  getMajorMinorVersion(it->second);
-          }
-
-          // Note: invalid/uninitialized FunctionEnvironments will be
-          // discarded, i.e., not added to the valid deployment list, since the
-          // API version number will not match.
-          return envs;
-        }();
-
-        // Only add functions that match client version, unless overridden
-        if (matchesOverride || containerEnvs.majorVersion == version()) {
-          info[funcInfo["id"].get<std::string>()] = containerEnvs;
-          highestMinorVersion =
-              std::max(highestMinorVersion, containerEnvs.minorVersion);
-        }
-      }
-    }
-
-    // Now make a pass through info and remove all the lower minor versions.
-    if (functionOverride.empty()) {
-      std::vector<std::string> funcsToRemove;
-      for (auto &iter : info)
-        if (iter.second.minorVersion != highestMinorVersion)
-          funcsToRemove.push_back(iter.first);
-      for (auto &funcToRemove : funcsToRemove)
-        info.erase(funcToRemove);
-    }
-
-    return info;
-  }
-
-  std::optional<std::size_t> getQueueDepth(const std::string &funcId,
-                                           const std::string &verId) {
-    auto headers = getHeaders();
-    try {
-      auto queueDepthInfo = m_restClient.get(
-          fmt::format("https://{}/nvcf/queues/functions/{}/versions/{}",
-                      m_baseUrl, funcId, verId),
-          "", headers, /*enableSsl=*/true);
-
-      if (queueDepthInfo.contains("functionId") &&
-          queueDepthInfo["functionId"] == funcId &&
-          queueDepthInfo.contains("queues")) {
-        for (auto queueInfo : queueDepthInfo["queues"]) {
-          if (queueInfo.contains("functionVersionId") &&
-              queueInfo["functionVersionId"] == verId &&
-              queueInfo.contains("queueDepth")) {
-            return queueInfo["queueDepth"].get<std::size_t>();
-          }
-        }
-      }
-      return std::nullopt;
-    } catch (...) {
-      // Make this non-fatal. Returns null, i.e., unknown.
-      return std::nullopt;
-    }
-  }
-
-  // Fetch the queue position of the given request ID. If the job has already
-  // begun execution, it will return `std::nullopt`.
-  std::optional<std::size_t> getQueuePosition(const std::string &requestId) {
-    auto headers = getHeaders();
-    try {
-      auto queuePos =
-          m_restClient.get(fmt::format("https://{}/nvcf/queues/{}/position",
-                                       m_baseUrl, requestId),
-                           "", headers, /*enableSsl=*/true);
-      if (queuePos.contains("positionInQueue"))
-        return queuePos["positionInQueue"].get<std::size_t>();
-      // When the job enters execution, it returns "status": 400 and "title":
-      // "Bad Request", so translate that to `std::nullopt`.
-      return std::nullopt;
-    } catch (...) {
-      // Make this non-fatal. Returns null, i.e., unknown.
-      return std::nullopt;
-    }
-  }
-
-public:
-  virtual void setConfig(
-      const std::unordered_map<std::string, std::string> &configs) override {
-    {
-      // Check if user set a specific log level (e.g., disable logging)
-      if (auto logConfigEnv = std::getenv("NVQC_LOG_LEVEL")) {
-        auto logConfig = std::string(logConfigEnv);
-        std::transform(logConfig.begin(), logConfig.end(), logConfig.begin(),
-                       [](unsigned char c) { return std::tolower(c); });
-        if (logConfig == "0" || logConfig == "off" || logConfig == "false" ||
-            logConfig == "no" || logConfig == "none")
-          m_logLevel = LogLevel::None;
-        if (logConfig == "trace")
-          m_logLevel = LogLevel::Trace;
-        if (logConfig == "info")
-          m_logLevel = LogLevel::Info;
-      }
-    }
-    {
-      const auto apiKeyIter = configs.find("api-key");
-      if (apiKeyIter != configs.end())
-        m_apiKey = apiKeyIter->second;
-      if (m_apiKey.empty())
-        throw std::runtime_error("No NVQC API key is provided.");
-    }
-
-    // Save some iterators to be used later
-    const auto funcIdIter = configs.find("function-id");
-    const auto versionIdIter = configs.find("version-id");
-    const auto nGpusIter = configs.find("ngpus");
-    // Default is 1 GPU if none specified
-    const int numGpusRequested =
-        (nGpusIter != configs.end()) ? std::stoi(nGpusIter->second) : 1;
-
-    // Override strings for function id and function version
-    const auto functionOverride = [&]() -> std::string {
-      if (funcIdIter == configs.end())
-        return "";
-      return funcIdIter->second;
-    }();
-    const auto versionOverride = [&]() -> std::string {
-      if (versionIdIter == configs.end())
-        return "";
-      return versionIdIter->second;
-    }();
-
-    // Pass the optional overrides to getAllAvailableDeployments so that it will
-    // return information about functions if they are manually specified by the
-    // user, even if they don't conform to naming conventions.
-    m_availableFuncs =
-        getAllAvailableDeployments(functionOverride, versionOverride);
-    for (const auto &[funcId, info] : m_availableFuncs)
-      CUDAQ_INFO("Function Id {} (API version {}.{}) has {} GPUs.", funcId,
-                 info.majorVersion, info.minorVersion, info.numGpus);
-    {
-      if (funcIdIter != configs.end()) {
-        // User overrides a specific function Id.
-        m_functionId = funcIdIter->second;
-        if (m_logLevel > LogLevel::None) {
-          // Print out the configuration
-          cudaq::log("Submitting jobs to NVQC using function Id {}.",
-                     m_functionId);
-        }
-      } else {
-        // Output an error message if no deployments can be found.
-        if (m_availableFuncs.empty())
-          throw std::runtime_error(
-              "Unable to find any active NVQC deployments for this key. Check "
-              "if you see any active functions on ngc.nvidia.com in the cloud "
-              "functions tab, or try to regenerate the key.");
-
-        // Determine the function Id based on the number of GPUs
-        CUDAQ_INFO("Looking for an NVQC deployment that has {} GPUs.",
-                   numGpusRequested);
-        for (const auto &[funcId, info] : m_availableFuncs) {
-          if (info.numGpus == numGpusRequested) {
-            m_functionId = funcId;
-            if (m_logLevel > LogLevel::None) {
-              // Print out the configuration
-              cudaq::log(
-                  "Submitting jobs to NVQC service with {} GPU(s). Max "
-                  "execution time: {} seconds (excluding queue wait time).",
-                  info.numGpus, info.timeoutSecs);
-            }
-            break;
-          }
-        }
-        if (m_functionId.empty()) {
-          // Make sure that we sort the GPU count list
-          std::set<std::size_t> gpuCounts;
-          for (const auto &[funcId, info] : m_availableFuncs) {
-            gpuCounts.emplace(info.numGpus);
-          }
-          std::stringstream ss;
-          ss << "Unable to find NVQC deployment with " << numGpusRequested
-             << " GPUs.\nAvailable deployments have ";
-          ss << fmt::format("{}", gpuCounts) << " GPUs.\n";
-          ss << "Please check your 'ngpus' value (Python) or `--nvqc-ngpus` "
-                "value (C++).\n";
-          throw std::runtime_error(ss.str());
-        }
-      }
-    }
-    {
-      auto versions = getFunctionVersions();
-      // Check if a version Id is set
-      if (versionIdIter != configs.end()) {
-        m_functionVersionId = versionIdIter->second;
-        // Do a sanity check that this is an active version (i.e., usable).
-        const auto versionInfoIter =
-            std::find_if(versions.begin(), versions.end(),
-                         [&](const cudaq::NvcfFunctionVersionInfo &info) {
-                           return info.versionId == m_functionVersionId;
-                         });
-        // Invalid version Id.
-        if (versionInfoIter == versions.end())
-          throw std::runtime_error(
-              fmt::format("Version Id '{}' is not valid for NVQC function Id "
-                          "'{}'. Please check your NVQC configurations.",
-                          m_functionVersionId, m_functionId));
-        // The version is not active/deployed.
-        if (versionInfoIter->status != cudaq::FunctionStatus::ACTIVE)
-          throw std::runtime_error(
-              fmt::format("Version Id '{}' of NVQC function Id "
-                          "'{}' is not ACTIVE. Please check your NVQC "
-                          "configurations or contact support.",
-                          m_functionVersionId, m_functionId));
-      } else {
-        // No version Id is set. Just pick the latest version of the function
-        // Id. The timestamp is an ISO 8601 string, e.g.,
-        // 2024-01-25T04:14:46.360Z. To sort it from latest to oldest, we can
-        // use string sorting.
-        std::sort(versions.begin(), versions.end(),
-                  [](const auto &a, const auto &b) {
-                    return a.createdAt > b.createdAt;
-                  });
-        for (const auto &versionInfo : versions)
-          CUDAQ_INFO("Found version Id {}, created at {}",
-                     versionInfo.versionId, versionInfo.createdAt);
-
-        auto activeVersions =
-            versions |
-            std::ranges::views::filter(
-                [](const cudaq::NvcfFunctionVersionInfo &info) {
-                  return info.status == cudaq::FunctionStatus::ACTIVE;
-                });
-
-        if (activeVersions.empty())
-          throw std::runtime_error(
-              fmt::format("No active version available for NVQC function Id "
-                          "'{}'. Please check your function Id.",
-                          m_functionId));
-
-        m_functionVersionId = activeVersions.front().versionId;
-        CUDAQ_INFO("Selected the latest version Id {} for function Id {}",
-                   m_functionVersionId, m_functionId);
-      }
-    }
-  }
-
-  // The NVCF version of this function needs to dynamically determine the remote
-  // capabilities based on the servers currently deployed.
-  virtual RemoteCapabilities getRemoteCapabilities() const override {
-    // Allow the user to override to all true.
-    if (getEnvBool("CUDAQ_CLIENT_REMOTE_CAPABILITY_OVERRIDE", false))
-      return RemoteCapabilities(/*initValues=*/true);
-    // Else determine capabilities based on server deployment info.
-    RemoteCapabilities capabilities(/*initValues=*/false);
-    if (!m_availableFuncs.contains(m_functionId)) {
-      // The user has manually overridden an NVQC function selection, but it
-      // wasn't found in m_availableFuncs.
-      CUDAQ_INFO(
-          "Function id overriden ({}) but cannot retrieve its remote "
-          "capabilities because a deployment for it was not found. Will assume "
-          "all optional remote capabilities are unsupported. You can set "
-          "CUDAQ_CLIENT_REMOTE_CAPABILITY_OVERRIDE=1 if you wish to override "
-          "this.",
-          m_functionId);
-      return capabilities;
-    }
-    const auto &funcEnv = m_availableFuncs.at(m_functionId);
-    capabilities.serializedCodeExec = funcEnv.hasSerializedCodeExec > 0;
-    capabilities.stateOverlap =
-        funcEnv.majorVersion > 1 ||
-        (funcEnv.majorVersion >= 1 && funcEnv.minorVersion >= 1);
-    capabilities.vqe = funcEnv.majorVersion > 1 ||
-                       (funcEnv.majorVersion >= 1 && funcEnv.minorVersion >= 1);
-    capabilities.isRemoteSimulator = true;
-    return capabilities;
-  }
-
-  virtual bool
-  sendRequest(mlir::MLIRContext &mlirContext,
-              cudaq::ExecutionContext &io_context,
-              cudaq::SerializedCodeExecutionContext *serializedCodeContext,
-              cudaq::gradient *vqe_gradient, cudaq::optimizer *vqe_optimizer,
-              const int vqe_n_params, const std::string &backendSimName,
-              const std::string &kernelName, void (*kernelFunc)(void *),
-              const void *kernelArgs, std::uint64_t argsSize,
-              std::string *optionalErrorMsg,
-              const std::vector<void *> *rawArgs) override {
-    if (isDisallowed(io_context.name))
-      throw std::runtime_error(
-          io_context.name +
-          " operation is not supported with cudaq target nvqc!");
-
-    static const std::vector<std::string> MULTI_GPU_BACKENDS = {
-        "tensornet", "nvidia-mgpu", "nvidia-mqpu"};
-    {
-      // Print out a message if users request a multi-GPU deployment while
-      // setting the backend to a single-GPU one. Only print once in case this
-      // is a execution loop.
-      static bool printOnce = false;
-      if (m_availableFuncs[m_functionId].numGpus > 1 &&
-          std::find(MULTI_GPU_BACKENDS.begin(), MULTI_GPU_BACKENDS.end(),
-                    backendSimName) == MULTI_GPU_BACKENDS.end() &&
-          !printOnce) {
-        std::cout << "The requested backend simulator (" << backendSimName
-                  << ") is not capable of using all "
-                  << m_availableFuncs[m_functionId].numGpus
-                  << " GPUs requested.\n";
-        std::cout << "Only one GPU will be used for simulation.\n";
-        std::cout << "Please refer to CUDA-Q documentation for a list of "
-                     "multi-GPU capable simulator backends.\n";
-        printOnce = true;
-      }
-    }
-    // Construct the base `cudaq-qpud` request payload.
-    cudaq::RestRequest request = [&]() {
-      if (vqe_n_params > 0)
-        return constructVQEJobRequest(mlirContext, io_context, backendSimName,
-                                      kernelName, kernelArgs, vqe_gradient,
-                                      *vqe_optimizer, vqe_n_params, rawArgs);
-      return constructJobRequest(mlirContext, io_context, serializedCodeContext,
-                                 backendSimName, kernelName, kernelFunc,
-                                 kernelArgs, argsSize, rawArgs);
-    }();
-
-    if (request.code.empty() && (serializedCodeContext == nullptr ||
-                                 serializedCodeContext->source_code.empty())) {
-      if (optionalErrorMsg)
-        *optionalErrorMsg =
-            std::string(
-                "Failed to construct/retrieve kernel IR for kernel named ") +
-            kernelName;
-      return false;
-    }
-
-    if (request.format != cudaq::CodeFormat::MLIR &&
-        serializedCodeContext == nullptr) {
-      // The `.config` file may have been tampered with.
-      std::cerr << "Internal error: unsupported kernel IR detected.\nThis may "
-                   "indicate a corrupted CUDA-Q installation.";
-      std::abort();
-    }
-
-    // Max message size that we can send in the body
-    constexpr std::size_t MAX_SIZE_BYTES = 250000; // 250 KB
-    json requestJson;
-    auto jobHeader = getHeaders();
-    std::optional<std::string> assetId;
-    // Make sure that we delete the asset that we've uploaded when this
-    // `sendRequest` function exits (success or not).
-    ScopeExit deleteAssetOnExit([&]() {
-      if (assetId.has_value()) {
-        CUDAQ_INFO("Deleting NVQC Asset Id {}", assetId.value());
-        auto headers = getHeaders();
-        m_restClient.del(nvcfAssetUrl(), std::string("/") + assetId.value(),
-                         headers, /*enableLogging=*/false, /*enableSsl=*/true);
-      }
-    });
-
-    // Upload this request as an NVCF asset if needed.
-    // Note: The majority of the payload is the IR code. Hence, first checking
-    // if it exceed the size limit. Otherwise, if the code is small, make sure
-    // that the total payload doesn't exceed that limit as well by constructing
-    // a temporary JSON object of the full payload.
-    if (request.code.size() > MAX_SIZE_BYTES ||
-        json(request).dump().size() > MAX_SIZE_BYTES) {
-      assetId = uploadRequest(request);
-      if (!assetId.has_value()) {
-        if (optionalErrorMsg)
-          *optionalErrorMsg = "Failed to upload request to NVQC as NVCF assets";
-        return false;
-      }
-      json requestBody;
-      // Use NVCF `inputAssetReferences` field to specify the asset that needs
-      // to be pulled in when invoking this function.
-      requestBody["inputAssetReferences"] =
-          std::vector<std::string>{assetId.value()};
-      requestJson["requestBody"] = requestBody;
-      requestJson["requestHeader"] = requestBody;
-    } else {
-      requestJson["requestBody"] = request;
-    }
-
-    try {
-      // Making the request
-      CUDAQ_DBG("Sending NVQC request to {}", nvcfInvocationUrl());
-      auto lastQueuePos = std::numeric_limits<std::size_t>::max();
-
-      if (m_logLevel > LogLevel::Info)
-        cudaq::log("Posting NVQC request now");
-      auto resultJs =
-          m_restClient.post(nvcfInvocationUrl(), "", requestJson, jobHeader,
-                            /*enableLogging=*/false, /*enableSsl=*/true);
-      CUDAQ_DBG("Response: {}", resultJs.dump());
-
-      // Call getQueuePosition() until we're at the front of the queue. If log
-      // level is "none", then skip all this because we don't need to show the
-      // status to the user, and we don't need to know the precise
-      // requestStartTime.
-      if (m_logLevel > LogLevel::None) {
-        if (resultJs.contains("status") &&
-            resultJs["status"] == "pending-evaluation") {
-          const std::string reqId = resultJs["reqId"];
-          auto queuePos = getQueuePosition(reqId);
-          while (queuePos.has_value() && queuePos.value() > 0) {
-            if (queuePos.value() != lastQueuePos) {
-              // Position in queue has changed.
-              if (lastQueuePos == std::numeric_limits<std::size_t>::max()) {
-                // If lastQueuePos hasn't been populated with a true value yet,
-                // it means we have not fetched the queue depth or displayed
-                // anything to the user yet.
-                cudaq::log("Number of jobs ahead of yours in the NVQC queue: "
-                           "{}. Your job will start executing once it gets to "
-                           "the head of the queue.",
-                           queuePos.value());
-              } else {
-                cudaq::log("Position in queue for request {} has changed from "
-                           "{} to {}",
-                           reqId, lastQueuePos, queuePos.value());
-              }
-              lastQueuePos = queuePos.value();
-            }
-            std::this_thread::sleep_for(std::chrono::seconds(1));
-            queuePos = getQueuePosition(reqId);
-          }
-        }
-        if (lastQueuePos != std::numeric_limits<std::size_t>::max())
-          cudaq::log("Your job is finished waiting in the queue and will now "
-                     "begin execution.");
-      }
-
-      const auto requestStartTime = std::chrono::system_clock::now();
-      bool needToPrintNewline = false;
-      while (resultJs.contains("status") &&
-             resultJs["status"] == "pending-evaluation") {
-        const std::string reqId = resultJs["reqId"];
-        const int elapsedTimeSecs =
-            std::chrono::duration_cast<std::chrono::seconds>(
-                std::chrono::system_clock::now() - requestStartTime)
-                .count();
-        // Warns if the remaining time is less than this threshold.
-        constexpr int TIMEOUT_WARNING_SECS = 5 * 60; // 5 minutes.
-        const int remainingSecs =
-            m_availableFuncs[m_functionId].timeoutSecs - elapsedTimeSecs;
-        std::string additionalInfo;
-        if (remainingSecs < 0)
-          fmt::format_to(std::back_inserter(additionalInfo),
-                         ". Exceeded wall time limit ({} seconds), but time "
-                         "spent waiting in queue is not counted. Proceeding.",
-                         m_availableFuncs[m_functionId].timeoutSecs);
-        else if (remainingSecs < TIMEOUT_WARNING_SECS)
-          fmt::format_to(std::back_inserter(additionalInfo),
-                         ". Approaching the wall time limit ({} seconds). "
-                         "Remaining time: {} seconds.",
-                         m_availableFuncs[m_functionId].timeoutSecs,
-                         remainingSecs);
-        // If NVQC log level is high enough or if we have additional info to
-        // print, then print the full message; else print a simple "."
-        if (m_logLevel > LogLevel::Info || !additionalInfo.empty()) {
-          if (needToPrintNewline)
-            std::cout << "\n";
-          needToPrintNewline = false;
-          cudaq::log("Polling NVQC result data for Request Id {}{}", reqId,
-                     additionalInfo);
-        } else if (m_logLevel > LogLevel::None) {
-          std::cout << ".";
-          std::cout.flush();
-          needToPrintNewline = true;
-        }
-        // Wait 1 sec then poll the result
-        std::this_thread::sleep_for(std::chrono::seconds(1));
-        resultJs = m_restClient.get(nvcfInvocationStatus(reqId), "", jobHeader,
-                                    /*enableSsl=*/true);
-      }
-
-      if (needToPrintNewline)
-        std::cout << "\n";
-
-      if (!resultJs.contains("status") || resultJs["status"] != "fulfilled") {
-        if (optionalErrorMsg)
-          *optionalErrorMsg =
-              std::string(
-                  "Failed to complete the simulation request. Status: ") +
-              (resultJs.contains("status") ? std::string(resultJs["status"])
-                                           : std::string("unknown"));
-        return false;
-      }
-
-      // If there is a `responseReference` field, this is a large response.
-      // Hence, need to download result .zip file from the provided URL.
-      if (resultJs.contains("responseReference")) {
-        // This is a large response that needs to be downloaded
-        const std::string downloadUrl = resultJs["responseReference"];
-        const std::string reqId = resultJs["reqId"];
-        CUDAQ_INFO("Download result for Request Id {} at {}", reqId,
-                   downloadUrl);
-        llvm::SmallString<32> tempDir;
-        llvm::sys::path::system_temp_directory(/*ErasedOnReboot*/ true,
-                                               tempDir);
-        std::filesystem::path resultFilePath =
-            std::filesystem::path(tempDir.c_str()) / (reqId + ".zip");
-        m_restClient.download(downloadUrl, resultFilePath.string(),
-                              /*enableLogging=*/false, /*enableSsl=*/true);
-        CUDAQ_INFO("Downloaded zip file {}", resultFilePath.string());
-        std::filesystem::path unzipDir =
-            std::filesystem::path(tempDir.c_str()) / reqId;
-        // Unzip the response
-        cudaq::utils::unzip(resultFilePath, unzipDir);
-        std::filesystem::path resultJsonFile =
-            unzipDir / (reqId + "_result.json");
-        if (!std::filesystem::exists(resultJsonFile)) {
-          if (optionalErrorMsg)
-            *optionalErrorMsg =
-                "Unexpected response file: missing the result JSON file.";
-          return false;
-        }
-        std::ifstream t(resultJsonFile.string());
-        std::string resultJsonFromFile((std::istreambuf_iterator<char>(t)),
-                                       std::istreambuf_iterator<char>());
-        try {
-          resultJs["response"] = json::parse(resultJsonFromFile);
-        } catch (...) {
-          if (optionalErrorMsg)
-            *optionalErrorMsg =
-                fmt::format("Failed to parse the response JSON from file '{}'.",
-                            resultJsonFile.string());
-          return false;
-        }
-        CUDAQ_INFO(
-            "Delete response zip file {} and its inflated contents in {}",
-            resultFilePath.c_str(), unzipDir.c_str());
-        std::filesystem::remove(resultFilePath);
-        std::filesystem::remove_all(unzipDir);
-      }
-
-      if (!resultJs.contains("response")) {
-        if (optionalErrorMsg)
-          *optionalErrorMsg = "Unexpected response from the NVQC invocation. "
-                              "Missing the 'response' field.";
-        return false;
-      }
-      if (!resultJs["response"].contains("executionContext")) {
-        if (optionalErrorMsg) {
-          if (resultJs["response"].contains("errorMessage")) {
-            *optionalErrorMsg = fmt::format(
-                "NVQC failed to handle request. Server error: {}",
-                resultJs["response"]["errorMessage"].get<std::string>());
-          } else {
-            *optionalErrorMsg =
-                "Unexpected response from the NVQC response. "
-                "Missing the required field 'executionContext'.";
-          }
-        }
-        return false;
-      }
-      if (m_logLevel > LogLevel::None &&
-          resultJs["response"].contains("executionInfo")) {
-        try {
-          // We only print GPU device info once if logging is not disabled.
-          static bool printDeviceInfoOnce = false;
-          cudaq::NvcfExecutionInfo info;
-          resultJs["response"]["executionInfo"].get_to(info);
-          if (!printDeviceInfoOnce) {
-            std::size_t totalWidth = 50;
-            std::string message = "NVQC Device Info";
-            auto strLen = message.size() + 2; // Account for surrounding spaces
-            auto leftSize = (totalWidth - strLen) / 2;
-            auto rightSize = (totalWidth - strLen) - leftSize;
-            std::string leftSide(leftSize, '=');
-            std::string rightSide(rightSize, '=');
-            auto &platform = cudaq::get_platform();
-            std::ostream &os =
-                platform.getLogStream() ? *platform.getLogStream() : std::cout;
-            os << fmt::format("\n{} {} {}\n", leftSide, message, rightSide);
-            os << fmt::format("GPU Device Name: \"{}\"\n",
-                              info.deviceProps.deviceName);
-            os << fmt::format(
-                "CUDA Driver Version / Runtime Version: {}.{} / {}.{}\n",
-                info.deviceProps.driverVersion / 1000,
-                (info.deviceProps.driverVersion % 100) / 10,
-                info.deviceProps.runtimeVersion / 1000,
-                (info.deviceProps.runtimeVersion % 100) / 10);
-            os << fmt::format("Total global memory (GB): {:.1f}\n",
-                              (float)(info.deviceProps.totalGlobalMemMbytes) /
-                                  1024.0);
-            os << fmt::format("Memory Clock Rate (MHz): {:.3f}\n",
-                              info.deviceProps.memoryClockRateMhz);
-            os << fmt::format("GPU Clock Rate (MHz): {:.3f}\n",
-                              info.deviceProps.clockRateMhz);
-            os << fmt::format("{}\n", std::string(totalWidth, '='));
-            // Only print this device info once.
-            printDeviceInfoOnce = true;
-          }
-
-          // If trace logging mode is enabled, log timing data for each request.
-          if (m_logLevel == LogLevel::Trace) {
-            fmt::print("\n===== NVQC Execution Timing ======\n");
-            fmt::print(" - Pre-processing: {} milliseconds \n",
-                       info.simulationStart - info.requestStart);
-            fmt::print(" - Execution: {} milliseconds \n",
-                       info.simulationEnd - info.simulationStart);
-            fmt::print("==================================\n");
-          }
-        } catch (...) {
-          fmt::print("Unable to parse NVQC execution info metadata.\n");
-        }
-      }
-      resultJs["response"]["executionContext"].get_to(io_context);
-      return true;
-    } catch (std::exception &e) {
-      if (optionalErrorMsg)
-        *optionalErrorMsg = e.what();
-      return false;
-    } catch (...) {
-      std::string exType = __cxxabiv1::__cxa_current_exception_type()->name();
-      auto demangledPtr =
-          __cxxabiv1::__cxa_demangle(exType.c_str(), nullptr, nullptr, nullptr);
-      if (demangledPtr && optionalErrorMsg) {
-        std::string demangledName(demangledPtr);
-        *optionalErrorMsg = "Unhandled exception of type " + demangledName;
-      } else if (optionalErrorMsg) {
-        *optionalErrorMsg = "Unhandled exception of unknown type";
-      }
-      return false;
-    }
-  }
-
-  // Upload a job request as an NVCF asset.
-  // Return asset Id on success. Otherwise, return null.
-  std::optional<std::string>
-  uploadRequest(const cudaq::RestRequest &jobRequest) {
-    json requestJson;
-    requestJson["contentType"] = "application/json";
-    requestJson["description"] = "cudaq-nvqc-job";
-    try {
-      auto headers = getHeaders();
-      auto resultJs =
-          m_restClient.post(nvcfAssetUrl(), "", requestJson, headers,
-                            /*enableLogging=*/false, /*enableSsl=*/true);
-      const std::string uploadUrl = resultJs["uploadUrl"];
-      const std::string assetId = resultJs["assetId"];
-      CUDAQ_INFO("Upload NVQC job request as NVCF Asset Id {} to {}", assetId,
-                 uploadUrl);
-      std::map<std::string, std::string> uploadHeader;
-      // This must match the request to create the upload link
-      uploadHeader["Content-Type"] = "application/json";
-      uploadHeader["x-amz-meta-nvcf-asset-description"] = "cudaq-nvqc-job";
-      json jobRequestJs = jobRequest;
-      m_restClient.put(uploadUrl, "", jobRequestJs, uploadHeader,
-                       /*enableLogging=*/false, /*enableSsl=*/true);
-      return assetId;
-    } catch (...) {
-      return {};
-    }
-  }
-};
-
 } // namespace cudaq
diff --git a/runtime/common/JsonConvert.h b/runtime/common/JsonConvert.h
index 3340f2965dc..56de1bd3718 100644
--- a/runtime/common/JsonConvert.h
+++ b/runtime/common/JsonConvert.h
@@ -10,7 +10,6 @@
 #include "GPUInfo.h"
 #include "common/ExecutionContext.h"
 #include "common/FmtCore.h"
-#include "common/SerializedCodeExecutionContext.h"
 #include "cudaq/Support/Version.h"
 #include "cudaq/gradients.h"
 #include "cudaq/optimizers.h"
@@ -573,8 +572,6 @@ class RestRequest {
   // (2) Breaking changes in the runtime, which make JIT execution incompatible,
   //     e.g., changing the simulator names (.so files), changing signatures of
   //     QIR functions, etc.
-  // IMPORTANT: When a new version is defined, a new NVQC deployment will be
-  // needed.
   static constexpr std::size_t REST_PAYLOAD_VERSION = 1;
   static constexpr std::size_t REST_PAYLOAD_MINOR_VERSION = 1;
   RestRequest(ExecutionContext &context, int versionNumber)
@@ -613,10 +610,6 @@ class RestRequest {
   std::size_t version;
   // Version of the runtime client submitting the request.
   std::string clientVersion;
-  // The SerializedCodeExecutionContext to compile and to execute a limited
-  // subset of Python source code. The server will execute serialized code in
-  // this context
-  std::optional<SerializedCodeExecutionContext> serializedCodeExecutionContext;
 
   friend void to_json(json &j, const RestRequest &p) {
     TO_JSON_HELPER(version);
@@ -631,7 +624,6 @@ class RestRequest {
     TO_JSON_HELPER(seed);
     TO_JSON_HELPER(passes);
     TO_JSON_HELPER(clientVersion);
-    TO_JSON_OPT_HELPER(serializedCodeExecutionContext);
   }
 
   friend void from_json(const json &j, RestRequest &p) {
@@ -647,53 +639,7 @@ class RestRequest {
     FROM_JSON_HELPER(seed);
     FROM_JSON_HELPER(passes);
     FROM_JSON_HELPER(clientVersion);
-    FROM_JSON_OPT_HELPER(serializedCodeExecutionContext);
   }
 };
 
-/// NVCF function version status
-enum class FunctionStatus { ACTIVE, DEPLOYING, ERROR, INACTIVE, DELETED };
-NLOHMANN_JSON_SERIALIZE_ENUM(FunctionStatus,
-                             {
-                                 {FunctionStatus::ACTIVE, "ACTIVE"},
-                                 {FunctionStatus::DEPLOYING, "DEPLOYING"},
-                                 {FunctionStatus::ERROR, "ERROR"},
-                                 {FunctionStatus::INACTIVE, "INACTIVE"},
-                                 {FunctionStatus::DELETED, "DELETED"},
-                             });
-
-// Encapsulates a function version info
-// Note: we only parse a subset of required fields (always present). There may
-// be other fields, which are not required.
-struct NvcfFunctionVersionInfo {
-  // Function Id
-  std::string id;
-  // NVIDIA NGC Org Id (NCA Id)
-  std::string ncaId;
-  // Version Id
-  std::string versionId;
-  // Function name
-  std::string name;
-  // Status of this particular function version
-  FunctionStatus status;
-  // Function version creation timestamp (ISO 8601 string)
-  // e.g., "2024-02-05T00:09:51.154Z"
-  std::string createdAt;
-  NLOHMANN_DEFINE_TYPE_INTRUSIVE(NvcfFunctionVersionInfo, id, ncaId, versionId,
-                                 name, status, createdAt);
-};
-
-// NVCF execution metadata.
-struct NvcfExecutionInfo {
-  // Time point (milliseconds since epoch) when the request handling starts.
-  std::size_t requestStart;
-  // Time point (milliseconds since epoch) when the execution starts (JIT
-  // completed).
-  std::size_t simulationStart;
-  // Time point (milliseconds since epoch) when the execution finishes.
-  std::size_t simulationEnd;
-  CudaDeviceProperties deviceProps;
-  NLOHMANN_DEFINE_TYPE_INTRUSIVE(NvcfExecutionInfo, requestStart,
-                                 simulationStart, simulationEnd, deviceProps);
-};
 } // namespace cudaq
diff --git a/runtime/common/NvqcConfig.h b/runtime/common/NvqcConfig.h
deleted file mode 100644
index 1019e2a2201..00000000000
--- a/runtime/common/NvqcConfig.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/****************************************************************-*- C++ -*-****
- * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                  *
- * All rights reserved.                                                        *
- *                                                                             *
- * This source code and the accompanying materials are made available under    *
- * the terms of the Apache License 2.0 which accompanies this distribution.    *
- ******************************************************************************/
-
-#pragma once
-#include <stdlib.h>
-#include <string>
-namespace cudaq {
-
-static constexpr const char *NVQC_NCA_ID_ENV_VAR = "NVQC_NCA_ID";
-static constexpr const char *DEV_NVQC_NCA_ID =
-    "mZraB3k06kOd8aPhD6MVXJwBVZ67aXDLsfmDo4MYXDs";
-static constexpr const char *PROD_NVQC_NCA_ID =
-    "audj0Ow_82RT0BbiewKaIryIdZWiSrOqiiDSaA8w7a8";
-
-inline bool isNvqcNcaIdOverridden() {
-  return std::getenv(NVQC_NCA_ID_ENV_VAR) != nullptr;
-}
-
-inline std::string getNvqcNcaId() {
-  // Allows runtime override by environment variable.
-  if (auto ncaIdVar = std::getenv(NVQC_NCA_ID_ENV_VAR))
-    return std::string(ncaIdVar);
-  return PROD_NVQC_NCA_ID;
-}
-} // namespace cudaq
diff --git a/runtime/common/RemoteKernelExecutor.h b/runtime/common/RemoteKernelExecutor.h
index e703a989cb5..18b980e0d42 100644
--- a/runtime/common/RemoteKernelExecutor.h
+++ b/runtime/common/RemoteKernelExecutor.h
@@ -28,7 +28,6 @@ namespace cudaq {
 class ExecutionContext;
 class gradient;
 class optimizer;
-class SerializedCodeExecutionContext;
 
 /// Base interface encapsulating a CUDA-Q runtime server capable of
 /// running kernel IR code.
@@ -103,7 +102,6 @@ class RemoteRuntimeClient
   // if this was a local execution.
   virtual bool
   sendRequest(mlir::MLIRContext &mlirContext, ExecutionContext &io_context,
-              SerializedCodeExecutionContext *serializedCodeContext,
               cudaq::gradient *vqe_gradient, cudaq::optimizer *vqe_optimizer,
               const int vqe_n_params, const std::string &backendSimName,
               const std::string &kernelName, void (*kernelFunc)(void *),
diff --git a/runtime/common/SerializedCodeExecutionContext.h b/runtime/common/SerializedCodeExecutionContext.h
deleted file mode 100644
index 9a17591d50d..00000000000
--- a/runtime/common/SerializedCodeExecutionContext.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/****************************************************************-*- C++ -*-****
- * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                  *
- * All rights reserved.                                                        *
- *                                                                             *
- * This source code and the accompanying materials are made available under    *
- * the terms of the Apache License 2.0 which accompanies this distribution.    *
- ******************************************************************************/
-
-#pragma once
-
-#include "nlohmann/json.hpp"
-#include <optional>
-#include <string>
-#include <vector>
-
-using json = nlohmann::json;
-
-namespace cudaq {
-
-/// @brief The SerializedCodeExecutionContext is an abstraction to indicate
-/// how a serialized code should be executed.
-class SerializedCodeExecutionContext {
-public:
-  /// @brief All variables visible to the Python \p source_code to execute, as a
-  /// JSON-like string object.
-  std::string scoped_var_dict;
-
-  /// @brief The source code of the objective function and its call as a string.
-  std::string source_code;
-
-  SerializedCodeExecutionContext() = default;
-  ~SerializedCodeExecutionContext() = default;
-
-  // Serialization
-  NLOHMANN_DEFINE_TYPE_INTRUSIVE(SerializedCodeExecutionContext,
-                                 scoped_var_dict, source_code);
-};
-} // namespace cudaq
diff --git a/runtime/cudaq/algorithms/gradient.h b/runtime/cudaq/algorithms/gradient.h
index d2c6a13a5a7..73998fa811c 100644
--- a/runtime/cudaq/algorithms/gradient.h
+++ b/runtime/cudaq/algorithms/gradient.h
@@ -76,7 +76,7 @@ class gradient {
   /// Take the quantum kernel and concrete arguments for all arguments except
   /// the first std::vector<double> argument, which is used for the variational
   /// parameters for the gradient. Serialize and save those arguments into this
-  /// object. (Useful for NVQC.)
+  /// object.
   template <typename QuantumKernel, typename... Args>
   void setArgs(QuantumKernel &kernel, Args &&...args) {
     static_assert(
diff --git a/runtime/cudaq/platform/default/rest_server/helpers/RestRemoteClient.cpp b/runtime/cudaq/platform/default/rest_server/helpers/RestRemoteClient.cpp
index d52eadb0ecf..4315c30e236 100644
--- a/runtime/cudaq/platform/default/rest_server/helpers/RestRemoteClient.cpp
+++ b/runtime/cudaq/platform/default/rest_server/helpers/RestRemoteClient.cpp
@@ -17,14 +17,6 @@ class RemoteRestRuntimeClient : public cudaq::BaseRemoteRestRuntimeClient {
   RemoteRestRuntimeClient() : BaseRemoteRestRuntimeClient() {}
 };
 
-/// REST client submitting jobs to NVCF-hosted `cudaq-qpud` service.
-class NvcfRuntimeClient : public cudaq::BaseNvcfRuntimeClient {
-public:
-  /// @brief The constructor
-  NvcfRuntimeClient() : BaseNvcfRuntimeClient() {}
-};
-
 } // namespace
 
 CUDAQ_REGISTER_TYPE(cudaq::RemoteRuntimeClient, RemoteRestRuntimeClient, rest)
-CUDAQ_REGISTER_TYPE(cudaq::RemoteRuntimeClient, NvcfRuntimeClient, NVCF)
diff --git a/runtime/cudaq/platform/default/rest_server/helpers/RestRemoteServer.cpp b/runtime/cudaq/platform/default/rest_server/helpers/RestRemoteServer.cpp
index 91127b05e83..21454199df1 100644
--- a/runtime/cudaq/platform/default/rest_server/helpers/RestRemoteServer.cpp
+++ b/runtime/cudaq/platform/default/rest_server/helpers/RestRemoteServer.cpp
@@ -169,68 +169,11 @@ class RemoteRestRuntimeServer : public cudaq::RemoteRuntimeServer {
           std::string mutableReq;
           for (const auto &[k, v] : headers)
             CUDAQ_INFO("Request Header: {} : {}", k, v);
-          // Checking if this request has its body sent on as NVCF assets.
-          const auto dirIter = headers.find("NVCF-ASSET-DIR");
-          const auto assetIdIter = headers.find("NVCF-FUNCTION-ASSET-IDS");
-          if (dirIter != headers.end() && assetIdIter != headers.end()) {
-            const std::string dir = dirIter->second;
-            const auto ids = cudaq::split(assetIdIter->second, ',');
-            if (ids.size() != 1) {
-              json js;
-              js["status"] =
-                  fmt::format("Invalid asset Id data: {}", assetIdIter->second);
-              return js;
-            }
-            // Load the asset file
-            std::filesystem::path assetFile =
-                std::filesystem::path(dir) / ids[0];
-            if (!std::filesystem::exists(assetFile)) {
-              json js;
-              js["status"] = fmt::format("Unable to find the asset file {}",
-                                         assetFile.string());
-              return js;
-            }
-            std::ifstream t(assetFile);
-            std::string requestFromFile((std::istreambuf_iterator<char>(t)),
-                                        std::istreambuf_iterator<char>());
-            mutableReq = requestFromFile;
-          } else {
-            mutableReq = reqBody;
-          }
+          mutableReq = reqBody;
 
           if (m_hasMpi)
             cudaq::mpi::broadcast(mutableReq, 0);
           auto resultJs = processRequest(mutableReq);
-          // Check whether we have a limit in terms of response size.
-          if (headers.contains("NVCF-MAX-RESPONSE-SIZE-BYTES")) {
-            const std::size_t maxResponseSizeBytes = std::stoll(
-                headers.find("NVCF-MAX-RESPONSE-SIZE-BYTES")->second);
-            if (resultJs.dump().size() > maxResponseSizeBytes) {
-              // If the response size is larger than the limit, write it to the
-              // large output directory rather than sending it back as an HTTP
-              // response.
-              const auto outputDirIter = headers.find("NVCF-LARGE-OUTPUT-DIR");
-              const auto reqIdIter = headers.find("NVCF-REQID");
-              if (outputDirIter == headers.end() ||
-                  reqIdIter == headers.end()) {
-                json js;
-                js["status"] =
-                    "Failed to locate output file location for large response.";
-                return js;
-              }
-
-              const std::string outputDir = outputDirIter->second;
-              const std::string fileName = reqIdIter->second + "_result.json";
-              const std::filesystem::path outputFile =
-                  std::filesystem::path(outputDir) / fileName;
-              std::ofstream file(outputFile.string());
-              file << resultJs.dump();
-              file.flush();
-              json js;
-              js["resultFile"] = fileName;
-              return js;
-            }
-          }
 
           return resultJs;
         });
@@ -835,64 +778,6 @@ class RemoteRestRuntimeServer : public cudaq::RemoteRuntimeServer {
   }
 };
 
-// Runtime server for NVCF
-class NvcfRuntimeServer : public RemoteRestRuntimeServer {
-public:
-  NvcfRuntimeServer() : RemoteRestRuntimeServer() { exitAfterJob = true; }
-
-protected:
-  virtual bool filterRequest(const cudaq::RestRequest &in_request,
-                             std::string &outValidationMessage) const override {
-    // We only support MLIR payload on the NVCF server.
-    if (in_request.format != cudaq::CodeFormat::MLIR) {
-      outValidationMessage =
-          "Unsupported input format: only CUDA-Q MLIR data is allowed.";
-      return false;
-    }
-
-    if (!in_request.passes.empty()) {
-      outValidationMessage =
-          "Unsupported passes: server-side compilation passes are not allowed.";
-      return false;
-    }
-
-    return true;
-  }
-
-protected:
-  virtual json processRequest(const std::string &reqBody,
-                              bool forceLog = false) override {
-    // When calling RemoteRestRuntimeServer::processRequest, set forceLog=true
-    // so that incoming requests are always logged, regardless of what log level
-    // we're running the server at.
-    auto executionResult =
-        RemoteRestRuntimeServer::processRequest(reqBody, /*forceLog=*/true);
-    // Amend execution information
-    executionResult["executionInfo"] = constructExecutionInfo();
-    return executionResult;
-  }
-
-private:
-  cudaq::NvcfExecutionInfo constructExecutionInfo() {
-    cudaq::NvcfExecutionInfo info;
-    const auto optionalTimePointToInt =
-        [](const auto &optionalTimePoint) -> std::size_t {
-      return optionalTimePoint.has_value()
-                 ? std::chrono::duration_cast<std::chrono::milliseconds>(
-                       optionalTimePoint.value().time_since_epoch())
-                       .count()
-                 : 0;
-    };
-    info.requestStart = optionalTimePointToInt(requestStart);
-    info.simulationStart = optionalTimePointToInt(simulationStart);
-    info.simulationEnd = optionalTimePointToInt(simulationEnd);
-    const auto deviceProps = cudaq::getCudaProperties();
-    if (deviceProps.has_value())
-      info.deviceProps = deviceProps.value();
-    return info;
-  }
-};
 } // namespace
 
 CUDAQ_REGISTER_TYPE(cudaq::RemoteRuntimeServer, RemoteRestRuntimeServer, rest)
-CUDAQ_REGISTER_TYPE(cudaq::RemoteRuntimeServer, NvcfRuntimeServer, nvcf)
diff --git a/runtime/cudaq/platform/mqpu/CMakeLists.txt b/runtime/cudaq/platform/mqpu/CMakeLists.txt
index 47ef0ab14b3..29da3c61e33 100644
--- a/runtime/cudaq/platform/mqpu/CMakeLists.txt
+++ b/runtime/cudaq/platform/mqpu/CMakeLists.txt
@@ -45,4 +45,3 @@ endif()
   
 install(TARGETS ${LIBRARY_NAME} DESTINATION lib)
 add_target_config(remote-mqpu)
-add_target_config(nvqc)
diff --git a/runtime/cudaq/platform/mqpu/MultiQPUPlatform.cpp b/runtime/cudaq/platform/mqpu/MultiQPUPlatform.cpp
index a1f28ec5f8c..e8cb0bb7620 100644
--- a/runtime/cudaq/platform/mqpu/MultiQPUPlatform.cpp
+++ b/runtime/cudaq/platform/mqpu/MultiQPUPlatform.cpp
@@ -154,53 +154,7 @@ class MultiQPUQuantumPlatform : public cudaq::quantum_platform {
             fmt::format("Unable to retrieve {} QPU implementation. Please "
                         "check your installation.",
                         qpuSubType));
-      if (qpuSubType == "NvcfSimulatorQPU") {
-        platformQPUs.clear();
-        threadToQpuId.clear();
-        platformCurrentQPU = 0;
-        auto simName = getOpt(description, "backend");
-        if (simName.empty())
-          simName = "custatevec-fp32";
-        std::string configStr =
-            fmt::format("target;nvqc;simulator;{}", simName);
-        auto getOptAndSetConfig = [&](const std::string &key) {
-          auto val = getOpt(description, key);
-          if (!val.empty())
-            configStr += fmt::format(";{};{}", key, val);
-        };
-        getOptAndSetConfig("api_key");
-        getOptAndSetConfig("function_id");
-        getOptAndSetConfig("version_id");
-
-        auto numQpusStr = getOpt(description, "nqpus");
-        int numQpus = numQpusStr.empty() ? 1 : std::stoi(numQpusStr);
-
-        if (simName.find("nvidia-mqpu") != std::string::npos && numQpus > 1) {
-          // If the backend simulator is an MQPU simulator (like nvidia-mqpu),
-          // then use "nqpus" to determine the number of GPUs to request for the
-          // backend. This allows us to seamlessly translate requests for MQPU
-          // requests to the NVQC platform.
-          configStr += fmt::format(";{};{}", "ngpus", numQpus);
-          // Now change numQpus to 1 for the downstream code, which will make a
-          // single NVQC QPU.
-          numQpus = 1;
-        } else {
-          getOptAndSetConfig("ngpus");
-        }
-
-        if (numQpus < 1)
-          throw std::invalid_argument("Number of QPUs must be greater than 0.");
-        for (int qpuId = 0; qpuId < numQpus; ++qpuId) {
-          // Populate the information and add the QPUs
-          auto qpu = cudaq::registry::get<cudaq::QPU>("NvcfSimulatorQPU");
-          qpu->setId(qpuId);
-          qpu->setTargetBackend(configStr);
-          threadToQpuId[std::hash<std::thread::id>{}(
-              qpu->getExecutionThreadId())] = qpuId;
-          platformQPUs.emplace_back(std::move(qpu));
-        }
-        platformNumQPUs = platformQPUs.size();
-      } else if (qpuSubType == "orca") {
+      if (qpuSubType == "orca") {
         auto urls = cudaq::split(getOpt(description, "url"), ',');
         platformQPUs.clear();
         threadToQpuId.clear();
diff --git a/runtime/cudaq/platform/mqpu/nvqc.yml b/runtime/cudaq/platform/mqpu/nvqc.yml
deleted file mode 100644
index b983f1679d7..00000000000
--- a/runtime/cudaq/platform/mqpu/nvqc.yml
+++ /dev/null
@@ -1,51 +0,0 @@
-# ============================================================================ #
-# Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                   #
-# All rights reserved.                                                         #
-#                                                                              #
-# This source code and the accompanying materials are made available under     #
-# the terms of the Apache License 2.0 which accompanies this distribution.     #
-# ============================================================================ #
-
-name: nvqc
-description:
-  "The NVQC Target provides access to simulated QPU services hosted on the NVIDIA Quantum Cloud."
-
-target-arguments:
-  - key: backend
-    required: false
-    type: string
-    platform-arg: backend 
-    help-string: "Specify the remote simulator backend."
-  - key: nqpus
-    required: false
-    type: integer
-    platform-arg: nqpus 
-    help-string: "Specify the number of virtual NVQC QPUs."
-  - key: ngpus
-    required: false
-    type: integer
-    platform-arg: ngpus 
-    help-string: "Specify the number of GPUs required."
-  - key: function-id
-    required: false
-    type: uuid
-    platform-arg: function_id 
-    help-string: "Specify the NVQC function Id."
-  - key: function-version-id
-    required: false
-    type: uuid
-    platform-arg: version_id 
-    help-string: "Specify the NVQC function version Id."
-  - key: api-key
-    required: false
-    type: string
-    platform-arg: api_key 
-    help-string: "Specify NVQC API key."
-
-config:
-  gen-target-backend: true
-  platform-library: mqpu
-  platform-qpu: NvcfSimulatorQPU
-  library-mode: false
-  preprocessor-defines: ["-D CUDAQ_REMOTE_SIM"]
-  link-libs: ["-lcudaq-remote-simulator-qpu"]
diff --git a/runtime/cudaq/platform/mqpu/remote/RemoteSimulatorQPU.cpp b/runtime/cudaq/platform/mqpu/remote/RemoteSimulatorQPU.cpp
index a15d318c8c4..b49c138e949 100644
--- a/runtime/cudaq/platform/mqpu/remote/RemoteSimulatorQPU.cpp
+++ b/runtime/cudaq/platform/mqpu/remote/RemoteSimulatorQPU.cpp
@@ -25,17 +25,6 @@ class RemoteSimulatorQPU : public cudaq::BaseRemoteSimulatorQPU {
   virtual ~RemoteSimulatorQPU() = default;
 };
 
-/// Implementation of QPU subtype that submits simulation request to NVCF.
-class NvcfSimulatorQPU : public cudaq::BaseNvcfSimulatorQPU {
-public:
-  NvcfSimulatorQPU() : BaseNvcfSimulatorQPU() {
-    m_mlirContext = cudaq::initializeMLIR();
-  }
-
-  NvcfSimulatorQPU(NvcfSimulatorQPU &&) = delete;
-  virtual ~NvcfSimulatorQPU() = default;
-};
 } // namespace
 
 CUDAQ_REGISTER_TYPE(cudaq::QPU, RemoteSimulatorQPU, RemoteSimulatorQPU)
-CUDAQ_REGISTER_TYPE(cudaq::QPU, NvcfSimulatorQPU, NvcfSimulatorQPU)
diff --git a/runtime/cudaq/platform/qpu.h b/runtime/cudaq/platform/qpu.h
index a40b745bb18..0db13ec46c6 100644
--- a/runtime/cudaq/platform/qpu.h
+++ b/runtime/cudaq/platform/qpu.h
@@ -22,7 +22,6 @@
 namespace cudaq {
 class gradient;
 class optimizer;
-class SerializedCodeExecutionContext;
 
 /// Expose the function that will return the current ExecutionManager
 ExecutionManager *getExecutionManager();
@@ -193,12 +192,6 @@ class QPU : public registry::RegisteredType<QPU> {
                                "simulated QPU. This is not supported.");
   }
 
-  /// Launch serialized code for remote execution. Subtypes that support this
-  /// should override this function.
-  virtual void launchSerializedCodeExecution(
-      const std::string &name,
-      cudaq::SerializedCodeExecutionContext &serializeCodeExecutionObject) {}
-
   /// @brief Notify the QPU that a new random seed value is set.
   /// By default do nothing, let subclasses override.
   virtual void onRandomSeedSet(std::size_t seed) {}
diff --git a/runtime/cudaq/platform/quantum_platform.cpp b/runtime/cudaq/platform/quantum_platform.cpp
index e0a2a2319ea..133482dbff5 100644
--- a/runtime/cudaq/platform/quantum_platform.cpp
+++ b/runtime/cudaq/platform/quantum_platform.cpp
@@ -223,22 +223,6 @@ void quantum_platform::launchKernel(const std::string &kernelName,
   qpu->launchKernel(kernelName, rawArgs);
 }
 
-void quantum_platform::launchSerializedCodeExecution(
-    const std::string &name,
-    SerializedCodeExecutionContext &serializeCodeExecutionObject) {
-  std::size_t qpu_id = 0;
-
-  auto tid = std::hash<std::thread::id>{}(std::this_thread::get_id());
-  {
-    std::shared_lock lock(threadToQpuIdMutex);
-    auto iter = threadToQpuId.find(tid);
-    if (iter != threadToQpuId.end())
-      qpu_id = iter->second;
-  }
-  auto &qpu = platformQPUs[qpu_id];
-  qpu->launchSerializedCodeExecution(name, serializeCodeExecutionObject);
-}
-
 void quantum_platform::onRandomSeedSet(std::size_t seed) {
   // Send on the notification to all QPUs.
   for (auto &qpu : platformQPUs)
diff --git a/runtime/cudaq/platform/quantum_platform.h b/runtime/cudaq/platform/quantum_platform.h
index 6e4fdb4cec9..30cc5f85da6 100644
--- a/runtime/cudaq/platform/quantum_platform.h
+++ b/runtime/cudaq/platform/quantum_platform.h
@@ -29,7 +29,6 @@ namespace cudaq {
 class QPU;
 class gradient;
 class optimizer;
-class SerializedCodeExecutionContext;
 struct RuntimeTarget;
 
 /// Typedefs for defining the connectivity structure of a QPU
@@ -164,12 +163,6 @@ class quantum_platform {
                std::uint64_t resultOffset, const std::vector<void *> &rawArgs);
   void launchKernel(const std::string &kernelName, const std::vector<void *> &);
 
-  // This method is the hook for executing SerializedCodeExecutionContext
-  // objects.
-  void launchSerializedCodeExecution(
-      const std::string &name,
-      SerializedCodeExecutionContext &serializeCodeExecutionObject);
-
   /// List all available platforms
   static std::vector<std::string> list_platforms();
 
diff --git a/runtime/cudaq/remote_capabilities.h b/runtime/cudaq/remote_capabilities.h
index ef96f7d6cfd..32795102333 100644
--- a/runtime/cudaq/remote_capabilities.h
+++ b/runtime/cudaq/remote_capabilities.h
@@ -15,9 +15,6 @@ namespace cudaq {
 struct RemoteCapabilities {
   /// True if the remote can perform state overlap operations.
   bool stateOverlap = false;
-  /// True if the remote can perform serialized code execution (raw Python
-  /// commands).
-  bool serializedCodeExec = false;
   /// True if the remote can perform an entire VQE operation without and
   /// back-and-forth client/server communications.
   bool vqe = false;
@@ -26,8 +23,7 @@ struct RemoteCapabilities {
   bool isRemoteSimulator = false;
   /// Constructor that broadcasts \p initValue to all fields.
   RemoteCapabilities(bool initValue)
-      : stateOverlap(initValue), serializedCodeExec(initValue), vqe(initValue),
-        isRemoteSimulator(initValue) {}
+      : stateOverlap(initValue), vqe(initValue), isRemoteSimulator(initValue) {}
 };
 
 } // namespace cudaq
diff --git a/scripts/install_prerequisites.sh b/scripts/install_prerequisites.sh
index 957ec64be70..2db42c8a1dd 100644
--- a/scripts/install_prerequisites.sh
+++ b/scripts/install_prerequisites.sh
@@ -35,7 +35,6 @@
 toolchain=''
 exclude_prereq=''
 install_all=true
-lock_file=""
 this_file_dir=`dirname "$(readlink -f "${BASH_SOURCE[0]}")"`
 __optind__=$OPTIND
 OPTIND=1
@@ -47,19 +46,6 @@ while getopts ":e:t:ml:-:" opt; do
     ;;
     m) install_all=false
     ;;
-    l) lock_file="$OPTARG"
-    ;;
-    -) case $OPTARG in
-          lock-file)
-            lock_file="${!OPTIND}"
-            OPTIND=$((OPTIND + 1))
-            ;;
-          *)
-            echo "Invalid long option --$OPTARG" >&2
-            (return 0 2>/dev/null) && return 1 || exit 1
-            ;;
-       esac
-       ;;
     :) echo "Option -$OPTARG requires an argument."
     (return 0 2>/dev/null) && return 1 || exit 1
     ;;
@@ -70,63 +56,6 @@ while getopts ":e:t:ml:-:" opt; do
 done
 OPTIND=$__optind__
 
-lookup_tpls_sha() {
-  local path="$1"
-
-  # Using lock file
-  if [[ -f $lock_file ]]; then
-    awk -v p="$path" '$2==p{print $1}' "$lock_file" && return 0
-  fi
-}
-
-# Clone the third-party libraries to include its source code in the NVQC docker image.
-if [ -n "$lock_file" ]; then
-  if [ ! -f "$lock_file" ]; then
-    echo "Lock file $lock_file not found."
-    (return 0 2>/dev/null) && return 1 || exit 1
-  fi
-
-  echo "Using lock file: $lock_file"
-
-  tpls_root="${CUDAQ_INSTALL_PREFIX:-/opt/cuda}"
-  tpls_dir="$tpls_root/tpls"
-  mkdir -p "$tpls_dir"
-  this_file_dir=`dirname "$(readlink -f "${BASH_SOURCE[0]}")"`
-
-  echo "Cloning additional third-party libraries into $tpls_dir..."
-  mkdir -p "$tpls_dir"
-  # make sure we are at the repo root
-  cd "$this_file_dir"
-
-  # for each submodule.<name>.url in .gitmodules
-  git config --file .gitmodules --get-regexp 'submodule\..*\.url' | \
-  while read -r key url; do
-    # key = "submodule.tpls/foo.url"
-    sub=${key#submodule.}         # -> "tpls/foo.url"
-    sub=${sub%.url}               # -> "tpls/foo"
-    path=$(git config --file .gitmodules --get "submodule.$sub.path")
-    lib=$(basename "$path")       # -> "foo"
-    dest="$tpls_dir/$lib"
-
-    echo "Processing submodule $lib at path $path ..."
-    repo="$(git config --file=.gitmodules submodule.$path.url)"
-    echo "Repository URL: $repo"
-
-    commit="$(lookup_tpls_sha "$path")" || {
-      echo "ERROR: could not resolve pinned commit for $path. Aborting $lib." >&2
-      exit 1
-    }
-    echo "Using commit $commit for $lib."
-
-    echo "Cloning $lib@$commit from $repo into $dest ..."
-    git clone --no-checkout --filter=tree:0 "$repo" "$dest" \
-    && git -C "$dest" fetch --depth 1 origin "$commit" \
-    && git -C "$dest" checkout --detach FETCH_HEAD \
-    || { echo "Failed to clone $lib"; continue; }
-  done
-  (return 0 2>/dev/null) && return 0 || exit 0
-fi
-
 if $install_all; then
   LLVM_INSTALL_PREFIX=${LLVM_INSTALL_PREFIX:-/opt/llvm}
   PYBIND11_INSTALL_PREFIX=${PYBIND11_INSTALL_PREFIX:-/usr/local/pybind11}
diff --git a/scripts/nvqc_launch.sh b/scripts/nvqc_launch.sh
deleted file mode 100644
index 81b5ea45691..00000000000
--- a/scripts/nvqc_launch.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-# ============================================================================ #
-# Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                   #
-# All rights reserved.                                                         #
-#                                                                              #
-# This source code and the accompanying materials are made available under     #
-# the terms of the Apache License 2.0 which accompanies this distribution.     #
-# ============================================================================ #
-
-cat /opt/nvidia/cudaq/build_info.txt
-
-# Launch script: launch cudaq-qpud (nvcf mode) with MPI ranks == Number of NVIDIA GPUs
-# IMPORTANT:
-# (1) NVCF function must set container environment variable `NUM_GPUS`
-# equal to the number of GPUs on the target platform. This will allow clients to query
-# the function capability (number of GPUs) by looking at function info. The below
-# entry point script helps prevent mis-configuration by checking that functions are
-# created and deployed appropriately.
-# (2) NVCF function must set container environment variable `NVQC_REST_PAYLOAD_VERSION` equal
-# to the RestRequest payload version with which `cudaq-qpud` in the deployment Docker image was compiled.
-# Failure to do so will result in early exits of the entry point command, thus deployment failure.
-EXPECTED_REST_PAYLOAD_VERSION="$(cudaq-qpud --type nvcf --schema-version | grep -o "CUDA-Q REST API version: \S*" | cut -d ":" -f 2 | tr -d " ")"
-if [[ "$NVQC_REST_PAYLOAD_VERSION" !=  "$EXPECTED_REST_PAYLOAD_VERSION" ]]; then
-  echo "Invalid Deployment: NVQC_REST_PAYLOAD_VERSION environment variable ($NVQC_REST_PAYLOAD_VERSION) does not match cudaq-qpud (expected $EXPECTED_REST_PAYLOAD_VERSION)."
-  exit 1
-fi
-
-python3 /nvqc_scripts/nvqc_proxy.py &
-
-NUM_ACTUAL_GPUS=$(nvidia-smi --list-gpus | wc -l)
-if [[ "$NUM_GPUS" == "$NUM_ACTUAL_GPUS" ]]; then
-  cd /tmp
-  CMDSTR="mpiexec -np $NUM_ACTUAL_GPUS cudaq-qpud --type nvcf --port 3031"
-  while true; do
-    echo "export PATH=${PATH}; $CMDSTR" | sudo su -s /bin/bash nobody
-  done
-else
-  echo "Invalid Deployment: Number of GPUs does not match the hardware"
-  exit 1
-fi
diff --git a/scripts/release.sh b/scripts/release.sh
index 9980f0f808a..429dc68fadc 100644
--- a/scripts/release.sh
+++ b/scripts/release.sh
@@ -51,7 +51,7 @@ done
 # Check that all three workflows above completed successfully. 
 # Once the publishing completes, you should see a draft release on GitHub for the new version.
 
-# Check that all nightly integration tests and NVQC regression tests are enabled and run successfully with the release image. 
+# Check that all nightly integration tests are enabled and run successfully with the release image. 
 # Work with QA to get the release candidate fully validated.
 
 # Go to the draft release on GitHub and download the python wheels and metapackages in the draft release.
diff --git a/scripts/validate_container.sh b/scripts/validate_container.sh
index 301cbc62454..53149eaadbf 100644
--- a/scripts/validate_container.sh
+++ b/scripts/validate_container.sh
@@ -81,7 +81,7 @@ available_backends=`\
         qpu=${platform##* }
         requirements=$(cat $file | grep "gpu-requirements:")
         gpus=${requirements##* }
-        if [ "${qpu}" != "remote_rest" ] && [ "${qpu}" != "NvcfSimulatorQPU" ] \
+        if [ "${qpu}" != "remote_rest" ] \
         && [ "${qpu}" != "fermioniq" ] && [ "${qpu}" != "orca" ] \
         && [ "${qpu}" != "pasqal" ] && [ "${qpu}" != "quera" ] \
         && ($gpu_available || [ -z "$gpus" ] || [ "${gpus,,}" == "false" ]); then \
@@ -217,11 +217,6 @@ do
                 echo "Skipping $t target due to incomplete MPI installation.";
                 echo ":white_flag: $filename: Incomplete MPI installation. Test skipped." >> "${tmpFile}_$(echo $t | tr - _)"
                 continue
-
-            else
-                # TODO: remove this once the nvqc backend is part of the validation
-                # tracked in https://github.com/NVIDIA/cuda-quantum/issues/1283
-                target_flag+=" --enable-mlir"
             fi
         fi
 
@@ -409,8 +404,8 @@ fi
 # Python snippet validation 
 if [ -d "snippets/" ];
 then
-    # Skip NVQC and multi-GPU snippets.
-    for ex in `find snippets/ -name '*.py' -not -path '*/nvqc/*' -not -path '*/multi_gpu_workflows/*' | sort`;
+    # Skip multi-GPU snippets.
+    for ex in `find snippets/ -name '*.py' -not -path '*/multi_gpu_workflows/*' | sort`;
     do 
         filename=$(basename -- "$ex")
         filename="${filename%.*}"
diff --git a/scripts/validate_pycudaq.sh b/scripts/validate_pycudaq.sh
index d13bac2068f..145085de35e 100644
--- a/scripts/validate_pycudaq.sh
+++ b/scripts/validate_pycudaq.sh
@@ -32,7 +32,6 @@
 
 # Note: To run the target tests, make sure to set all necessary API keys:
 # COPY docs/sphinx/targets/python /tmp/targets/
-# ENV NVQC_API_KEY=...
 # ENV ...
 
 __optind__=$OPTIND
@@ -177,7 +176,7 @@ fi
 # Run snippets in docs
 # Some snippets generate plots
 python3 -m pip install --user matplotlib
-for ex in `find "$root_folder/snippets" -name '*.py' -not -path '*/nvqc/*'`; do
+for ex in `find "$root_folder/snippets" -name '*.py'`; do
     echo "Executing $ex"
     python3 "$ex"
     if [ ! $? -eq 0 ]; then
@@ -185,16 +184,6 @@ for ex in `find "$root_folder/snippets" -name '*.py' -not -path '*/nvqc/*'`; do
         status_sum=$((status_sum+1))
     fi
 done
-if [ -n "${NVQC_API_KEY}" ]; then
-    for ex in `find "$root_folder/snippets" -name '*.py' -path '*/nvqc/*'`; do
-        echo "Executing $ex"
-        python3 "$ex"
-        if [ ! $? -eq 0 ]; then
-            echo -e "\e[01;31mFailed to execute $ex.\e[0m" >&2
-            status_sum=$((status_sum+1))
-        fi
-    done
-fi
 
 # Run examples
 # Some examples generate plots
@@ -208,9 +197,6 @@ for ex in `find "$root_folder/examples" -name '*.py'`; do
             # to submit a (paid) job to Amazon Braket (includes QuEra).
             echo -e "\e[01;31mWarning: Explicitly set target braket or quera in $ex; skipping validation due to paid submission.\e[0m" >&2
             skip_example=true
-        elif [ "$t" == "nvqc" ] && [ -z "${NVQC_API_KEY}" ]; then 
-            echo -e "\e[01;31mWarning: Explicitly set target nvqc in $ex; skipping validation due to missing API key.\e[0m" >&2
-            skip_example=true
         elif [ "$t" == "pasqal" ] && [ -z "${PASQAL_PASSWORD}" ]; then
             echo -e "\e[01;31mWarning: Explicitly set target pasqal in $ex; skipping validation due to missing token.\e[0m" >&2
             skip_example=true
@@ -246,9 +232,6 @@ if [ -d "$root_folder/targets" ]; then
             elif [ "$t" == "oqc" ] && [ -z "${OQC_URL}" ]; then 
                 echo -e "\e[01;31mWarning: Explicitly set target oqc in $ex; skipping validation due to missing URL.\e[0m" >&2
                 skip_example=true
-            elif [ "$t" == "nvqc" ] && [ -z "${NVQC_API_KEY}" ]; then 
-                echo -e "\e[01;31mWarning: Explicitly set target nvqc in $ex; skipping validation due to missing API key.\e[0m" >&2
-                skip_example=true
             elif [ "$t" == "pasqal" ] && [ -z "${PASQAL_PASSWORD}" ]; then
                 echo -e "\e[01;31mWarning: Explicitly set target pasqal in $ex; skipping validation due to missing token.\e[0m" >&2
                 skip_example=true
diff --git a/targettests/Remote-Sim/args_synthesis.cpp b/targettests/Remote-Sim/args_synthesis.cpp
index bf09524ce2d..f7e3b7134d0 100644
--- a/targettests/Remote-Sim/args_synthesis.cpp
+++ b/targettests/Remote-Sim/args_synthesis.cpp
@@ -13,8 +13,7 @@
 // clang-format on
 
 // This is a comprehensive set of tests for kernel argument synthesis for remote
-// platforms. Note: we use the remote-mqpu platform in MLIR mode as a mock
-// environment for NVQC.
+// platforms.
 #include <cudaq.h>
 #include <iostream>
 
diff --git a/tools/cudaq-qpud/CMakeLists.txt b/tools/cudaq-qpud/CMakeLists.txt
index 7fc3769ad58..4c74b7a1cea 100644
--- a/tools/cudaq-qpud/CMakeLists.txt
+++ b/tools/cudaq-qpud/CMakeLists.txt
@@ -33,5 +33,3 @@ export_executable_symbols_for_plugins(${TOOL_NAME})
 
 install(TARGETS ${TOOL_NAME} DESTINATION bin)
 install(FILES ${TOOL_NAME}.py DESTINATION bin)
-install(FILES nvqc_proxy.py DESTINATION bin)
-install(FILES json_request_runner.py DESTINATION bin)
diff --git a/tools/cudaq-qpud/README.md b/tools/cudaq-qpud/README.md
index 9e43f25d738..e399cd1d8f4 100644
--- a/tools/cudaq-qpud/README.md
+++ b/tools/cudaq-qpud/README.md
@@ -1,19 +1,18 @@
 <!-- markdownlint-disable MD013 -->
-# `remote-mqpu` / `nvqc` Debugging Tips
+# `remote-mqpu` Debugging Tips
 
 This file contains tips and tricks for when you are performing manual testing/
-debugging for `remote-mqpu` or `nvqc` targets. This file is primarily intended
+debugging for `remote-mqpu` targets. This file is primarily intended
 for **CUDA-Q developers, not end users**. See the user-facing docs here:
 
 - [`remote-mqpu`](https://nvidia.github.io/cuda-quantum/latest/using/backends/platform.html#remote-mqpu-platform)
-- [`nvqc`](https://nvidia.github.io/cuda-quantum/latest/using/backends/nvqc.html)
 
 ## Fully local within `cuda-quantum-dev` container
 
 The first step is usually to run the server in a separate window from the
 client by disabling any sort of auto-launch capabilities.
 
-1. In one window, launch `cudaq-qpud --port 3030 --type nvcf`. You may also
+1. In one window, launch `cudaq-qpud --port 3030`. You may also
    prefix this command with `CUDAQ_LOG_LEVEL=info` to turn on additional
    logging in the server.
 2.
@@ -21,82 +20,3 @@ client by disabling any sort of auto-launch capabilities.
    something like this: `cudaq.set_target('remote-mqpu', url='localhost:3030')`.
    - If you are using C++, change your `nvq++` command to something like this:
    `nvq++ --target remote-mqpu --remote-mqpu-url localhost:3030`.
-
-Note: when you run the server with `--type nvcf`, that means that the
-`cudaq-qpud` process will shut down after every invocation, so you will have to
-manually restart it if you want to invoke it again.
-
-## Fully local testing using a true NVQC image
-
-If you want to run the server in a fully "contained" environment like it is run
-for NVQC, then you can perform the following steps.
-
-_Note: the following steps use a Docker image tag that is primarily intended
-for CUDA-Q developers, not end users. End users can still use these
-instructions to do any testing they would like, but it is recommended that they
-choose a different Docker tag name._
-
-1. Build your NVQC server Docker container using this command: `docker build -t nvcr.io/pnyjrcojiblh/cuda-quantum/cuda-quantum:custom -f docker/release/cudaq.nvqc.Dockerfile .`
-2. Launch the server on your local machine: `docker run -it --rm --gpus all --network=host -e NVQC_REST_PAYLOAD_VERSION=1.1 -e NUM_GPUS=1 -e WATCHDOG_TIMEOUT_SEC=3600 -e RUN_AS_NOBODY=1 nvcr.io/pnyjrcojiblh/cuda-quantum/cuda-quantum:custom`
-   - Note: You need to set the environment variables as intended for your
-    environment. If you are running on a multi-GPU machine, you may
-    want to set `NUM_GPUS=4` (updating `4` to the correct number for your
-    machine) and use something like `--gpus '"device=0,1,2,4"'` (in case your
-    machine has more GPUs than you want activated with `NUM_GPUS`.)
-3. Get the IP address of your host by running `ifconfig`. For the following
-   steps, let's assume your IP address is `172.31.123.45`.
-4. Similar to step 2 in the section above, but changing `localhost` to the
-   correct IP address obtained in step 3:
-   - If you are using Python, change your `cudaq.set_target` line to be
-   something like this: `cudaq.set_target('remote-mqpu', url='172.31.123.45:3030')`.
-   - If you are using C++, change your `nvq++` command to something like this:
-   `nvq++ --target remote-mqpu --remote-mqpu-url 172.31.123.45:3030`.
-
-## Running your own image on the NVQC server
-
-_Note: the following steps use a Docker image tag that is primarily intended
-for CUDA-Q developers, not end users. End users can still use these
-instructions to do any testing they would like, but it is recommended that they
-choose a different Docker tag name._
-
-1. After building your `nvcr.io/pnyjrcojiblh/cuda-quantum/cuda-quantum:custom`
-   image, you can `docker push` it (assuming you have authorized credentials).
-2. Either use the `ngc.nvidia.com` Web GUI to deploy your function, or use the
-   `ngc-cli`. Both are documented [here](https://docs.nvidia.com/cloud-functions/user-guide/latest/cloud-function/function-deployment.html#deploying-a-function).
-3. When done, un-deploy your function and remove your custom image using a command like this: `ngc registry image remove nvcr.io/pnyjrcojiblh/cuda-quantum/cuda-quantum:custom`.
-
-## Special notes about running Python code on the server (`CUDAQ_SER_CODE_EXEC` / `serializedCodeExecutionContext`)
-
-Remote Python execution does not actually use `cudaq-qpud` to execute Python
-code. That is actually achieved by `tools/cudaq-qpud/nvqc_proxy.py` and
-`tools/cudaq-qpud/json_request_runner.py`. `nvqc_proxy.py` is a proxy that sits
-at the front end of the NVQC server. It runs on port `3030` and redirects all
-`cudaq-qpud`-bound traffic to port `3031`. However, if the JSON request contains
-a `serializedCodeExecutionContext` field, then the request is sent to
-`json_request_runner.py`, not `cudaq-qpud`.
-
-The following diagram shows the high-level sequence for these operations.
-
-```mermaid
-sequenceDiagram
-User Program ->> CUDA-Q : Job request
-CUDA-Q ->> CUDA-Q : Serialize data
-CUDA-Q ->> nvqc_proxy : Submit program and data
-alt If request contains 'serializedCodeExecutionContext'
-  nvqc_proxy ->> nvqc_proxy : Write Python code to temp file
-  nvqc_proxy ->> json_request_runner : subprocess.run
-  json_request_runner ->> json_request_runner : Deserialize data
-  json_request_runner ->> json_request_runner : Execute program
-  json_request_runner ->> json_request_runner : Write results to temp file
-  json_request_runner ->> json_request_runner : Shutdown
-  nvqc_proxy ->> nvqc_proxy : Read results from temp file
-else 'serializedCodeExecutionContext' does not exist
-  nvqc_proxy ->> cudaq-qpud : Request
-  cudaq-qpud ->> cudaq-qpud : Process
-  cudaq-qpud ->> nvqc_proxy : Results
-  cudaq-qpud ->> cudaq-qpud : Shutdown and restart
-end
-nvqc_proxy ->> CUDA-Q : Results
-CUDA-Q ->> CUDA-Q : Deserialize data
-CUDA-Q ->> User Program : Results
-```
diff --git a/tools/cudaq-qpud/RestServerMain.cpp b/tools/cudaq-qpud/RestServerMain.cpp
index 9065e6bc871..1e287ca4bed 100644
--- a/tools/cudaq-qpud/RestServerMain.cpp
+++ b/tools/cudaq-qpud/RestServerMain.cpp
@@ -81,9 +81,6 @@ int main(int argc, char **argv) {
       cudaq::registry::get<cudaq::RemoteRuntimeServer>(serverSubType);
 
   if (printRestPayloadVersion) {
-    // IMPORTANT: Don't change this message without updating
-    // `scripts/nvqc_launch.sh`, which relies on the this information to perform
-    // deployment sanity check.
     printf("\nCUDA-Q REST API version: %d.%d\n", restServer->version().first,
            restServer->version().second);
     return 0;
diff --git a/tools/cudaq-qpud/json_request_runner.py b/tools/cudaq-qpud/json_request_runner.py
deleted file mode 100644
index fa0db9d1f9b..00000000000
--- a/tools/cudaq-qpud/json_request_runner.py
+++ /dev/null
@@ -1,173 +0,0 @@
-# ============================================================================ #
-# Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                   #
-# All rights reserved.                                                         #
-#                                                                              #
-# This source code and the accompanying materials are made available under     #
-# the terms of the Apache License 2.0 which accompanies this distribution.     #
-# ============================================================================ #
-
-# Set `MPLCONFIGDIR` if running as nobody in order to prevent a warning message
-# that is telling the truth about extended loading times.
-import os
-if 'nonexistent' in os.environ['HOME']:
-    os.environ['MPLCONFIGDIR'] = os.getcwd()
-
-import cudaq
-import sys
-import json
-import subprocess
-import importlib
-from datetime import datetime
-import re
-
-# Pattern to detect ANSI escape color code in the error message
-ANSI_PATTERN = re.compile(r'\x1B[@-_][0-?]*[ -/]*[@-~]')
-
-
-def get_deserialized_dict(scoped_dict):
-    deserialized_dict = {}
-
-    # If the scoped_dict is one big JSON string, then load it into a
-    # dictionary-like object.
-    if isinstance(scoped_dict, str):
-        scoped_dict = json.loads(scoped_dict)
-
-    # Do two passes. Save the unpacking of cudaq.kernels for the second pass so
-    # that they can see and utilize global variables unpacked in the first pass.
-    for p in range(2):
-        isFirstPass = (p == 0)
-        for key, val in scoped_dict.items():
-            isKernel = "/" in key and ".PyKernelDecorator" in key
-            try:
-                if "/" in key and ((isFirstPass and not isKernel) or
-                                   (not isFirstPass is isKernel)):
-                    key, val_type = key.split('/')
-                    if val_type.startswith('cudaq.'):
-                        module_name, type_name = val_type.rsplit('.', 1)
-                        module = importlib.import_module(module_name)
-                        type_class = getattr(module, type_name)
-                        if isFirstPass:
-                            result = type_class.from_json(json.dumps(val))
-                        else:
-                            result = type_class.from_json(
-                                json.dumps(val), deserialized_dict)
-                        deserialized_dict[key] = result
-                    else:
-                        raise Exception(f'Invalid val_type in key: {val_type}')
-                elif isFirstPass:
-                    deserialized_dict[key] = val
-            except Exception as e:
-                raise Exception(f"Error deserializing key '{key}': {e}")
-
-    return deserialized_dict
-
-
-if __name__ == "__main__":
-    try:
-        requestStart = int(datetime.now().timestamp() * 1000)
-
-        # Expected command-line arguments:
-        # `sys.argv[0] = json_request_runner.py`
-        # `sys.argv[1] = <json file>`
-        # `sys.argv[2] = --use-mpi=<0|1>`
-        if '--use-mpi=1' in sys.argv:
-            cudaq.mpi.initialize()
-
-        # Read request
-        if len(sys.argv) < 3:
-            raise (Exception('Too few command-line arguments'))
-        jsonFile = sys.argv[1]
-        with open(jsonFile, 'rb') as fp:
-            request = json.load(fp)
-
-        serialized_ctx = request['serializedCodeExecutionContext']
-        source_code = serialized_ctx['source_code']
-
-        # Limit imports for the user code to a small subset of possible imports.
-        imports_code = '\n'.join([
-            'import cudaq', 'from cudaq import spin', 'import math',
-            'import numpy', 'import numpy as np',
-            'from typing import List, Tuple'
-        ])
-
-        # Be sure to do this before running any code from `serialized_ctx`
-        globals_dict = get_deserialized_dict(serialized_ctx['scoped_var_dict'])
-
-        # Determine which target to set
-        sim2target = {
-            'qpp': 'qpp-cpu',
-            'custatevec_fp32': 'nvidia',
-            'custatevec_fp64': 'nvidia-fp64',
-            'tensornet': 'tensornet',
-            'tensornet_mps': 'tensornet-mps',
-            'dm': 'density-matrix-cpu',
-            'nvidia_mgpu': 'nvidia-mgpu',
-            'nvidia_mqpu': 'nvidia-mqpu',
-            'nvidia_mqpu-fp64': 'nvidia-mqpu-fp64'
-        }
-        simulator_name = request['simulator']
-        simulator_name = simulator_name.replace('-', '_')
-        target_name = sim2target[simulator_name]
-
-        # Validate the full source code
-        full_source = f'{imports_code}\n{source_code}'
-        # TODO: validate
-
-        # Execute imports
-        exec(imports_code, globals_dict)
-
-        # Perform setup
-        exec(f'cudaq.set_target("{target_name}")', globals_dict)
-        seed_num = int(request['seed'])
-        if seed_num > 0:
-            exec(f'cudaq.set_random_seed({seed_num})', globals_dict)
-
-        # Initialize output dictionary
-        result = {
-            "status": "success",
-            "executionContext": {
-                "shots": 0,
-                "hasConditionalsOnMeasureResults": False
-            }
-        }
-        globals_dict['_json_request_result'] = result
-
-        # Execute main source_code
-        simulationStart = int(datetime.now().timestamp() * 1000)
-        if target_name == 'nvidia-mgpu' or (
-                not cudaq.mpi.is_initialized()) or cudaq.mpi.rank() == 0:
-            exec(source_code, globals_dict)
-        simulationEnd = int(datetime.now().timestamp() * 1000)
-
-        # Collect results
-        result = globals_dict['_json_request_result']
-        try:
-            cmd_result = subprocess.run(['cudaq-qpud', '--cuda-properties'],
-                                        capture_output=True,
-                                        text=True)
-            deviceProps = json.loads(cmd_result.stdout)
-        except:
-            deviceProps = dict()
-
-        executionInfo = {
-            'requestStart': requestStart,
-            'simulationStart': simulationStart,
-            'simulationEnd': simulationEnd,
-            'deviceProps': deviceProps
-        }
-        result['executionInfo'] = executionInfo
-    except Exception as e:
-        error_message = ANSI_PATTERN.sub('', str(e))
-        result = {
-            'status': 'Failed to process incoming request',
-            'errorMessage': error_message
-        }
-    finally:
-        # Only rank 0 prints the result
-        if not (cudaq.mpi.is_initialized()) or (cudaq.mpi.rank() == 0):
-            with open(jsonFile, 'w') as fp:
-                json.dump(result, fp)
-                fp.flush()
-
-        if cudaq.mpi.is_initialized():
-            cudaq.mpi.finalize()
diff --git a/tools/cudaq-qpud/nvqc_proxy.py b/tools/cudaq-qpud/nvqc_proxy.py
deleted file mode 100644
index ea9933e537a..00000000000
--- a/tools/cudaq-qpud/nvqc_proxy.py
+++ /dev/null
@@ -1,283 +0,0 @@
-# ============================================================================ #
-# Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                   #
-# All rights reserved.                                                         #
-#                                                                              #
-# This source code and the accompanying materials are made available under     #
-# the terms of the Apache License 2.0 which accompanies this distribution.     #
-# ============================================================================ #
-
-from http import HTTPStatus
-import http.server
-import json
-import requests
-import socketserver
-import sys
-import time
-import json
-import subprocess
-import os
-import tempfile
-import shutil
-import pathlib
-
-# This reverse proxy application is needed to span the small gaps when
-# `cudaq-qpud` is shutting down and starting up again. This small reverse proxy
-# allows the NVCF port (3030) to remain up while allowing the main `cudaq-qpud`
-# application to restart if necessary.
-PROXY_PORT = 3030
-QPUD_PORT = 3031  # see `scripts/nvqc_launch.sh`
-
-NUM_GPUS = 0
-MPI_FOUND = False
-WATCHDOG_TIMEOUT_SEC = 0
-RUN_AS_NOBODY = False  # Expect this to be overridden to true for NVQC deployment
-SUDO_FOUND = False
-CUDAQ_SER_CODE_EXEC = False
-
-
-def build_command_list(temp_file_name: str) -> list[str]:
-    """
-    Build the command essentially from right to left, pre-pending wrapper
-    commands as necessary for this invocation.
-    """
-    current_script_path = os.path.abspath(__file__)
-    json_req_path = os.path.join(os.path.dirname(current_script_path),
-                                 'json_request_runner.py')
-    cmd_list = [sys.executable, json_req_path, temp_file_name]
-    if NUM_GPUS > 1 and MPI_FOUND:
-        cmd_list = ['mpiexec', '--allow-run-as-root', '-np',
-                    str(NUM_GPUS)] + cmd_list
-        cmd_list += ['--use-mpi=1']  # `--use-mpi` must come at the end
-    else:
-        cmd_list += ['--use-mpi=0']  # `--use-mpi` must come at the end
-    # The timeout must be inside the `su`/`sudo` commands in order to function.
-    if WATCHDOG_TIMEOUT_SEC > 0:
-        cmd_list = ['timeout', str(WATCHDOG_TIMEOUT_SEC)] + cmd_list
-    if RUN_AS_NOBODY:
-        cmd_list = ['su', '-s', '/bin/bash', 'nobody', '-c', ' '.join(cmd_list)]
-        if SUDO_FOUND:
-            cmd_list = ['sudo'] + cmd_list
-
-    return cmd_list
-
-
-class ThreadedHTTPServer(socketserver.ThreadingMixIn, http.server.HTTPServer):
-    """Handle requests in a separate thread."""
-
-
-class Server(http.server.SimpleHTTPRequestHandler):
-    protocol_version = 'HTTP/1.1'
-    default_request_version = 'HTTP/1.1'
-
-    # Override this function because we seem to be getting a lot of
-    # ConnectionResetError exceptions in the health monitoring endpoint,
-    # producing lots of ugly stack traces in the logs. Hopefully this will
-    # reduce them.
-    def handle_one_request(self):
-        try:
-            super().handle_one_request()
-        except ConnectionResetError as e:
-            if self.path != '/':
-                print(f"Connection was reset by peer: {e}")
-        except Exception as e:
-            print(f"Unhandled exception: {e}")
-
-    def log_message(self, format, *args):
-        # Don't log the health endpoint queries
-        if len(args) > 0 and args[0] != "GET / HTTP/1.1":
-            super().log_message(format, *args)
-
-    def do_GET(self):
-        # Allow the proxy to automatically handle the health endpoint. The proxy
-        # will exit if the application's /job endpoint is down.
-        if self.path == '/':
-            self.send_response(HTTPStatus.OK)
-            self.send_header('Content-Type', 'application/json')
-            message = json.dumps({"status": "OK"}).encode('utf-8')
-            self.send_header("Content-Length", str(len(message)))
-            self.end_headers()
-            self.wfile.write(message)
-        else:
-            self.send_response(HTTPStatus.NOT_FOUND)
-            self.send_header("Content-Length", "0")
-            self.end_headers()
-
-    def is_serialized_code_execution_request(self, request_json):
-        return 'serializedCodeExecutionContext' in request_json and 'source_code' in request_json[
-            'serializedCodeExecutionContext'] and request_json[
-                'serializedCodeExecutionContext']['source_code'] != ''
-
-    def write_asset_if_necessary(self, message):
-        """
-        If the output message is too large, and if the proxy is servicing NVCF
-        requests, then write the original message to a file and modify the
-        outgoing message to reference that new file.
-        """
-        if 'NVCF-MAX-RESPONSE-SIZE-BYTES' in self.headers:
-            max_response_len = int(self.headers['NVCF-MAX-RESPONSE-SIZE-BYTES'])
-            if len(message) > max_response_len:
-                try:
-                    outputDir = self.headers['NVCF-LARGE-OUTPUT-DIR']
-                    reqId = self.headers['NVCF-REQID']
-                    resultFile = f'{outputDir}/{reqId}_result.json'
-                    with open(resultFile, 'wb') as fp:
-                        fp.write(message)
-                        fp.flush()
-                    result = {'resultFile': resultFile}
-                    message = json.dumps(result).encode('utf-8')
-                except Exception as e:
-                    result = {
-                        'status': 'Exception during output processing',
-                        'errorMessage': str(e)
-                    }
-                    message = json.dumps(result).encode('utf-8')
-        return message
-
-    def read_asset_if_necessary(self, request_data):
-        """
-        If there is an asset ID in the headers, replace the incoming message
-        with the contents of a file read from disk.
-        """
-        asset_id = self.headers.get('NVCF-FUNCTION-ASSET-IDS', '')
-        if len(asset_id) > 0:
-            try:
-                asset_dir = self.headers['NVCF-ASSET-DIR']
-                filename = f'{asset_dir}/{asset_id}'
-                with open(filename, 'rb') as f:
-                    request_data = f.read()
-            except Exception:
-                # If something failed, simply forward the original message
-                pass
-        return request_data
-
-    def do_POST(self):
-        if self.path == '/job':
-            qpud_up = False
-            retries = 0
-            qpud_url = 'http://localhost:' + str(QPUD_PORT)
-            while (not qpud_up):
-                try:
-                    ping_response = requests.get(qpud_url)
-                    qpud_up = (ping_response.status_code == HTTPStatus.OK)
-                except:
-                    qpud_up = False
-                if not qpud_up:
-                    retries += 1
-                    if retries > 100:
-                        print("PROXY EXIT: TOO MANY RETRIES!")
-                        sys.exit()
-                    print(
-                        "Main application is down, retrying (retry_count = {})..."
-                        .format(retries))
-                    time.sleep(0.1)
-
-            content_length = int(self.headers['Content-Length'])
-            if content_length:
-                # Look for any asset references in the job request. If one
-                # exists, then that means the request is actually in a file.
-                request_data = self.rfile.read(content_length)
-                request_data = self.read_asset_if_necessary(request_data)
-                request_json = json.loads(request_data)
-
-                if self.is_serialized_code_execution_request(request_json):
-                    if CUDAQ_SER_CODE_EXEC:
-                        result = {'status': 'uninitialized', 'errorMessage': ''}
-                        with tempfile.NamedTemporaryFile(
-                                dir=temp_dir, delete=False) as temp_file:
-                            temp_file.write(request_data)
-                            temp_file.flush()
-
-                        # Make it world writable so that the `subprocess` can write
-                        # the results to the file.
-                        os.chmod(temp_file.name, 0o666)
-
-                        # We also must get to a directory where "nobody" can see (in
-                        # order to make MPI happy)
-                        save_dir = os.getcwd()
-                        os.chdir(pathlib.Path(temp_file.name).parent)
-                        cmd_list = build_command_list(temp_file.name)
-                        cmd_result = subprocess.run(cmd_list,
-                                                    capture_output=False,
-                                                    text=True)
-
-                        with open(temp_file.name, 'rb') as fp:
-                            result = json.load(fp)
-
-                        if cmd_result.returncode == 124:
-                            result = {
-                                'status':
-                                    'json_request_runner.py time out',
-                                'errorMessage':
-                                    'Timeout occurred during execution'
-                            }
-
-                        # Cleanup
-                        os.chdir(save_dir)
-                        if RUN_AS_NOBODY:
-                            if SUDO_FOUND:
-                                os.system('sudo pkill -9 -u nobody')
-                            else:
-                                os.system('pkill -9 -u nobody')
-                        os.remove(temp_file.name)
-                    else:
-                        result = {
-                            'status':
-                                'Invalid Request',
-                            'errorMessage':
-                                'Server does not support serializedCodeExecutionContext at this time'
-                        }
-
-                    self.send_response(HTTPStatus.OK)
-                    self.send_header('Content-Type', 'application/json')
-                    message = json.dumps(result).encode('utf-8')
-                    message = self.write_asset_if_necessary(message)
-                    self.send_header('Content-Length', str(len(message)))
-                    self.end_headers()
-                    self.wfile.write(message)
-                else:
-                    res = requests.request(method=self.command,
-                                           url=qpud_url + self.path,
-                                           headers=self.headers,
-                                           data=request_data)
-                    self.send_response(HTTPStatus.OK)
-                    self.send_header('Content-Type', 'application/json')
-                    message = json.dumps(res.json()).encode('utf-8')
-                    self.send_header("Content-Length", str(len(message)))
-                    self.end_headers()
-                    self.wfile.write(message)
-            else:
-                self.send_response(HTTPStatus.BAD_REQUEST)
-                self.send_header("Content-Length", "0")
-                self.end_headers()
-        else:
-            self.send_response(HTTPStatus.NOT_FOUND)
-            self.send_header("Content-Length", "0")
-            self.end_headers()
-
-
-if __name__ == "__main__":
-    try:
-        NUM_GPUS = int(subprocess.getoutput('nvidia-smi --list-gpus | wc -l'))
-        if 'NUM_GPUS' in os.environ:
-            NUM_GPUS = min(NUM_GPUS, int(os.environ['NUM_GPUS']))
-    except:
-        NUM_GPUS = 0
-    MPI_FOUND = (shutil.which('mpiexec') != None)
-    SUDO_FOUND = (shutil.which('sudo') != None)
-    WATCHDOG_TIMEOUT_SEC = int(
-        os.environ.get('WATCHDOG_TIMEOUT_SEC', WATCHDOG_TIMEOUT_SEC))
-    RUN_AS_NOBODY = int(os.environ.get('RUN_AS_NOBODY', 0)) > 0
-    CUDAQ_SER_CODE_EXEC = int(
-        os.environ.get('CUDAQ_SER_CODE_EXEC', CUDAQ_SER_CODE_EXEC)) > 0
-
-    temp_dir = tempfile.gettempdir()
-    if RUN_AS_NOBODY:
-        temp_dir = os.path.join(temp_dir, 'nvqc_proxy')
-        os.makedirs(temp_dir, exist_ok=True)
-        os.chmod(temp_dir, 0o777)  # Allow "nobody" to write to this directory.
-
-    Handler = Server
-    with ThreadedHTTPServer(("", PROXY_PORT), Handler) as httpd:
-        print("Serving at port", PROXY_PORT)
-        print("Forward to port", QPUD_PORT)
-        httpd.serve_forever()
diff --git a/tools/nvqpp/nvq++.in b/tools/nvqpp/nvq++.in
index e0f3c98774e..26420c824b7 100644
--- a/tools/nvqpp/nvq++.in
+++ b/tools/nvqpp/nvq++.in
@@ -646,13 +646,6 @@ COMPILER_FLAGS="${CPPSTD} ${COMPILER_FLAGS}"
 # the resultant binary to target that specified backend.
 OBJS_TO_MERGE=""
 if [ -n "${TARGET_CONFIG}" ]; then
-	# Disable compilation on non-x86 machines when targetting NVQC.
-	# See https://github.com/NVIDIA/cuda-quantum/issues/1345 for current status.
-	if [ "${TARGET_CONFIG}" == "nvqc" ]; then
-		if [ "${HOST_TARGET:0:6}" != "x86_64" ]; then
-			error_exit "Cannot use nvqc target from non-x86_64 client at this time"
-		fi
-	fi
 	TARGET_CONFIG_YML_FILE="${install_dir}/targets/${TARGET_CONFIG}.yml"
 	GEN_TARGET_BACKEND=false
 	if [ -f "${TARGET_CONFIG_YML_FILE}" ]; then