diff --git a/.github/workflows/config/spelling_allowlist.txt b/.github/workflows/config/spelling_allowlist.txt index e5d7558dcc5..54f94cfaaa3 100644 --- a/.github/workflows/config/spelling_allowlist.txt +++ b/.github/workflows/config/spelling_allowlist.txt @@ -74,7 +74,6 @@ MyST NGC NVIDIA NVLink -NVQC NVQIR OPX OQC diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml index ef2a8451349..f6c80b4412d 100644 --- a/.github/workflows/integration_tests.yml +++ b/.github/workflows/integration_tests.yml @@ -26,7 +26,6 @@ on: - pasqal - qci - quantinuum - - nvqc single_test_name: type: string required: false @@ -44,11 +43,6 @@ on: type: string required: false description: 'Commit SHA to pull the code (examples/tests) for testing. Default to the commit associated with the CUDA Quantum docker image if left blank' - cudaq_nvqc_deploy_image: - type: string - required: false - default: '' # same as cudaq_test_image if not provided - description: 'CUDA Quantum image to use for NVQC deployment to NVCF. Default to the latest CUDA Quantum nightly image' workflow_id: type: string required: false @@ -66,11 +60,6 @@ on: - cron: 0 3 * * * env: - NGC_QUANTUM_ORG: pnyjrcojiblh - NGC_QUANTUM_TEAM: cuda-quantum - NVQC_FUNCTION_ID: 3bfa0342-7d2a-4f1b-8e81-b6608d28ca7d - # :::: - NGC_NVQC_DEPLOYMENT_SPEC: GFN:L40S:gl40s_1.br25_2xlarge:1:1 python_version: '3.12' jobs: @@ -88,7 +77,6 @@ jobs: outputs: cudaq_test_image: ${{ steps.vars.outputs.cudaq_nightly_image }}@${{ steps.test_image.outputs.digest }} - cudaq_nvqc_deploy_image: ${{ inputs.cudaq_nvqc_deploy_image || format('{0}@{1}', steps.vars.outputs.cudaq_nightly_image, steps.test_image.outputs.digest) }} steps: - name: Set variables @@ -174,135 +162,6 @@ jobs: echo "sha=$(cat $CUDA_QUANTUM_PATH/build_info.txt | grep -o 'source-sha: \S*' | cut -d ' ' -f 2)" >> $GITHUB_OUTPUT fi - build_nvqc_image: - name: Build NVQC deployment image - runs-on: ubuntu-latest - if: (inputs.target == 'nvqc' || github.event_name == 'schedule' || inputs.target == 'nightly') - needs: [setup, metadata] - permissions: - contents: read - packages: write - - environment: ghcr-deployment - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - ref: ${{ needs.metadata.outputs.cudaq_commit }} - fetch-depth: 1 - - - name: Set up context for buildx - run: | - docker context create builder_context - - - name: Set up buildx runner - uses: docker/setup-buildx-action@v3 - with: - endpoint: builder_context - version: v0.19.0 - driver-opts: | - image=moby/buildkit:v0.19.0 - - - name: Login to NGC container registry - uses: docker/login-action@v3 - with: - registry: nvcr.io - username: '$oauthtoken' - password: ${{ secrets.NGC_CREDENTIALS }} - - # Log in to GHCR (in case the base image is a local one) - - name: Log in to the GitHub container registry - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ github.token }} - - - name: Build NVQC image - id: docker_build - uses: docker/build-push-action@v5 - with: - context: . - file: ./docker/release/cudaq.nvqc.Dockerfile - build-args: | - base_image=${{ needs.setup.outputs.cudaq_nvqc_deploy_image }} - tags: nvcr.io/${{ env.NGC_QUANTUM_ORG }}/${{ env.NGC_QUANTUM_TEAM }}/${{ vars.packages_prefix }}cuda-quantum:nightly - platforms: linux/amd64 - provenance: false - push: true - - deploy_nvqc_test_function: - name: Deploy NVQC function - runs-on: ubuntu-latest - needs: [metadata, build_nvqc_image] - if: (inputs.target == 'nvqc' || github.event_name == 'schedule' || inputs.target == 'nightly') - permissions: - contents: read - - # Must have environment protection - environment: ghcr-deployment - - outputs: - nvqc_function_version_id: ${{ steps.deploy.outputs.nvqc_function_version_id }} - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - ref: ${{ needs.metadata.outputs.cudaq_commit }} - fetch-depth: 1 - - - name: Install NGC CLI - uses: ./.github/actions/install-ngc-cli - with: - version: 3.38.0 - checksum: 427c67684d792b673b63882a6d0cbb8777815095c0f2f31559c1570a91187388 - - - name: Deploy NVQC Function - id: deploy - env: - NGC_CLI_API_KEY: ${{ secrets.NGC_CREDENTIALS }} - NGC_CLI_ORG: ${{ env.NGC_QUANTUM_ORG }} - NGC_CLI_TEAM: cuda-quantum - # When a new REST version is introduced, NVQC_REST_PAYLOAD_VERSION needs to be updated in lockstep with the new nightly CUDA Quantum image. - # Otherwise, deployment of the test function will fail. - run: | - # We run with CUDAQ_SER_CODE_EXEC set. The final NVQC deployment may - # or may not have this set, but since we run the client with - # CUDAQ_CLIENT_REMOTE_CAPABILITY_OVERRIDE=1 (below), we need to run - # the CI with CUDAQ_SER_CODE_EXEC=1. If we ever remove - # CUDAQ_CLIENT_REMOTE_CAPABILITY_OVERRIDE=1 below, we can consider - # removing CUDAQ_SER_CODE_EXEC=1. - create_function_result=$(ngc-cli/ngc cloud-function function create \ - --container-image nvcr.io/${{ env.NGC_QUANTUM_ORG }}/${{ env.NGC_QUANTUM_TEAM }}/cuda-quantum:nightly \ - --container-environment-variable NUM_GPUS:1 \ - --container-environment-variable NVQC_REST_PAYLOAD_VERSION:1.1 \ - --container-environment-variable RUN_AS_NOBODY:1 \ - --container-environment-variable CUDAQ_SER_CODE_EXEC:1 \ - --api-body-format CUSTOM \ - --inference-port 3030 \ - --health-uri / \ - --inference-url /job \ - --name cudaq-nightly-integration-test \ - $NVQC_FUNCTION_ID) - version_id=$(echo "$create_function_result" | grep 'Version: \S*' | head -1 | cut -d ':' -f 2 | tr -d ' ') - echo "Create version Id: $version_id" - echo "nvqc_function_version_id=$version_id" >> $GITHUB_OUTPUT - # Deploy it - ngc-cli/ngc cloud-function function deploy create --deployment-specification $NGC_NVQC_DEPLOYMENT_SPEC $NVQC_FUNCTION_ID:$version_id - function_status=DEPLOYING - while [ "$function_status" = "DEPLOYING" ]; do - echo "Waiting for deploying NVQC function version $version_id ..." - sleep 120 - function_info=$(ngc-cli/ngc cloud-function function info $NVQC_FUNCTION_ID:$version_id) - function_status=$(echo "$function_info" | grep 'Status: \S*' | head -1 | cut -d ':' -f 2 | tr -d ' ') - done - if [ "$function_status" != "ACTIVE" ]; then - echo "::error:: Failed to deploy NVQC Test Function" - exit 1 - fi - # Setup job to determine which providers to test provider_matrix_setup: name: Setup provider matrix @@ -813,348 +672,3 @@ jobs: # Clean up rm -f "$HOME/.anyon_config" "$HOME/.quantinuum_config" shell: bash - - nvqc_integration_docker_test: - name: NVQC integration test using Docker image - runs-on: ubuntu-latest - if: (inputs.target == 'nvqc' || github.event_name == 'schedule' || inputs.target == 'nightly') - needs: [setup, metadata, build_nvqc_image, deploy_nvqc_test_function] - permissions: - contents: read - packages: read - - # Must have environment protection - environment: - name: ghcr-deployment - url: ${{ vars.deployment_url }} - - container: - image: ${{ needs.setup.outputs.cudaq_test_image }} - options: --user root - credentials: - username: ${{ github.actor }} - password: ${{ github.token }} - - steps: - - name: Skip NVQC Docker tests (temporary) - id: skip_check - run: | - echo "### Submit to NVQC" >> $GITHUB_STEP_SUMMARY - echo ":warning: NVQC Docker integration tests are temporarily skipped" >> $GITHUB_STEP_SUMMARY - echo "::warning::NVQC Docker integration tests are temporarily skipped" - echo "skipped=true" >> $GITHUB_OUTPUT - shell: bash - - - name: Get code - if: steps.skip_check.outputs.skipped != 'true' - uses: actions/checkout@v4 - with: - ref: ${{ needs.metadata.outputs.cudaq_commit }} - fetch-depth: 1 - - - name: Submit to NVQC - run: | - echo "### Submit to NVQC" >> $GITHUB_STEP_SUMMARY - export NVQC_API_KEY="${{ secrets.NVQC_SERVICE_KEY }}" - export NVQC_FUNCTION_ID="$NVQC_FUNCTION_ID" - export NVQC_FUNCTION_VERSION_ID="${{ needs.deploy_nvqc_test_function.outputs.nvqc_function_version_id }}" - # When overriding the NVQC_FUNCTION_ID to a function that doesn't - # follow the production naming convenvtions, we need to set the - # following environment variable to tell the client that the server - # has all the remote capabilities. - export CUDAQ_CLIENT_REMOTE_CAPABILITY_OVERRIDE=1 - set +e # Allow script to keep going through errors - test_err_sum=0 - # Test all NVQPP execution tests - for filename in `find targettests/execution/ -name '*.cpp'`; do - echo "$filename" - # Only run tests that require execution (not a syntax-only check) - if grep -q "ifndef SYNTAX_CHECK" "$filename"; then - nvq++ -v $filename --target nvqc - test_status=$? - if [ $test_status -eq 0 ]; then - ./a.out - test_status=$? - if [ $test_status -eq 0 ]; then - echo ":white_check_mark: Successfully ran test: $filename" >> $GITHUB_STEP_SUMMARY - else - echo ":x: Test failed (failed to execute): $filename" >> $GITHUB_STEP_SUMMARY - test_err_sum=$((test_err_sum+1)) - fi - else - echo ":x: Test failed (failed to compile): $filename" >> $GITHUB_STEP_SUMMARY - test_err_sum=$((test_err_sum+1)) - fi - fi - done - - # Test all remote-sim tests - for filename in `find targettests/Remote-Sim -name '*.cpp'`; do - # unsupport_args is compile error test - # qvector_init_from_state, qvector_init_from_state_lazy, test_trotter: New argument synthesis is not executed for nvqc (https://github.com/NVIDIA/cuda-quantum/issues/2146) - if [[ "$filename" != *"unsupport_args"* ]] && [[ "$filename" != *"qvector_init_from_state"* ]] && [[ "$filename" != *"qvector_init_from_state_lazy"* ]] && [[ "$filename" != *"test_trotter"* ]]; then - echo "$filename" - nvqc_config="" - # Look for a --remote-mqpu-auto-launch to determine the number of QPUs - num_qpus=`cat $filename | grep -oP -m 1 '^//\s*RUN:\s*nvq++.+--remote-mqpu-auto-launch\s+\K\S+'` - if [ -n "$num_qpus" ]; then - echo "Intended to run on '$num_qpus' QPUs." - nvqc_config="$nvqc_config --nvqc-nqpus $num_qpus" - fi - nvq++ -v $filename --target nvqc $nvqc_config - test_status=$? - if [ $test_status -eq 0 ]; then - ./a.out - test_status=$? - if [ $test_status -eq 0 ]; then - echo ":white_check_mark: Successfully ran test: $filename" >> $GITHUB_STEP_SUMMARY - else - echo ":x: Test failed (failed to execute): $filename" >> $GITHUB_STEP_SUMMARY - test_err_sum=$((test_err_sum+1)) - fi - else - echo ":x: Test failed (failed to compile): $filename" >> $GITHUB_STEP_SUMMARY - test_err_sum=$((test_err_sum+1)) - fi - fi - done - - # Test C++ examples with NVQC - for filename in `find examples/cpp/ applications/cpp/ targets/cpp/ -name '*.cpp'`; do - if [[ "$filename" == *"nvqc"* ]]; then - echo "$filename" - nvqc_config="" - # Look for a --nvqc-backend flag to nvq++ in the comment block - nvqc_backend=`sed -e '/^$/,$d' $filename | grep -oP -m 1 '^//\s*nvq++.+--nvqc-backend\s+\K\S+'` - if [ -n "$nvqc_backend" ]; then - echo "Intended for execution on '$nvqc_backend' backend." - nvqc_config="$nvqc_config --nvqc-backend $nvqc_backend" - fi - # Look for a --nvqc-nqpus flag to nvq++ in the comment block - num_qpus=`sed -e '/^$/,$d' $filename | grep -oP -m 1 '^//\s*nvq++.+--nvqc-nqpus\s+\K\S+'` - if [ -n "$num_qpus" ]; then - echo "Intended to run on '$num_qpus' QPUs." - nvqc_config="$nvqc_config --nvqc-nqpus $num_qpus" - fi - nvq++ -v $filename --target nvqc $nvqc_config - test_status=$? - if [ $test_status -eq 0 ]; then - ./a.out - test_status=$? - if [ $test_status -eq 0 ]; then - echo ":white_check_mark: Successfully ran test: $filename" >> $GITHUB_STEP_SUMMARY - else - echo ":x: Test failed (failed to execute): $filename" >> $GITHUB_STEP_SUMMARY - test_err_sum=$((test_err_sum+1)) - fi - else - echo ":x: Test failed (failed to compile): $filename" >> $GITHUB_STEP_SUMMARY - test_err_sum=$((test_err_sum+1)) - fi - fi - done - - # Test NVQC Python examples + Python MLIR execution tests (not IR tests) - python3 -m pip install pytest - for ex in `find examples/python python/tests/mlir/target -name '*.py'`; do - filename=$(basename -- "$ex") - filename="${filename%.*}" - echo "Testing $filename:" - if [[ "$ex" == *"nvqc"* ]]; then - # This is an NVQC example - python3 $ex 1> /dev/null - test_status=$? - if [ $test_status -eq 0 ]; then - echo ":white_check_mark: Successfully ran test: $ex" >> $GITHUB_STEP_SUMMARY - else - echo ":x: Test failed (failed to execute): $ex" >> $GITHUB_STEP_SUMMARY - test_err_sum=$((test_err_sum+1)) - fi - # building_kernels.py is disabled due to https://github.com/NVIDIA/cuda-quantum/issues/2299. - elif [[ "$ex" != *"building_kernels"* ]]; then - # Only run examples that are not target-specific (e.g., ionq, iqm) - if ! grep -q "set_target" "$ex"; then - # Use --target command line option to run these examples with nvqc - python3 $ex --target nvqc 1> /dev/null - test_status=$? - if [ $test_status -eq 0 ]; then - echo ":white_check_mark: Successfully ran test: $ex" >> $GITHUB_STEP_SUMMARY - else - echo ":x: Test failed (failed to execute): $ex" >> $GITHUB_STEP_SUMMARY - test_err_sum=$((test_err_sum+1)) - fi - fi - fi - done - - set -e # Re-enable exit code error checking - if [ ! $test_err_sum -eq 0 ]; then - echo "::error::${test_err_sum} tests failed. See step summary for a list of failures" - exit 1 - fi - shell: bash - - nvqc_integration_wheel_test: - name: NVQC integration test using Python wheels - runs-on: ubuntu-latest - if: inputs.target == 'nvqc' || github.event_name == 'schedule' || inputs.target == 'nightly' - needs: [metadata, build_nvqc_image, deploy_nvqc_test_function] - permissions: - contents: read - - # Must have environment protection - environment: ghcr-deployment - - steps: - - name: Skip NVQC wheel tests (temporary) - id: skip_check - run: | - echo "### Submit to NVQC from Python wheels" >> $GITHUB_STEP_SUMMARY - echo ":warning: NVQC Python wheel integration tests are temporarily skipped" >> $GITHUB_STEP_SUMMARY - echo "::warning::NVQC Python wheel integration tests are temporarily skipped" - echo "skipped=true" >> $GITHUB_OUTPUT - shell: bash - - - name: Get code - if: steps.skip_check.outputs.skipped != 'true' - uses: actions/checkout@v4 - with: - ref: ${{ needs.metadata.outputs.cudaq_commit }} - fetch-depth: 1 - - - name: Install wheel - if: steps.skip_check.outputs.skipped != 'true' - id: install_wheel - run: | - python_version=${{ inputs.python_version || env.python_version }} - workflow_id=${{ inputs.workflow_id }} - # Helper to get the *valid* Publishing run Id for a commit hash - # Notes: runs that have 'CUDA-Q Python wheels' jobs skipped are not considered. - function get_publishing_run_id { - # Find all Publishing runs, we'll look into its jobs' status later - if [[ -z "$1" ]]; then - publishing_run_ids=$(gh run list --workflow Publishing --json databaseId --jq .[].databaseId) - else - publishing_run_ids=$(gh run list --commit $1 --workflow Publishing --json databaseId --jq .[].databaseId) - fi - for run_id in $publishing_run_ids ; do - # Look into its jobs: if "CUDA-Q Python wheels" matrix build was performed, - # then we have multiple jobs, like "CUDA-Q Python wheels (python_arm64....") - cuda_wheel_build_jobs=$(gh run view $run_id --jq '.jobs.[] | select(.name | startswith("CUDA-Q Python wheels (python_")).name' --json jobs) - if [ ! -z "$cuda_wheel_build_jobs" ]; then - # This is a valid run that produces wheel artifacts - echo $run_id - break - fi - done - } - - if [ -z "${workflow_id}" ]; then - workflow_id=$(get_publishing_run_id ${{ needs.metadata.outputs.cudaq_commit }}) - fi - if [ ! -z "$workflow_id" ]; then - echo "Using artifacts from workflow id $workflow_id" - # Allow error when trying to download wheel artifacts since they might be expired. - set +e - gh run download $workflow_id --name "x86_64-cu12-py$python_version-wheels" - retVal=$? - set -e - if [ $retVal -ne 0 ]; then - echo "Failed to download wheels artifact from Publishing workflow run Id $workflow_id. Perhaps the artifacts have been expired." - # This is allowed since there might be a period where no Publishing workflow is run (e.g., no PR merged to main). - echo "skipped=true" >> $GITHUB_OUTPUT - exit 0 - fi - python_version_filename=$(echo "${python_version//.}") - # Install Python and the wheel - apt-get update && apt-get install -y --no-install-recommends python$python_version python3-pip - wheelfile=$(find . -name "cuda_quantum_cu12*cp$python_version_filename*x86_64.whl") - python$python_version -m pip install $wheelfile - echo "skipped=false" >> $GITHUB_OUTPUT - else - echo "Failed to retrieve Publishing workflow run Id for commit ${{ needs.metadata.outputs.cudaq_commit }}" - exit 1 - fi - env: - GH_TOKEN: ${{ github.token }} - - - name: Test NVQC - if: ${{ steps.skip_check.outputs.skipped != 'true' || steps.install_wheel.outputs.skipped != 'true' }} - run: | - echo "### Submit to NVQC from Python wheels" >> $GITHUB_STEP_SUMMARY - python_version=${{ inputs.python_version || env.python_version }} - export NVQC_API_KEY="${{ secrets.NVQC_SERVICE_KEY }}" - export NVQC_FUNCTION_ID="$NVQC_FUNCTION_ID" - export NVQC_FUNCTION_VERSION_ID="${{ needs.deploy_nvqc_test_function.outputs.nvqc_function_version_id }}" - set +e # Allow script to keep going through errors - python$python_version -m pip install pytest - test_err_sum=0 - for ex in `find examples/python python/tests/mlir/target -name '*.py'`; do - filename=$(basename -- "$ex") - filename="${filename%.*}" - echo "Testing $filename:" - if [[ "$ex" == *"nvqc"* ]]; then - python$python_version $ex 1> /dev/null - test_status=$? - if [ $test_status -eq 0 ]; then - echo ":white_check_mark: Successfully ran test: $ex" >> $GITHUB_STEP_SUMMARY - else - echo ":x: Test failed (failed to execute): $ex" >> $GITHUB_STEP_SUMMARY - test_err_sum=$((test_err_sum+1)) - fi - # building_kernels.py is disabled due to https://github.com/NVIDIA/cuda-quantum/issues/2299. - elif [[ "$ex" != *"building_kernels"* ]]; then - # Only run examples that are not target-specific (e.g., ionq, iqm) - if ! grep -q "set_target" "$ex"; then - # Use --target command line option to run these examples with nvqc - python$python_version $ex --target nvqc 1> /dev/null - test_status=$? - if [ $test_status -eq 0 ]; then - echo ":white_check_mark: Successfully ran test: $ex" >> $GITHUB_STEP_SUMMARY - else - echo ":x: Test failed (failed to execute): $ex" >> $GITHUB_STEP_SUMMARY - test_err_sum=$((test_err_sum+1)) - fi - fi - fi - done - set -e # Re-enable exit code error checking - if [ ! $test_err_sum -eq 0 ]; then - echo "::error::${test_err_sum} tests failed. See step summary for a list of failures" - exit 1 - fi - - cleanup_nvqc_resources: - name: Cleanup NVQC resources - runs-on: ubuntu-latest - if: (success() || failure()) && (inputs.target == 'nvqc' || github.event_name == 'schedule' || inputs.target == 'nightly') - needs: [build_nvqc_image, deploy_nvqc_test_function, nvqc_integration_docker_test, nvqc_integration_wheel_test] - permissions: - contents: read - - # Must have environment protection - environment: ghcr-deployment - - steps: - - name: Get code - uses: actions/checkout@v4 - - - name: Install NGC CLI - uses: ./.github/actions/install-ngc-cli - with: - version: 3.38.0 - checksum: 427c67684d792b673b63882a6d0cbb8777815095c0f2f31559c1570a91187388 - - - name: Cleanup - env: - NGC_CLI_API_KEY: ${{ secrets.NGC_CREDENTIALS }} - NGC_CLI_ORG: ${{ env.NGC_QUANTUM_ORG }} - NGC_CLI_TEAM: cuda-quantum - run: | - echo "Version Id: ${{ needs.deploy_nvqc_test_function.outputs.nvqc_function_version_id }}" - # Remove deployment (make it inactive) - ngc-cli/ngc cloud-function function deploy remove $NVQC_FUNCTION_ID:${{ needs.deploy_nvqc_test_function.outputs.nvqc_function_version_id }} - # Remove the function version - ngc-cli/ngc cloud-function function remove $NVQC_FUNCTION_ID:${{ needs.deploy_nvqc_test_function.outputs.nvqc_function_version_id }} - # Remove the docker image - ngc-cli/ngc registry image remove -y nvcr.io/${{ env.NGC_QUANTUM_ORG }}/${{ env.NGC_QUANTUM_TEAM }}/cuda-quantum:nightly diff --git a/.github/workflows/nvqc_regression_tests.yml b/.github/workflows/nvqc_regression_tests.yml deleted file mode 100644 index 055eeb2e723..00000000000 --- a/.github/workflows/nvqc_regression_tests.yml +++ /dev/null @@ -1,356 +0,0 @@ -name: Nvqc regression tests - -concurrency: - group: ${{ github.workflow }}${{ github.event.workflow_run.name }} - cancel-in-progress: false - -# Run on request and every day at 3 AM UTC -on: - workflow_dispatch: - inputs: - cudaq_test_image: - type: string - required: false - default: '' # picked up from repo variable if not provided - description: 'CUDA Quantum image to run the tests in. Default to the latest CUDA Quantum nightly image' - commit_sha: - type: string - required: false - description: 'Commit SHA to pull the code (examples/tests) for testing. Default to the commit associated with the CUDA Quantum docker image if left blank' - workflow_id: - type: string - required: false - description: 'Workflow Id to retrieve the Python wheel for testing. Default to the wheels produced by the Publishing workflow associated with the latest nightly CUDA Quantum Docker image if left blank' - python_version: - type: choice - required: true - description: 'Python version to run wheel test' - options: - - '3.11' - - '3.12' - - '3.13' - - schedule: - - cron: 0 3 * * * - -env: - python_version: '3.12' - -jobs: - # We need this job purely to choose the container image values because the - # `env` context is unavailable outside of "steps" contexts. - setup: - name: Set variables - runs-on: ubuntu-latest - permissions: {} - - outputs: - cudaq_test_image: ${{ steps.vars.outputs.cudaq_test_image }} - - steps: - - name: Set variables - id: vars - run: | - echo "cudaq_test_image=${{ inputs.cudaq_test_image || vars.cudaq_test_image }}" >> $GITHUB_OUTPUT - - metadata: - name: Retrieve commit info - runs-on: ubuntu-latest - needs: setup - permissions: - contents: read - packages: read - - environment: backend-validation - container: - image: ${{ needs.setup.outputs.cudaq_test_image }} - options: --user root - - outputs: - cudaq_commit: ${{ steps.commit-sha.outputs.sha }} - - steps: - - name: Get commit SHA - id: commit-sha - run: | - if [ -n "${{ inputs.commit_sha }}" ]; then - echo "sha=${{ inputs.commit_sha }}" >> $GITHUB_OUTPUT - else - echo "sha=$(cat $CUDA_QUANTUM_PATH/build_info.txt | grep -o 'source-sha: \S*' | cut -d ' ' -f 2)" >> $GITHUB_OUTPUT - fi - - nvqc_integration_docker_test: - name: NVQC integration test using Docker image - runs-on: ubuntu-latest - needs: [setup, metadata] - permissions: - contents: read - packages: read - - # Must have environment protection - environment: backend-validation - container: - image: ${{ needs.setup.outputs.cudaq_test_image }} - options: --user root - - steps: - - name: Get code - uses: actions/checkout@v4 - with: - ref: ${{ needs.metadata.outputs.cudaq_commit }} - fetch-depth: 1 - - - name: Submit to NVQC - run: | - echo "### Submit to NVQC" >> $GITHUB_STEP_SUMMARY - export NVQC_API_KEY="${{ secrets.NVQC_PROD_SERVICE_KEY }}" - set +e # Allow script to keep going through errors - test_err_sum=0 - # Test all NVQPP execution tests - for filename in `find targettests/execution/ -name '*.cpp'`; do - echo "$filename" - # Only run tests that require execution (not a syntax-only check) - if grep -q "ifndef SYNTAX_CHECK" "$filename"; then - nvq++ -v $filename --target nvqc - test_status=$? - if [ $test_status -eq 0 ]; then - ./a.out - test_status=$? - if [ $test_status -eq 0 ]; then - echo ":white_check_mark: Successfully ran test: $filename" >> $GITHUB_STEP_SUMMARY - else - echo ":x: Test failed (failed to execute): $filename" >> $GITHUB_STEP_SUMMARY - test_err_sum=$((test_err_sum+1)) - fi - else - echo ":x: Test failed (failed to compile): $filename" >> $GITHUB_STEP_SUMMARY - test_err_sum=$((test_err_sum+1)) - fi - fi - done - - # Test all remote-sim tests - for filename in `find targettests/Remote-Sim -name '*.cpp'`; do - # unsupport_args is compile error test - # pauli_word: https://github.com/NVIDIA/cuda-quantum/issues/1957 - # custom_operation: https://github.com/NVIDIA/cuda-quantum/issues/1985 - # return_values: only supported in 0.8 NVQC service. - # qvector_init_from_vector: only supported in 0.8 NVQC service. - # qvector_init_from_state, qvector_init_from_state_lazy, test_trotter: not supported yet on nvqc: https://github.com/NVIDIA/cuda-quantum/issues/2146 - if [[ "$filename" != *"unsupport_args"* ]] && [[ "$filename" != *"state_overlap"* ]] && [[ "$filename" != *"pauli_word"* ]] && [[ "$filename" != *"custom_operation"* ]] && [[ "$filename" != *"return_values"* ]] && [[ "$filename" != *"qvector_init_from_state"* ]] && [[ "$filename" != *"qvector_init_from_state_lazy"* ]] && [[ "$filename" != *"qvector_init_from_vector"* ]] && [[ "$filename" != *"test_trotter"* ]]; then - echo "$filename" - nvqc_config="" - # Look for a --remote-mqpu-auto-launch to determine the number of QPUs - num_qpus=`cat $filename | grep -oP -m 1 '^//\s*RUN:\s*nvq++.+--remote-mqpu-auto-launch\s+\K\S+'` - if [ -n "$num_qpus" ]; then - echo "Intended to run on '$num_qpus' QPUs." - nvqc_config="$nvqc_config --nvqc-nqpus $num_qpus" - fi - nvq++ -v $filename --target nvqc $nvqc_config - test_status=$? - if [ $test_status -eq 0 ]; then - ./a.out - test_status=$? - if [ $test_status -eq 0 ]; then - echo ":white_check_mark: Successfully ran test: $filename" >> $GITHUB_STEP_SUMMARY - else - echo ":x: Test failed (failed to execute): $filename" >> $GITHUB_STEP_SUMMARY - test_err_sum=$((test_err_sum+1)) - fi - else - echo ":x: Test failed (failed to compile): $filename" >> $GITHUB_STEP_SUMMARY - test_err_sum=$((test_err_sum+1)) - fi - fi - done - - # Test C++ examples with NVQC - for filename in `find examples/cpp/ applications/cpp/ targets/cpp/ -name '*.cpp'`; do - if [[ "$filename" == *"nvqc"* ]]; then - echo "$filename" - nvqc_config="" - # Look for a --nvqc-backend flag to nvq++ in the comment block - nvqc_backend=`sed -e '/^$/,$d' $filename | grep -oP -m 1 '^//\s*nvq++.+--nvqc-backend\s+\K\S+'` - if [ -n "$nvqc_backend" ]; then - echo "Intended for execution on '$nvqc_backend' backend." - nvqc_config="$nvqc_config --nvqc-backend $nvqc_backend" - fi - # Look for a --nvqc-nqpus flag to nvq++ in the comment block - num_qpus=`sed -e '/^$/,$d' $filename | grep -oP -m 1 '^//\s*nvq++.+--nvqc-nqpus\s+\K\S+'` - if [ -n "$num_qpus" ]; then - echo "Intended to run on '$num_qpus' QPUs." - nvqc_config="$nvqc_config --nvqc-nqpus $num_qpus" - fi - nvq++ -v $filename --target nvqc $nvqc_config - test_status=$? - if [ $test_status -eq 0 ]; then - ./a.out - test_status=$? - if [ $test_status -eq 0 ]; then - echo ":white_check_mark: Successfully ran test: $filename" >> $GITHUB_STEP_SUMMARY - else - echo ":x: Test failed (failed to execute): $filename" >> $GITHUB_STEP_SUMMARY - test_err_sum=$((test_err_sum+1)) - fi - else - echo ":x: Test failed (failed to compile): $filename" >> $GITHUB_STEP_SUMMARY - test_err_sum=$((test_err_sum+1)) - fi - fi - done - - # Test NVQC Python examples + Python MLIR execution tests (not IR tests) - python3 -m pip install pytest - # Disabling building_kernels as the state is not yet supported on NVQC - for ex in `find examples/python python/tests/mlir/target -name '*.py' ! -name '*building_kernels*'`; do - filename=$(basename -- "$ex") - filename="${filename%.*}" - echo "Testing $filename:" - if [[ "$ex" == *"nvqc"* ]]; then - # This is an NVQC example - python3 $ex 1> /dev/null - test_status=$? - if [ $test_status -eq 0 ]; then - echo ":white_check_mark: Successfully ran test: $ex" >> $GITHUB_STEP_SUMMARY - else - echo ":x: Test failed (failed to execute): $ex" >> $GITHUB_STEP_SUMMARY - test_err_sum=$((test_err_sum+1)) - fi - else - # Only run examples that are not target-specific (e.g., ionq, iqm) - if ! grep -q "set_target" "$ex"; then - # Use --target command line option to run these examples with nvqc - python3 $ex --target nvqc 1> /dev/null - test_status=$? - if [ $test_status -eq 0 ]; then - echo ":white_check_mark: Successfully ran test: $ex" >> $GITHUB_STEP_SUMMARY - else - echo ":x: Test failed (failed to execute): $ex" >> $GITHUB_STEP_SUMMARY - test_err_sum=$((test_err_sum+1)) - fi - fi - fi - done - - set -e # Re-enable exit code error checking - if [ ! $test_err_sum -eq 0 ]; then - echo "::error::${test_err_sum} tests failed. See step summary for a list of failures" - exit 1 - fi - shell: bash - - nvqc_integration_wheel_test: - name: NVQC integration test using Python wheels - runs-on: ubuntu-latest - needs: [metadata] - permissions: - contents: read - - # Must have environment protection - environment: backend-validation - - steps: - - name: Get code - uses: actions/checkout@v4 - with: - ref: ${{ needs.metadata.outputs.cudaq_commit }} - fetch-depth: 1 - - - name: Install wheel - id: install_wheel - run: | - python_version=${{ inputs.python_version || env.python_version }} - workflow_id=${{ inputs.workflow_id }} - # Helper to get the *valid* Publishing run Id for a commit hash - # Notes: runs that have 'CUDA-Q Python wheels' jobs skipped are not considered. - function get_publishing_run_id { - # Find all Publishing runs, we'll look into its jobs' status later - if [[ -z "$1" ]]; then - publishing_run_ids=$(gh run -R NVIDIA/cuda-quantum list --workflow Publishing --json databaseId --jq .[].databaseId) - else - publishing_run_ids=$(gh run -R NVIDIA/cuda-quantum list --commit $1 --workflow Publishing --json databaseId --jq .[].databaseId) - fi - for run_id in $publishing_run_ids ; do - # Look into its jobs: if "CUDA-Q Python wheels" matrix build was performed, - # then we have multiple jobs, like "CUDA-Q Python wheels (python_arm64....") - cuda_wheel_build_jobs=$(gh run -R NVIDIA/cuda-quantum view $run_id --jq '.jobs.[] | select(.name | startswith("CUDA-Q Python wheels (python_")).name' --json jobs) - if [ ! -z "$cuda_wheel_build_jobs" ]; then - # This is a valid run that produces wheel artifacts - echo $run_id - break - fi - done - } - - if [ -z "${workflow_id}" ]; then - workflow_id=$(get_publishing_run_id ${{ needs.metadata.outputs.cudaq_commit }}) - fi - if [ ! -z "$workflow_id" ]; then - echo "Using artifacts from workflow id $workflow_id" - # Allow error when trying to download wheel artifacts since they might be expired. - set +e - gh run -R NVIDIA/cuda-quantum download $workflow_id --name "x86_64-py$python_version-wheels" - retVal=$? - set -e - if [ $retVal -ne 0 ]; then - echo "Failed to download wheels artifact from Publishing workflow run Id $workflow_id. Perhaps the artifacts have been expired." - # This is allowed since there might be a period where no Publishing workflow is run (e.g., no PR merged to main). - echo "skipped=true" >> $GITHUB_OUTPUT - exit 0 - fi - python_version_filename=$(echo "${python_version//.}") - # Install Python and the wheel - apt-get update && apt-get install -y --no-install-recommends python$python_version python3-pip - wheelfile=$(find . -name "cuda_quantum_cu12*cp$python_version_filename*x86_64.whl") - python$python_version -m pip install $wheelfile - echo "skipped=false" >> $GITHUB_OUTPUT - else - echo "Failed to retrieve Publishing workflow run Id for commit ${{ needs.metadata.outputs.cudaq_commit }}" - exit 1 - fi - env: - GH_TOKEN: ${{ github.token }} - - - name: Test NVQC - if: ${{ steps.install_wheel.outputs.skipped != 'true' }} - run: | - echo "### Submit to NVQC from Python wheels" >> $GITHUB_STEP_SUMMARY - python_version=${{ inputs.python_version || env.python_version }} - export NVQC_API_KEY="${{ secrets.NVQC_PROD_SERVICE_KEY }}" - set +e # Allow script to keep going through errors - python$python_version -m pip install pytest - test_err_sum=0 - for ex in `find examples/python python/tests/mlir/target -name '*.py'`; do - filename=$(basename -- "$ex") - filename="${filename%.*}" - echo "Testing $filename:" - if [[ "$ex" == *"nvqc"* ]]; then - python$python_version $ex 1> /dev/null - test_status=$? - if [ $test_status -eq 0 ]; then - echo ":white_check_mark: Successfully ran test: $ex" >> $GITHUB_STEP_SUMMARY - else - echo ":x: Test failed (failed to execute): $ex" >> $GITHUB_STEP_SUMMARY - test_err_sum=$((test_err_sum+1)) - fi - # building_kernels.py is disabled due to https://github.com/NVIDIA/cuda-quantum/issues/2299. - elif [[ "$ex" != *"building_kernels"* ]]; then - # Only run examples that are not target-specific (e.g., ionq, iqm) - if ! grep -q "set_target" "$ex"; then - # Use --target command line option to run these examples with nvqc - python$python_version $ex --target nvqc 1> /dev/null - test_status=$? - if [ $test_status -eq 0 ]; then - echo ":white_check_mark: Successfully ran test: $ex" >> $GITHUB_STEP_SUMMARY - else - echo ":x: Test failed (failed to execute): $ex" >> $GITHUB_STEP_SUMMARY - test_err_sum=$((test_err_sum+1)) - fi - fi - fi - done - set -e # Re-enable exit code error checking - if [ ! $test_err_sum -eq 0 ]; then - echo "::error::${test_err_sum} tests failed. See step summary for a list of failures" - exit 1 - fi diff --git a/.github/workflows/python_wheels.yml b/.github/workflows/python_wheels.yml index 3c1bed2e4b0..e21930b6e68 100644 --- a/.github/workflows/python_wheels.yml +++ b/.github/workflows/python_wheels.yml @@ -323,7 +323,7 @@ jobs: docker run --rm -dit --name wheel-validation-snippets wheel_validation:local status_sum=0 - for ex in `find docs/sphinx/snippets/python -name '*.py' -not -path '*/platform/*' -not -path '*/nvqc/*' -not -path '*/backends/*'`; do + for ex in `find docs/sphinx/snippets/python -name '*.py' -not -path '*/platform/*' -not -path '*/backends/*'`; do file="${ex#docs/sphinx/snippets/python/}" echo "__Snippet ${file}:__" >> /tmp/validation.out (docker exec wheel-validation-snippets bash -c "python${{ inputs.python_version }} /tmp/snippets/$file" >> /tmp/validation.out) && success=true || success=false diff --git a/docker/release/cudaq.nvqc.Dockerfile b/docker/release/cudaq.nvqc.Dockerfile deleted file mode 100644 index 069d3c15b84..00000000000 --- a/docker/release/cudaq.nvqc.Dockerfile +++ /dev/null @@ -1,53 +0,0 @@ -# ============================================================================ # -# Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. # -# All rights reserved. # -# # -# This source code and the accompanying materials are made available under # -# the terms of the Apache License 2.0 which accompanies this distribution. # -# ============================================================================ # - -# This file is used to build CUDA-Q NVQC service container to be deployed to NVCF. -# -# Usage: -# Must be built from the repo root with: -# # to skip prerequisites (default) -# docker build --target=without_tpls -f docker/release/cudaq.nvqc.Dockerfile . -# -# # to install and clone prerequisites -# docker build --target=with_tpls \ -# -f docker/release/cudaq.nvqc.Dockerfile . - -# Base image is CUDA-Q image -ARG base_image=nvcr.io/nvidia/nightly/cuda-quantum:cu12-latest -FROM $base_image AS nvcf_image - -# With prerequisites -FROM $base_image AS with_tpls -RUN echo "Build with prerequisites" -# COPY install_prerequisites into the image -RUN sudo mkdir -p /tmp -COPY --chmod=0755 scripts/install_prerequisites.sh /tmp/install_prerequisites.sh -COPY .gitmodules /tmp/.gitmodules -# Manually run this command locally to create tpls_commits.lock file -# git config --file .gitmodules --get-regexp '^submodule\..*\.path$' \ -# | awk '{print $2}' \ -# | while read p; do printf "%s %s\n" "$(git rev-parse HEAD:$p)" "$p"; done \ -# > tpls_commits.lock -COPY tpls_commits.lock /tmp/tpls_commits.lock -RUN sudo bash /tmp/install_prerequisites.sh -l /tmp/tpls_commits.lock - -# Without prerequisites -FROM $base_image AS without_tpls -RUN echo "Default build without prerequisites" - -# Run the tar command and then uncomment ADD cudaq.tar.gz ... in order to -# override the installation. -# tar czvf /workspaces/cuda-quantum/cudaq.tar.gz -C /usr/local/cudaq . -# ADD cudaq.tar.gz /opt/nvidia/cudaq - -RUN sudo mkdir /nvqc_scripts -ADD tools/cudaq-qpud/nvqc_proxy.py /nvqc_scripts -ADD tools/cudaq-qpud/json_request_runner.py /nvqc_scripts -ADD scripts/nvqc_launch.sh /nvqc_scripts - -ENTRYPOINT ["bash", "-l", "/nvqc_scripts/nvqc_launch.sh"] diff --git a/docs/sphinx/api/languages/cpp_api.rst b/docs/sphinx/api/languages/cpp_api.rst index bc63e68d2d1..36b0088138c 100644 --- a/docs/sphinx/api/languages/cpp_api.rst +++ b/docs/sphinx/api/languages/cpp_api.rst @@ -326,8 +326,6 @@ Platform .. doxygenclass:: cudaq::BaseRemoteSimulatorQPU -.. doxygenclass:: cudaq::BaseNvcfSimulatorQPU - .. doxygenclass:: cudaq::AnalogRemoteRESTQPU .. doxygenclass:: cudaq::FermioniqBaseQPU @@ -340,8 +338,6 @@ Platform .. doxygenstruct:: cudaq::RemoteCapabilities :members: -.. doxygenclass:: cudaq::SerializedCodeExecutionContext - .. doxygentypedef:: cudaq::QuantumTask .. doxygentypedef:: cudaq::QubitConnectivity diff --git a/docs/sphinx/releases.rst b/docs/sphinx/releases.rst index 1a2ac96b33a..952250cfb47 100644 --- a/docs/sphinx/releases.rst +++ b/docs/sphinx/releases.rst @@ -147,7 +147,7 @@ The full change log can be found `here `, +The 0.7.0 release adds support for using NVIDIA Quantum Cloud, giving you access to our most powerful GPU-accelerated simulators even if you don't have an NVIDIA GPU. With 0.7.0, we have furthermore greatly increased expressiveness of the Python and C++ language frontends. Check out our `documentation `__ diff --git a/docs/sphinx/snippets/cpp/using/cudaq/nvqc/nvqc_intro.cpp b/docs/sphinx/snippets/cpp/using/cudaq/nvqc/nvqc_intro.cpp deleted file mode 100644 index 5427c4fd27a..00000000000 --- a/docs/sphinx/snippets/cpp/using/cudaq/nvqc/nvqc_intro.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * - * All rights reserved. * - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. * - ******************************************************************************/ - -// [Begin Documentation] -#include - -// Define a simple quantum kernel to execute on NVQC. -struct ghz { - // Maximally entangled state between 25 qubits. - auto operator()() __qpu__ { - constexpr int NUM_QUBITS = 25; - cudaq::qvector q(NUM_QUBITS); - h(q[0]); - for (int i = 0; i < NUM_QUBITS - 1; i++) { - x(q[i], q[i + 1]); - } - auto result = mz(q); - } -}; - -int main() { - auto counts = cudaq::sample(ghz{}); - counts.dump(); -} diff --git a/docs/sphinx/snippets/cpp/using/cudaq/nvqc/nvqc_mqpu.cpp b/docs/sphinx/snippets/cpp/using/cudaq/nvqc/nvqc_mqpu.cpp deleted file mode 100644 index 5480ff5600e..00000000000 --- a/docs/sphinx/snippets/cpp/using/cudaq/nvqc/nvqc_mqpu.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * - * All rights reserved. * - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. * - ******************************************************************************/ - -// [Begin Documentation] -#include -#include -#include -#include -#include - -int main() { - cudaq::spin_op h = - 5.907 - 2.1433 * cudaq::spin_op::x(0) * cudaq::spin_op::x(1) - - 2.1433 * cudaq::spin_op::y(0) * cudaq::spin_op::y(1) + - .21829 * cudaq::spin_op::z(0) - 6.125 * cudaq::spin_op::z(1); - - auto [ansatz, theta] = cudaq::make_kernel(); - auto q = ansatz.qalloc(); - auto r = ansatz.qalloc(); - ansatz.x(q); - ansatz.ry(theta, r); - ansatz.x(r, q); - - // Run VQE with a gradient-based optimizer. - // Delegate cost function and gradient computation across different NVQC-based - // QPUs. - // Note: this needs to be compiled with `--nvqc-nqpus 3` create 3 virtual - // QPUs. - cudaq::optimizers::lbfgs optimizer; - auto [opt_val, opt_params] = optimizer.optimize( - /*dim=*/1, /*opt_function*/ [&](const std::vector ¶ms, - std::vector &grads) { - // Queue asynchronous jobs to do energy evaluations across multiple QPUs - auto energy_future = - cudaq::observe_async(/*qpu_id=*/0, ansatz, h, params[0]); - const double paramShift = M_PI_2; - auto plus_future = cudaq::observe_async(/*qpu_id=*/1, ansatz, h, - params[0] + paramShift); - auto minus_future = cudaq::observe_async(/*qpu_id=*/2, ansatz, h, - params[0] - paramShift); - grads[0] = (plus_future.get().expectation() - - minus_future.get().expectation()) / - 2.0; - return energy_future.get().expectation(); - }); - std::cout << "Minimum energy = " << opt_val << " (expected -1.74886).\n"; -} diff --git a/docs/sphinx/snippets/python/using/cudaq/nvqc/nvqc_intro.py b/docs/sphinx/snippets/python/using/cudaq/nvqc/nvqc_intro.py deleted file mode 100644 index ead26814f23..00000000000 --- a/docs/sphinx/snippets/python/using/cudaq/nvqc/nvqc_intro.py +++ /dev/null @@ -1,23 +0,0 @@ -# ============================================================================ # -# Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. # -# All rights reserved. # -# # -# This source code and the accompanying materials are made available under # -# the terms of the Apache License 2.0 which accompanies this distribution. # -# ============================================================================ # -# [Begin Documentation] -import cudaq - -cudaq.set_target("nvqc") -num_qubits = 25 -# Define a simple quantum kernel to execute on NVQC. -kernel = cudaq.make_kernel() -qubits = kernel.qalloc(num_qubits) -# Maximally entangled state between 25 qubits. -kernel.h(qubits[0]) -for i in range(num_qubits - 1): - kernel.cx(qubits[i], qubits[i + 1]) -kernel.mz(qubits) - -counts = cudaq.sample(kernel) -print(counts) diff --git a/docs/sphinx/snippets/python/using/cudaq/nvqc/nvqc_mqpu.py b/docs/sphinx/snippets/python/using/cudaq/nvqc/nvqc_mqpu.py deleted file mode 100644 index 2055be2659c..00000000000 --- a/docs/sphinx/snippets/python/using/cudaq/nvqc/nvqc_mqpu.py +++ /dev/null @@ -1,54 +0,0 @@ -# ============================================================================ # -# Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. # -# All rights reserved. # -# # -# This source code and the accompanying materials are made available under # -# the terms of the Apache License 2.0 which accompanies this distribution. # -# ============================================================================ # -# [Begin Documentation] -import cudaq -from cudaq import spin -import math - -# Use NVQC with 3 virtual QPUs -cudaq.set_target("nvqc", nqpus=3) - -print("Number of QPUs:", cudaq.get_target().num_qpus()) -# Create the parameterized ansatz -kernel, theta = cudaq.make_kernel(float) -qreg = kernel.qalloc(2) -kernel.x(qreg[0]) -kernel.ry(theta, qreg[1]) -kernel.cx(qreg[1], qreg[0]) - -# Define its spin Hamiltonian. -hamiltonian = (5.907 - 2.1433 * spin.x(0) * spin.x(1) - - 2.1433 * spin.y(0) * spin.y(1) + 0.21829 * spin.z(0) - - 6.125 * spin.z(1)) - - -def opt_gradient(parameter_vector): - # Evaluate energy and gradient on different remote QPUs - # (i.e., concurrent job submissions to NVQC) - energy_future = cudaq.observe_async(kernel, - hamiltonian, - parameter_vector[0], - qpu_id=0) - plus_future = cudaq.observe_async(kernel, - hamiltonian, - parameter_vector[0] + 0.5 * math.pi, - qpu_id=1) - minus_future = cudaq.observe_async(kernel, - hamiltonian, - parameter_vector[0] - 0.5 * math.pi, - qpu_id=2) - return (energy_future.get().expectation(), [ - (plus_future.get().expectation() - minus_future.get().expectation()) / - 2.0 - ]) - - -optimizer = cudaq.optimizers.LBFGS() -optimal_value, optimal_parameters = optimizer.optimize(1, opt_gradient) -print("Ground state energy =", optimal_value) -print("Optimal parameters =", optimal_parameters) diff --git a/docs/sphinx/targets/cpp/nvqc_qml.cpp b/docs/sphinx/targets/cpp/nvqc_qml.cpp deleted file mode 100644 index c6a12d958b7..00000000000 --- a/docs/sphinx/targets/cpp/nvqc_qml.cpp +++ /dev/null @@ -1,96 +0,0 @@ -// Compile and run with: -// ``` -// nvq++ --target nvqc nvqc_qml.cpp -o out.x -// ./out.x -// ``` -// Assumes a valid NVQC API key has been set in the `NVQC_API_KEY` environment -// variable. Please refer to the documentations for information about how to -// attain NVQC API key. - -// This example demonstrates the simulation of large parameterized quantum -// circuits with NVQC. These parameterized circuits can be applied to quantum -// machine learning to classify data points, e.g. see -// https://arxiv.org/pdf/1906.07682.pdf. - -#include -#include -#include -#include -#include - -struct kernel { - - auto operator()(const int n_qubits, - const std::vector parameters) __qpu__ { - - cudaq::qvector qubits(n_qubits); - h(qubits); - - for (size_t i = 0; i < n_qubits; i++) { - rx(parameters[i], qubits[i]); - } - - for (size_t i = 0; i < n_qubits; i++) { - ry(parameters[i + n_qubits], qubits[i]); - } - - h(qubits); - - for (size_t i = 0; i < n_qubits; i++) { - rz(parameters[i + n_qubits * 2], qubits[i]); - } - - for (size_t i = 0; i < n_qubits; i += 2) { - cx(qubits[i], qubits[i + 1]); - } - - for (size_t i = 0; i < n_qubits; i++) { - rz(parameters[i + n_qubits * 2], qubits[i]); - } - - for (size_t i = 0; i < n_qubits; i += 2) { - cy(qubits[i], qubits[i + 1]); - } - - for (size_t i = 0; i < n_qubits; i++) { - ry(parameters[i + n_qubits], qubits[i]); - } - - for (size_t i = 0; i < n_qubits; i += 2) { - cz(qubits[i], qubits[i + 1]); - } - - x(qubits); - y(qubits); - h(qubits); - } -}; - -std::vector initial_parameters(int n_parameters, int seed) { - - std::default_random_engine generator(seed); - std::uniform_real_distribution distribution(0.0, 1.0); - std::vector parameters(n_parameters); - - for (size_t i = 0; i < n_parameters; i++) { - parameters[i] = distribution(generator); - } - return parameters; -} - -int main() { - - const int n_qubits = 26; - const int n_parameters = 3 * n_qubits; - std::vector parameters = initial_parameters(n_parameters, 13); - auto h = cudaq::spin::z(0); - - auto start = clock(); - auto exp_val = cudaq::observe(kernel{}, h, n_qubits, parameters); - auto end = clock(); - - printf("Expectation Value: %f \n", exp_val.expectation()); - printf("Runtime: %fs \n", float(end - start) / float(CLOCKS_PER_SEC)); - - return 0; -} diff --git a/docs/sphinx/targets/cpp/nvqc_sample.cpp b/docs/sphinx/targets/cpp/nvqc_sample.cpp deleted file mode 100644 index 4e2aa69b36b..00000000000 --- a/docs/sphinx/targets/cpp/nvqc_sample.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// Compile and run with: -// ``` -// nvq++ --target nvqc nvqc_sample.cpp -o out.x -// ./out.x -// ``` -// Assumes a valid NVQC API key has been set in the `NVQC_API_KEY` environment -// variable. Please refer to the documentations for information about how to -// attain NVQC API key. - -#include -#include - -// Define a simple quantum kernel to execute on NVQC. -struct ghz { - // Maximally entangled state between 25 qubits. - auto operator()() __qpu__ { - constexpr int NUM_QUBITS = 25; - cudaq::qvector q(NUM_QUBITS); - h(q[0]); - for (int i = 0; i < NUM_QUBITS - 1; i++) { - x(q[i], q[i + 1]); - } - auto result = mz(q); - } -}; - -int main() { - // Submit to NVQC asynchronously (e.g., continue executing - // code in the file until the job has been returned). - auto async_counts_handle = cudaq::sample_async(ghz{}); - // ... classical code to execute in the meantime ... - std::cout << "Waiting for NVQC result...\n"; - - // Calling .get() on the handle to synchronize the result. - auto async_counts = async_counts_handle.get(); - async_counts.dump(); - - // OR: Submit to NVQC synchronously (e.g., wait for the job - // result to be returned before proceeding). - auto counts = cudaq::sample(ghz{}); - counts.dump(); -} diff --git a/docs/sphinx/targets/cpp/nvqc_state.cpp b/docs/sphinx/targets/cpp/nvqc_state.cpp deleted file mode 100644 index d7cb6317f80..00000000000 --- a/docs/sphinx/targets/cpp/nvqc_state.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// Compile and run with: -// ``` -// nvq++ --target nvqc --nvqc-backend tensornet nvqc_state.cpp -o out.x -// ./out.x -// ``` -// Assumes a valid NVQC API key has been set in the `NVQC_API_KEY` environment -// variable. Please refer to the documentations for information about how to -// attain NVQC API key. - -#include "cudaq/algorithms/get_state.h" -#include -#include - -int main() { - auto kernel = cudaq::make_kernel(); - const std::size_t NUM_QUBITS = 20; - auto q = kernel.qalloc(NUM_QUBITS); - kernel.h(q[0]); - for (std::size_t qId = 0; qId < NUM_QUBITS - 1; ++qId) - kernel.x(q[qId], q[qId + 1]); - auto state = cudaq::get_state(kernel); - std::cout << "Amplitude(00..00) = " << state[0] << "\n"; - std::cout << "Amplitude(11..11) = " << state[(1ULL << NUM_QUBITS) - 1] - << "\n"; -} diff --git a/docs/sphinx/targets/cpp/nvqc_vqe.cpp b/docs/sphinx/targets/cpp/nvqc_vqe.cpp deleted file mode 100644 index 72b70767843..00000000000 --- a/docs/sphinx/targets/cpp/nvqc_vqe.cpp +++ /dev/null @@ -1,55 +0,0 @@ -// Compile and run with: -// ``` -// nvq++ --target nvqc --nvqc-nqpus 3 nvqc_vqe.cpp -o out.x -// ./out.x -// ``` -// Note: we set `nqpus` to 3 to establish 3 concurrent NVQC job submission -// pipes. -// Assumes a valid NVQC API key has been set in the `NVQC_API_KEY` environment -// variable. Please refer to the documentations for information about how to -// attain NVQC API key. - -#include -#include -#include -#include -#include - -int main() { - cudaq::spin_op h = - 5.907 - 2.1433 * cudaq::spin_op::x(0) * cudaq::spin_op::x(1) - - 2.1433 * cudaq::spin_op::y(0) * cudaq::spin_op::y(1) + - .21829 * cudaq::spin_op::z(0) - 6.125 * cudaq::spin_op::z(1); - - auto [ansatz, theta] = cudaq::make_kernel(); - auto q = ansatz.qalloc(); - auto r = ansatz.qalloc(); - ansatz.x(q); - ansatz.ry(theta, r); - ansatz.x(r, q); - - // Run VQE with a gradient-based optimizer. - // Delegate cost function and gradient computation across different NVQC-based - // QPUs. Note: depending on the user's account, there might be different - // number of NVQC worker instances available. Hence, although we're making - // concurrent job submissions across multiple QPUs, the speedup would be - // determined by the number of NVQC worker instances. - cudaq::optimizers::lbfgs optimizer; - auto [opt_val, opt_params] = optimizer.optimize( - /*dim=*/1, /*opt_function*/ [&](const std::vector ¶ms, - std::vector &grads) { - // Queue asynchronous jobs to do energy evaluations across multiple QPUs - auto energy_future = - cudaq::observe_async(/*qpu_id=*/0, ansatz, h, params[0]); - const double paramShift = M_PI_2; - auto plus_future = cudaq::observe_async(/*qpu_id=*/1, ansatz, h, - params[0] + paramShift); - auto minus_future = cudaq::observe_async(/*qpu_id=*/2, ansatz, h, - params[0] - paramShift); - grads[0] = (plus_future.get().expectation() - - minus_future.get().expectation()) / - 2.0; - return energy_future.get().expectation(); - }); - std::cout << "Minimum energy = " << opt_val << " (expected -1.74886).\n"; -} diff --git a/docs/sphinx/targets/python/nvqc_mgpu.py b/docs/sphinx/targets/python/nvqc_mgpu.py deleted file mode 100644 index 00b7d806e5e..00000000000 --- a/docs/sphinx/targets/python/nvqc_mgpu.py +++ /dev/null @@ -1,114 +0,0 @@ -import argparse -import cudaq -import random - -# This example assumes the NVQC API key has been set in the `NVQC_API_KEY` environment variable. -# If not, you can set the API Key environment variable in the Python script with: -# ``` -# os.environ["NVQC_API_KEY"] = ""` -# ``` - - -def random_bitstring(length: int): - bitstring = "" - for bit in range(length): - bitstring += str(random.randint(0, 1)) - return bitstring - - -def oracle(kernel: cudaq.Kernel, register: cudaq.QuakeValue, - auxillary_qubit: cudaq.QuakeValue, hidden_bitstring: str): - """ - The inner-product oracle for the Bernstein-Vazirani algorithm. - """ - for index, bit in enumerate(hidden_bitstring): - if bit == "0": - # Apply identity operation to the qubit if it's - # in the 0-state. - # In this case, we do nothing. - pass - else: - # Otherwise, apply a `cx` gate with the current qubit as - # the control and the auxillary qubit as the target. - kernel.cx(control=register[index], target=auxillary_qubit) - - -def bernstein_vazirani(qubit_count: int): - """ - Returns a kernel implementing the Bernstein-Vazirani algorithm - for a random, hidden bitstring. - """ - kernel = cudaq.make_kernel() - # Allocate the specified number of qubits - this - # corresponds to the length of the hidden bitstring. - qubits = kernel.qalloc(qubit_count) - # Allocate an extra auxillary qubit. - auxillary_qubit = kernel.qalloc() - - # Prepare the auxillary qubit. - kernel.h(auxillary_qubit) - kernel.z(auxillary_qubit) - - # Place the rest of the register in a superposition state. - kernel.h(qubits) - - # Generate a random, hidden bitstring for the oracle - # to encode. Note: we define the bitstring here so - # as to be able to return it for verification. - hidden_bitstring = random_bitstring(qubit_count) - - # Query the oracle. - oracle(kernel, qubits, auxillary_qubit, hidden_bitstring) - - # Apply another set of Hadamards to the register. - kernel.h(qubits) - - # Apply measurement gates to just the `qubits` - # (excludes the auxillary qubit). - kernel.mz(qubits) - return kernel, hidden_bitstring - - -# This example demonstrated GPU-accelerated simulator backends on NVQC can easily handle a large number of qubits. -if __name__ == '__main__': - parser = argparse.ArgumentParser( - prog='python', - description='Run a Bernstein-Vazirani algorithm using NVQC.', - epilog= - 'For more information about CUDA-Q, see https://nvidia.github.io/cuda-quantum' - ) - parser.add_argument('--size', - type=int, - required=False, - default=30, - help='The number of bits in the secret string.') - parser.add_argument('--ngpus', - type=int, - required=False, - default=1, - help='The number of NVQC GPUs to run the simulation.') - parser.add_argument('--seed', - type=int, - required=False, - default=0, - help='The random seed to generate the secret string.') - args = parser.parse_args() - - # Depending on the number of GPUs requested, you can - # set the size of the secret string to around 31-34 (total qubit count = string length + 1) when - # you pass the `--ngpus` as a command line argument. - qubit_count = args.size - if args.seed != 0: - random.seed(args.seed) - - cudaq.set_target("nvqc", backend="nvidia-mgpu", ngpus=args.ngpus) - - print( - f"Running on NVQC using 'nvidia-mgpu' simulator backend with {args.ngpus} GPU(s) ..." - ) - kernel, hidden_bitstring = bernstein_vazirani(qubit_count) - result = cudaq.sample(kernel) - - print(f"encoded bitstring = {hidden_bitstring}") - print(f"measured state = {result.most_probable()}") - print(f"Were we successful? {hidden_bitstring == result.most_probable()}") diff --git a/docs/sphinx/targets/python/nvqc_sample.py b/docs/sphinx/targets/python/nvqc_sample.py deleted file mode 100644 index 55c11b4f1f3..00000000000 --- a/docs/sphinx/targets/python/nvqc_sample.py +++ /dev/null @@ -1,26 +0,0 @@ -import cudaq - -# This example assumes the NVQC API key has been set in the `NVQC_API_KEY` environment variable. -# If not, you can set the API Key environment variable in the Python script with: -# ``` -# os.environ["NVQC_API_KEY"] = ""` -# ``` -cudaq.set_target("nvqc", backend="tensornet") - -# Note: The `tensornet` simulator is capable of distributing tensor contraction operations across multiple GPUs. -# User can use the `ngpus` option to target a multi-GPU NVQC endpoint. -# For example, to use the `tensornet` simulator with 8 GPUs, we can do -# `cudaq.set_target("nvqc", backend="tensornet", ngpus=8)` -# Please refer to your NVQC dashboard for the list of available multi-GPU configurations. -num_qubits = 50 -kernel = cudaq.make_kernel() -qubits = kernel.qalloc(num_qubits) -# Place qubits in superposition state. -kernel.h(qubits[0]) -for i in range(num_qubits - 1): - kernel.cx(qubits[i], qubits[i + 1]) -# Measure. -kernel.mz(qubits) - -counts = cudaq.sample(kernel, shots_count=100) -print(counts) diff --git a/docs/sphinx/targets/python/nvqc_state.py b/docs/sphinx/targets/python/nvqc_state.py deleted file mode 100644 index eae5c423395..00000000000 --- a/docs/sphinx/targets/python/nvqc_state.py +++ /dev/null @@ -1,21 +0,0 @@ -import cudaq - -# This example assumes the NVQC API key has been set in the `NVQC_API_KEY` environment variable. -# If not, you can set the API Key environment variable in the Python script with: -# ``` -# os.environ["NVQC_API_KEY"] = ""` -# ``` - -cudaq.set_target("nvqc") - -num_qubits = 20 -kernel = cudaq.make_kernel() -qubits = kernel.qalloc(num_qubits) -# Place qubits in GHZ state. -kernel.h(qubits[0]) -for i in range(num_qubits - 1): - kernel.cx(qubits[i], qubits[i + 1]) - -state = cudaq.get_state(kernel) -print("Amplitude(00..00) =", state[0]) -print("Amplitude(11..11) =", state[2**num_qubits - 1]) diff --git a/docs/sphinx/targets/python/nvqc_vqe.py b/docs/sphinx/targets/python/nvqc_vqe.py deleted file mode 100644 index da90150a6e6..00000000000 --- a/docs/sphinx/targets/python/nvqc_vqe.py +++ /dev/null @@ -1,59 +0,0 @@ -import cudaq -from cudaq import spin -import math - -# This example assumes the NVQC API key has been set in the `NVQC_API_KEY` environment variable. -# If not, you can set the API Key environment variable in the Python script with: -# ``` -# os.environ["NVQC_API_KEY"] = ""` -# ``` - -cudaq.set_target("nvqc", nqpus=3) - -print("Number of QPUs:", cudaq.get_target().num_qpus()) - - -# Note: depending on the user's account, there might be different -# number of NVQC worker instances available. Hence, although we're making -# concurrent job submissions across multiple QPUs, the speedup would be -# determined by the number of NVQC worker instances. -# Create the parameterized ansatz -@cudaq.kernel -def ansatz(theta: float): - qvector = cudaq.qvector(2) - x(qvector[0]) - ry(theta, qvector[1]) - x.ctrl(qvector[1], qvector[0]) - - -# Define its spin Hamiltonian. -hamiltonian = (5.907 - 2.1433 * spin.x(0) * spin.x(1) - - 2.1433 * spin.y(0) * spin.y(1) + 0.21829 * spin.z(0) - - 6.125 * spin.z(1)) - - -def opt_gradient(parameter_vector): - # Evaluate energy and gradient on different remote QPUs - # (i.e., concurrent job submissions to NVQC) - energy_future = cudaq.observe_async(ansatz, - hamiltonian, - parameter_vector[0], - qpu_id=0) - plus_future = cudaq.observe_async(ansatz, - hamiltonian, - parameter_vector[0] + 0.5 * math.pi, - qpu_id=1) - minus_future = cudaq.observe_async(ansatz, - hamiltonian, - parameter_vector[0] - 0.5 * math.pi, - qpu_id=2) - return (energy_future.get().expectation(), [ - (plus_future.get().expectation() - minus_future.get().expectation()) / - 2.0 - ]) - - -optimizer = cudaq.optimizers.LBFGS() -optimal_value, optimal_parameters = optimizer.optimize(1, opt_gradient) -print("Ground state energy =", optimal_value) -print("Optimal parameters =", optimal_parameters) diff --git a/docs/sphinx/using/backends/cloud.rst b/docs/sphinx/using/backends/cloud.rst index 20ef1d74117..4601b1e2da2 100644 --- a/docs/sphinx/using/backends/cloud.rst +++ b/docs/sphinx/using/backends/cloud.rst @@ -7,5 +7,4 @@ CUDA-Q provides a number of options to access hardware resources (GPUs and QPUs) :maxdepth: 1 Amazon Braket (braket) - NVIDIA Quantum Cloud (nvqc) diff --git a/docs/sphinx/using/backends/cloud/nvqc.rst b/docs/sphinx/using/backends/cloud/nvqc.rst deleted file mode 100644 index ba69faef432..00000000000 --- a/docs/sphinx/using/backends/cloud/nvqc.rst +++ /dev/null @@ -1,257 +0,0 @@ -NVIDIA Quantum Cloud -+++++++++++++++++++++ - -NVIDIA Quantum Cloud (NVQC) offers universal access to the world’s most powerful computing platform, -for every quantum researcher to do their life’s work. -To learn more about NVQC visit this `link `__. - -Apply for early access `here `__. -Access to the Quantum Cloud early access program requires an NVIDIA Developer account. - -Quick Start -^^^^^^^^^^^ -Once you have been approved for an early access to NVQC, you will be able to follow these instructions to use it. - -1. Follow the instructions in your NVQC Early Access welcome email to obtain an API Key for NVQC. -You can also find the instructions `here `__ (link available only for approved users) - -2. Set the environment variable `NVQC_API_KEY` to the API Key obtained above. - - .. code-block:: console - - export NVQC_API_KEY= - -You may wish to persist that environment variable between bash sessions, e.g., by adding it to your `$HOME/.bashrc` file. - -3. Run your first NVQC example - -The following is a typical CUDA-Q kernel example. -By selecting the `nvqc` target, the quantum circuit simulation will run on NVQC in the cloud, rather than running locally. - - -.. tab:: Python - - .. literalinclude:: ../../../snippets/python/using/cudaq/nvqc/nvqc_intro.py - :language: python - :start-after: [Begin Documentation] - - .. code-block:: console - - [2024-03-14 19:26:31.438] Submitting jobs to NVQC service with 1 GPU(s). Max execution time: 3600 seconds (excluding queue wait time). - - ================ NVQC Device Info ================ - GPU Device Name: "NVIDIA H100 80GB HBM3" - CUDA Driver Version / Runtime Version: 12.2 / 12.0 - Total global memory (GB): 79.1 - Memory Clock Rate (MHz): 2619.000 - GPU Clock Rate (MHz): 1980.000 - ================================================== - { 1111111111111111111111111:486 0000000000000000000000000:514 } - -.. tab:: C++ - - .. literalinclude:: ../../../snippets/cpp/using/cudaq/nvqc/nvqc_intro.cpp - :language: cpp - :start-after: [Begin Documentation] - - The code above is saved in `nvqc_intro.cpp` and compiled with the following command, targeting the :code:`nvqc` platform - - .. code-block:: console - - nvq++ nvqc_intro.cpp -o nvqc_intro.x --target nvqc - ./nvqc_intro.x - - [2024-03-14 19:25:05.545] Submitting jobs to NVQC service with 1 GPU(s). Max execution time: 3600 seconds (excluding queue wait time). - - ================ NVQC Device Info ================ - GPU Device Name: "NVIDIA H100 80GB HBM3" - CUDA Driver Version / Runtime Version: 12.2 / 12.0 - Total global memory (GB): 79.1 - Memory Clock Rate (MHz): 2619.000 - GPU Clock Rate (MHz): 1980.000 - ================================================== - { - __global__ : { 1111111111111111111111111:487 0000000000000000000000000:513 } - result : { 1111111111111111111111111:487 0000000000000000000000000:513 } - } - - -Simulator Backend Selection -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -NVQC hosts all CUDA-Q simulator backends (see :ref:`simulators `). -You may use the NVQC `backend` (Python) or `--nvqc-backend` (C++) option to select the simulator to be used by the service. - -For example, to request the `tensornet` simulator backend, the user can do the following for C++ or Python. - -.. tab:: Python - - .. code-block:: python - - cudaq.set_target("nvqc", backend="tensornet") - -.. tab:: C++ - - .. code-block:: console - - nvq++ nvqc_sample.cpp -o nvqc_sample.x --target nvqc --nvqc-backend tensornet - - -.. note:: - - By default, the single-GPU single-precision `custatevec-fp32` simulator backend will be selected if backend information is not specified. - -Multiple GPUs -^^^^^^^^^^^^^^ - -Some CUDA-Q simulator backends are capable of multi-GPU distribution as detailed in :ref:`simulators `. -For example, the `nvidia-mgpu` backend can partition and distribute state vector simulation to multiple GPUs to simulate -a larger number of qubits, whose state vector size grows beyond the memory size of a single GPU. - -To select a specific number of GPUs on the NVQC managed service, the following `ngpus` (Python) or `--nvqc-ngpus` (C++) option can be used. - - -.. tab:: Python - - .. code-block:: python - - cudaq.set_target("nvqc", backend="nvidia-mgpu", ngpus=4) - -.. tab:: C++ - - .. code-block:: console - - nvq++ nvqc_sample.cpp -o nvqc_sample.x --target nvqc --nvqc-backend nvidia-mgpu --nvqc-ngpus 4 - - -.. note:: - - If your NVQC subscription does not contain service instances that have the specified number of GPUs, - you may encounter the following error. - - .. code-block:: console - - Unable to find NVQC deployment with 16 GPUs. - Available deployments have {1, 2, 4, 8} GPUs. - Please check your `ngpus` value (Python) or `--nvqc-ngpus` value (C++). - -.. note:: - - Not all simulator backends are capable of utilizing multiple GPUs. - When requesting a multiple-GPU service with a single-GPU simulator backend, - you might encounter the following log message: - - .. code-block:: console - - The requested backend simulator (custatevec-fp32) is not capable of using all 4 GPUs requested. - Only one GPU will be used for simulation. - Please refer to CUDA-Q documentation for a list of multi-GPU capable simulator backends. - - Consider removing the `ngpus` value (Python) or `--nvqc-ngpus` value (C++) to use the default of 1 GPU - if you don't need to use a multi-GPU backend to better utilize NVQC resources. - - Please refer to the table below for a list of backend simulator names along with its multi-GPU capability. - - .. list-table:: Simulator Backends - :widths: 20 50 10 10 - :header-rows: 1 - - * - Name - - Description - - GPU Accelerated - - Multi-GPU - * - `qpp` - - CPU-only state vector simulator - - no - - no - * - `dm` - - CPU-only density matrix simulator - - no - - no - * - `custatevec-fp32` - - Single-precision `cuStateVec` simulator - - yes - - no - * - `custatevec-fp64` - - Double-precision `cuStateVec` simulator - - yes - - no - * - `tensornet` - - Double-precision `cuTensorNet` full tensor network contraction simulator - - yes - - yes - * - `tensornet-mps` - - Double-precision `cuTensorNet` matrix-product state simulator - - yes - - no - * - `nvidia-mgpu` - - Double-precision `cuStateVec` multi-GPU simulator - - yes - - yes - - -Multiple QPUs Asynchronous Execution -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -NVQC provides scalable QPU virtualization services, whereby clients -can submit asynchronous jobs simultaneously to NVQC. These jobs are -handled by a pool of service worker instances. - -For example, in the following code snippet, using the `nqpus` (Python) or `--nvqc-nqpus` (C++) configuration option, -the user instantiates 3 virtual QPU instances to submit simulation jobs to NVQC -calculating the expectation value along with parameter-shift gradients simultaneously. - -.. tab:: Python - - .. literalinclude:: ../../../snippets/python/using/cudaq/nvqc/nvqc_mqpu.py - :language: python - :start-after: [Begin Documentation] - -.. tab:: C++ - - .. literalinclude:: ../../../snippets/cpp/using/cudaq/nvqc/nvqc_mqpu.cpp - :language: cpp - :start-after: [Begin Documentation] - - The code above is saved in `nvqc_vqe.cpp` and compiled with the following command, targeting the :code:`nvqc` platform with 3 virtual QPUs. - - .. code-block:: console - - nvq++ nvqc_vqe.cpp -o nvqc_vqe.x --target nvqc --nvqc-nqpus 3 - ./nvqc_vqe.x - - -.. note:: - - The NVQC managed-service has a pool of worker instances processing incoming requests on a - first-come-first-serve basis. Thus, the attainable speedup using multiple virtual QPUs vs. - sequential execution on a single QPU depends on the NVQC service load. For example, - if the number of free workers is greater than the number of requested virtual QPUs, a linear - (ideal) speedup could be achieved. On the other hand, if all the service workers are busy, - multi-QPU distribution may not deliver any substantial speedup. - -FAQ -^^^^^ - -1. How do I get more information about my NVQC API submission? - -The environment variable `NVQC_LOG_LEVEL` can be used to turn on and off -certain logs. There are three levels: - -- Info (`info`): basic information about NVQC is logged to the console. This is the default. - -- Off (`off` or `0`): disable all NVQC logging. - -- Trace: (`trace`): log additional information for each NVQC job execution (including timing) - -2. I want to persist my API key to a configuration file. - -You may persist your NVQC API Key to a credential configuration file in lieu of -using the `NVQC_API_KEY` environment variable. -The configuration file can be generated as follows, replacing -the `api_key` value with your NVQC API Key. - -.. code:: bash - - echo "key: " >> $HOME/.nvqc_config - diff --git a/python/runtime/cudaq/algorithms/py_optimizer.cpp b/python/runtime/cudaq/algorithms/py_optimizer.cpp index 8cbbae1e770..f83ed438d77 100644 --- a/python/runtime/cudaq/algorithms/py_optimizer.cpp +++ b/python/runtime/cudaq/algorithms/py_optimizer.cpp @@ -9,7 +9,6 @@ #include #include "common/JsonConvert.h" -#include "common/SerializedCodeExecutionContext.h" #include "cudaq/algorithms/gradients/central_difference.h" #include "cudaq/algorithms/gradients/forward_difference.h" #include "cudaq/algorithms/gradients/parameter_shift.h" @@ -20,43 +19,6 @@ namespace cudaq { -/// Form the SerializedCodeExecutionContext -static SerializedCodeExecutionContext -get_serialized_code(std::string &source_code) { - SerializedCodeExecutionContext ctx; - try { - py::object json = py::module_::import("json"); - auto var_dict = get_serializable_var_dict(); - ctx.scoped_var_dict = py::str(json.attr("dumps")(var_dict)); - ctx.source_code = source_code; - } catch (py::error_already_set &e) { - throw std::runtime_error("Failed to serialized data: " + - std::string(e.what())); - } - return ctx; -} - -static std::string -get_required_raw_source_code(const int dim, const py::function &func, - const std::string &optimizer_var_name) { - // Get source code and remove the leading whitespace - std::string source_code = get_source_code(func); - - // Form the Python call to optimizer.optimize - std::ostringstream os; - auto obj_func_name = func.attr("__name__").cast(); - os << "energy, params_at_energy = " << optimizer_var_name << ".optimize(" - << dim << ", " << obj_func_name << ")\n"; - // The _json_request_result dictionary is a special dictionary where outputs - // are saved. Must be serializable to JSON using the JSON structures. - os << "_json_request_result['executionContext']['optResult'] = [energy, " - "params_at_energy]\n"; - auto function_call = os.str(); - - // Return the combined code - return source_code + "\n" + function_call; -} - /// @brief Bind the `cudaq::optimization_result` typedef. void bindOptimizationResult(py::module &mod) { py::class_(mod, "OptimizationResult"); @@ -188,36 +150,6 @@ py::class_ addPyOptimizer(py::module &mod, std::string &&name) { .def( "optimize", [](OptimizerT &opt, const int dim, py::function &func) { - auto &platform = cudaq::get_platform(); - if (platform.get_remote_capabilities().serializedCodeExec && - platform.num_qpus() == 1) { - std::string optimizer_var_name = - cudaq::get_var_name_for_handle(py::cast(&opt)); - if (optimizer_var_name.empty()) - throw std::runtime_error( - "Unable to find desired optimizer object in any " - "namespace. Aborting."); - - auto ctx = std::make_unique("sample", 0); - platform.set_exec_ctx(ctx.get()); - - std::string combined_code = - get_required_raw_source_code(dim, func, optimizer_var_name); - - SerializedCodeExecutionContext serialized_code_execution_object = - get_serialized_code(combined_code); - - platform.launchSerializedCodeExecution( - func.attr("__name__").cast(), - serialized_code_execution_object); - - platform.reset_exec_ctx(); - auto result = cudaq::optimization_result{}; - if (ctx->optResult) - result = std::move(*ctx->optResult); - return result; - } - return opt.optimize(dim, [&](std::vector x, std::vector &grad) { // Call the function. diff --git a/python/runtime/cudaq/algorithms/py_state.cpp b/python/runtime/cudaq/algorithms/py_state.cpp index 966a4fa3c1c..b89de2f2f52 100644 --- a/python/runtime/cudaq/algorithms/py_state.cpp +++ b/python/runtime/cudaq/algorithms/py_state.cpp @@ -783,8 +783,7 @@ index pair. mod.def( "get_state", [&](py::object kernel, py::args args) { - if (holder.getTarget().name == "remote-mqpu" || - holder.getTarget().name == "nvqc") + if (holder.getTarget().name == "remote-mqpu") return pyGetStateRemote(kernel, args); if (holder.getTarget().name == "orca-photonics") return pyGetStateLibraryMode(kernel, args); diff --git a/python/runtime/cudaq/algorithms/py_utils.h b/python/runtime/cudaq/algorithms/py_utils.h index 227f615a9d1..070a2922ae5 100644 --- a/python/runtime/cudaq/algorithms/py_utils.h +++ b/python/runtime/cudaq/algorithms/py_utils.h @@ -16,13 +16,6 @@ namespace py = pybind11; namespace cudaq { -/// @brief Get a JSON-encoded dictionary of a combination of all local -/// and global variables that are JSON compatible -py::dict get_serializable_var_dict(); - -/// @brief Fetch the Python source code from a `py::function` -std::string get_source_code(const py::function &func); - /// @brief Find the variable name for a given Python object handle. It searches /// locally first, walks up the call stack, and finally checks the global /// namespace. If not found, it returns an empty string. diff --git a/python/runtime/cudaq/algorithms/py_vqe.cpp b/python/runtime/cudaq/algorithms/py_vqe.cpp index cf793046d14..d030ffd4a3b 100644 --- a/python/runtime/cudaq/algorithms/py_vqe.cpp +++ b/python/runtime/cudaq/algorithms/py_vqe.cpp @@ -10,12 +10,9 @@ #include #include "common/ArgumentWrapper.h" -#include "common/JsonConvert.h" -#include "common/SerializedCodeExecutionContext.h" #include "cudaq/Optimizer/Dialect/CC/CCTypes.h" #include "cudaq/algorithms/gradient.h" #include "cudaq/algorithms/optimizer.h" -#include "py_utils.h" #include "py_vqe.h" #include "runtime/cudaq/platform/py_alt_launch_kernel.h" #include "utils/OpaqueArguments.h" @@ -173,95 +170,6 @@ pyVQE_remote_cpp(cudaq::quantum_platform &platform, py::object &kernel, return ctx->optResult.value_or(optimization_result{}); } -/// @brief Perform VQE on a remote platform. This function is used for many of -/// the pyVQE variants below, so some of the parameters may be nullptr. -static optimization_result -pyVQE_remote(cudaq::quantum_platform &platform, py::object &kernel, - spin_op &hamiltonian, cudaq::optimizer &optimizer, - cudaq::gradient *gradient, py::function *argumentMapper, - const int n_params, const int shots) { - py::object json = py::module_::import("json"); - py::object inspect = py::module_::import("inspect"); - - // Form scoped_vars_str. This is needed for a) capturing user variables when - // an argumentMapper is provided, and b) automatically capturing all nested - // cudaq.kernels. - py::dict scoped_vars = get_serializable_var_dict(); - -// This macro loads a JSON-like object into scoped_vars[] as -// scoped_vars["__varname"] = varname. This roughly corresponds to the -// following Python code: -// scoped_vars["__varname/module.name"] = json.loads(varname.to_json()) -#define LOAD_VAR(VAR_NAME) \ - do { \ - py::object val = py::cast(VAR_NAME); \ - scoped_vars[py::str( \ - std::string("__" #VAR_NAME "/") + \ - val.get_type().attr("__module__").cast() + "." + \ - val.get_type().attr("__name__").cast())] = \ - json.attr("loads")(val.attr("to_json")()); \ - } while (0) -#define LOAD_VAR_NO_CAST(VAR_NAME) \ - do { \ - scoped_vars[py::str( \ - std::string("__" #VAR_NAME "/") + \ - VAR_NAME.get_type().attr("__module__").cast() + "." + \ - VAR_NAME.get_type().attr("__name__").cast())] = \ - json.attr("loads")(VAR_NAME.attr("to_json")()); \ - } while (0) - - auto spin = cudaq::spin_op::canonicalize(hamiltonian); - LOAD_VAR(spin); - LOAD_VAR(optimizer); - LOAD_VAR_NO_CAST(kernel); - if (gradient) - LOAD_VAR(gradient); - - // Get a string representation of the scoped_vars dictionary. This is - // guaranteed to be a JSON-friendly dictionary, so the conversion should occur - // cleanly. - auto scoped_vars_str = json.attr("dumps")(scoped_vars).cast(); - - // Form SerializedCodeExecutionContext.source_code - std::ostringstream os; - if (argumentMapper) { - std::string source_code = cudaq::get_source_code(*argumentMapper); - // If it is a lambda function and it is used inline with a function call, it - // can sometimes include the trailing comma. Remove that here. - auto end = source_code.find_last_not_of(", \t\r\n"); - if (end != std::string::npos) - source_code.erase(end + 1); - os << "__arg_mapper = " << source_code << '\n'; - } - os << "energy, params_at_energy = cudaq.vqe("; - os << "kernel=__kernel, "; - if (gradient) - os << "gradient_strategy=__gradient, "; - os << "spin_operator=__spin, "; - os << "optimizer=__optimizer, "; - os << "parameter_count=" << n_params << ", "; - if (argumentMapper) - os << "argument_mapper=__arg_mapper, "; - os << "shots=" << shots << ")\n"; - os << "_json_request_result['executionContext']['optResult'] = [energy, " - "params_at_energy]\n"; - auto function_call = os.str(); - - SerializedCodeExecutionContext scCtx; - scCtx.scoped_var_dict = std::move(scoped_vars_str); - scCtx.source_code = std::move(function_call); - - auto ctx = std::make_unique("sample", 0); - platform.set_exec_ctx(ctx.get()); - platform.launchSerializedCodeExecution( - kernel.attr("name").cast(), scCtx); - platform.reset_exec_ctx(); - auto result = cudaq::optimization_result{}; - if (ctx->optResult) - result = std::move(*ctx->optResult); - return result; -} - /// @brief Throw an exception instructing the user how to achieve optimal /// performance static void throwPerformanceError() { @@ -284,10 +192,6 @@ optimization_result pyVQE(py::object &kernel, spin_op &hamiltonian, n_params, shots); throwPerformanceError(); } - if (platform.get_remote_capabilities().serializedCodeExec) - return pyVQE_remote(platform, kernel, hamiltonian, optimizer, - /*gradient=*/nullptr, /*argumentMapper=*/nullptr, - n_params, shots); return optimizer.optimize(n_params, [&](const std::vector &x, std::vector &grad_vec) { py::args params = py::make_tuple(x); @@ -310,9 +214,6 @@ optimization_result pyVQE(py::object &kernel, spin_op &hamiltonian, shots); throwPerformanceError(); } - if (platform.get_remote_capabilities().serializedCodeExec) - return pyVQE_remote(platform, kernel, hamiltonian, optimizer, - /*gradient=*/nullptr, &argumentMapper, n_params, shots); return optimizer.optimize(n_params, [&](const std::vector &x, std::vector &grad_vec) { py::args params; @@ -343,9 +244,6 @@ optimization_result pyVQE(py::object &kernel, cudaq::gradient &gradient, /*argumentMapper=*/nullptr, n_params, shots); throwPerformanceError(); } - if (platform.get_remote_capabilities().serializedCodeExec) - return pyVQE_remote(platform, kernel, hamiltonian, optimizer, &gradient, - /*argumentMapper=*/nullptr, n_params, shots); std::function)> get_expected_value = [&](std::vector x) { py::args params = py::make_tuple(x); @@ -381,9 +279,6 @@ optimization_result pyVQE(py::object &kernel, cudaq::gradient &gradient, &gradient, &argumentMapper, n_params, shots); throwPerformanceError(); } - if (platform.get_remote_capabilities().serializedCodeExec) - return pyVQE_remote(platform, kernel, hamiltonian, optimizer, &gradient, - &argumentMapper, n_params, shots); std::function)> get_expected_value = [&](std::vector x) { py::args params; diff --git a/python/runtime/utils/PyRemoteSimulatorQPU.cpp b/python/runtime/utils/PyRemoteSimulatorQPU.cpp index c0008c9f51f..bbcaee66b48 100644 --- a/python/runtime/utils/PyRemoteSimulatorQPU.cpp +++ b/python/runtime/utils/PyRemoteSimulatorQPU.cpp @@ -15,7 +15,7 @@ using namespace mlir; namespace { // This is a helper function to help reduce duplicated code across -// PyRemoteSimulatorQPU and PyNvcfSimulatorQPU. +// PyRemoteSimulatorQPU. static void launchVqeImpl(cudaq::ExecutionContext *executionContextPtr, std::unique_ptr &m_client, const std::string &m_simName, const std::string &name, @@ -37,15 +37,15 @@ static void launchVqeImpl(cudaq::ExecutionContext *executionContextPtr, std::string errorMsg; const bool requestOkay = m_client->sendRequest( - *mlirContext, *executionContextPtr, /*serializedCodeContext=*/nullptr, - gradient, &optimizer, n_params, m_simName, name, /*kernelFunc=*/nullptr, - wrapper->rawArgs, /*argSize=*/0, &errorMsg); + *mlirContext, *executionContextPtr, gradient, &optimizer, n_params, + m_simName, name, /*kernelFunc=*/nullptr, wrapper->rawArgs, /*argSize=*/0, + &errorMsg); if (!requestOkay) throw std::runtime_error("Failed to launch VQE. Error: " + errorMsg); } // This is a helper function to help reduce duplicated code across -// PyRemoteSimulatorQPU and PyNvcfSimulatorQPU. +// PyRemoteSimulatorQPU. static void launchKernelImpl(cudaq::ExecutionContext *executionContextPtr, std::unique_ptr &m_client, @@ -68,7 +68,7 @@ launchKernelImpl(cudaq::ExecutionContext *executionContextPtr, executionContextPtr ? *executionContextPtr : defaultContext; std::string errorMsg; const bool requestOkay = m_client->sendRequest( - *mlirContext, executionContext, /*serializedCodeContext=*/nullptr, + *mlirContext, executionContext, /*vqe_gradient=*/nullptr, /*vqe_optimizer=*/nullptr, /*vqe_n_params=*/0, m_simName, name, kernelFunc, wrapper->rawArgs, voidStarSize, &errorMsg); if (!requestOkay) @@ -103,7 +103,7 @@ static void launchKernelStreamlineImpl( actualArgs.erase(actualArgs.begin()); const bool requestOkay = m_client->sendRequest( - *mlirContext, executionContext, /*serializedCodeContext=*/nullptr, + *mlirContext, executionContext, /*vqe_gradient=*/nullptr, /*vqe_optimizer=*/nullptr, /*vqe_n_params=*/0, m_simName, name, nullptr, nullptr, 0, &errorMsg, &actualArgs); if (!requestOkay) @@ -159,57 +159,6 @@ class PyRemoteSimulatorQPU : public cudaq::BaseRemoteSimulatorQPU { virtual ~PyRemoteSimulatorQPU() = default; }; -/// Implementation of QPU subtype that submits simulation request to NVCF. -/// NOTE: This class duplicates the `isEmulated` and `launchKernel` methods from -/// `PyRemoteSimulatorQPU` class above; tried using multiple inheritance, but, -/// got errors from the functionality to register type. -class PyNvcfSimulatorQPU : public cudaq::BaseNvcfSimulatorQPU { -public: - PyNvcfSimulatorQPU() : BaseNvcfSimulatorQPU() {} - - virtual bool isEmulated() override { return true; } - - void launchVQE(const std::string &name, const void *kernelArgs, - cudaq::gradient *gradient, const cudaq::spin_op &H, - cudaq::optimizer &optimizer, const int n_params, - const std::size_t shots) override { - CUDAQ_INFO("PyNvcfSimulatorQPU: Launch VQE kernel named '{}' remote QPU {} " - "(simulator = {})", - name, qpu_id, m_simName); - ::launchVqeImpl(getExecutionContextForMyThread(), m_client, m_simName, name, - kernelArgs, gradient, H, optimizer, n_params, shots); - } - - cudaq::KernelThunkResultType - launchKernel(const std::string &name, cudaq::KernelThunkType kernelFunc, - void *args, std::uint64_t voidStarSize, - std::uint64_t resultOffset, - const std::vector &rawArgs) override { - CUDAQ_INFO("PyNvcfSimulatorQPU: Launch kernel named '{}' remote QPU {} " - "(simulator = {})", - name, qpu_id, m_simName); - ::launchKernelImpl(getExecutionContextForMyThread(), m_client, m_simName, - name, make_degenerate_kernel_type(kernelFunc), args, - voidStarSize, resultOffset, rawArgs); - // TODO: Python should probably support return values too. - return {}; - } - - void launchKernel(const std::string &name, - const std::vector &rawArgs) override { - CUDAQ_INFO("PyNvcfSimulatorQPU: Streamline launch kernel named '{}' " - "remote QPU {} " - "(simulator = {})", - name, qpu_id, m_simName); - ::launchKernelStreamlineImpl(getExecutionContextForMyThread(), m_client, - m_simName, name, rawArgs); - } - - PyNvcfSimulatorQPU(PyNvcfSimulatorQPU &&) = delete; - virtual ~PyNvcfSimulatorQPU() = default; -}; - } // namespace CUDAQ_REGISTER_TYPE(cudaq::QPU, PyRemoteSimulatorQPU, RemoteSimulatorQPU) -CUDAQ_REGISTER_TYPE(cudaq::QPU, PyNvcfSimulatorQPU, NvcfSimulatorQPU) diff --git a/python/runtime/utils/PyRestRemoteClient.cpp b/python/runtime/utils/PyRestRemoteClient.cpp index 852daf4f6c0..5dd5988feb2 100644 --- a/python/runtime/utils/PyRestRemoteClient.cpp +++ b/python/runtime/utils/PyRestRemoteClient.cpp @@ -17,15 +17,6 @@ class PyRestRemoteClient : public cudaq::BaseRemoteRestRuntimeClient { PyRestRemoteClient() : BaseRemoteRestRuntimeClient() {} }; -/// Implementation of QPU subtype that submits simulation request to NVCF. -/// REST client submitting jobs to NVCF-hosted `cudaq-qpud` service. -class PyNvcfRuntimeClient : public cudaq::BaseNvcfRuntimeClient { -public: - /// @brief The constructor - PyNvcfRuntimeClient() : BaseNvcfRuntimeClient() {} -}; - } // namespace CUDAQ_REGISTER_TYPE(cudaq::RemoteRuntimeClient, PyRestRemoteClient, rest) -CUDAQ_REGISTER_TYPE(cudaq::RemoteRuntimeClient, PyNvcfRuntimeClient, NVCF) diff --git a/python/tests/remote/test_remote_code_exec.py b/python/tests/remote/test_remote_code_exec.py deleted file mode 100644 index 8ff617f15e4..00000000000 --- a/python/tests/remote/test_remote_code_exec.py +++ /dev/null @@ -1,444 +0,0 @@ -# ============================================================================ # -# Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. # -# All rights reserved. # -# # -# This source code and the accompanying materials are made available under # -# the terms of the Apache License 2.0 which accompanies this distribution. # -# ============================================================================ # -import pytest -import os -import sys -import subprocess -import time -import numpy as np - -import cudaq -from cudaq import spin -import numpy as np - -try: - import requests - import psutil - have_requests = True -except ImportError: - have_requests = False - -skipIfModulesNotInstalled = pytest.mark.skipif( - not have_requests, - reason="please install requests and/or psutil for these tests") - - -def assert_close(want, got, tolerance=1.e-5) -> bool: - return abs(want - got) < tolerance - - -def kill_proc_and_child_processes(parent_proc: subprocess.Popen): - try: - parent = psutil.Process(parent_proc.pid) - except psutil.NoSuchProcess: - return - - # Try to kill the children processes, giving them 3 seconds for a graceful - # exit, and then a forceful kill after that. - children = parent.children(recursive=True) - for child in children: - try: - child.terminate() - except psutil.NoSuchProcess: - continue - - _, still_alive = psutil.wait_procs(children, timeout=3) - - for child in still_alive: - try: - child.kill() - except psutil.NoSuchProcess: - continue - - # Now kill the parent process - parent.terminate() - _, still_alive = psutil.wait_procs([parent], timeout=3) - for p in still_alive: - try: - p.kill() - except psutil.NoSuchProcess: - continue - - -def wait_until_port_active(port: int) -> bool: - port_up = False - retries = 0 - port_url = 'http://localhost:' + str(port) - while (not port_up): - try: - ping_response = requests.get(port_url) - port_up = (ping_response.status_code == 200) - except: - port_up = False - if not port_up: - retries += 1 - if retries > 100: - print("EXIT: TOO MANY RETRIES!") - return False - time.sleep(0.1) - return True - - -@pytest.fixture(scope="session", autouse=True) -def startUpMockServer(): - os.environ['CUDAQ_SER_CODE_EXEC'] = '1' - cudaq_qpud = os.path.dirname(cudaq.__file__) + "/../bin/cudaq-qpud.py" - nvqc_proxy = os.path.dirname(cudaq.__file__) + "/../bin/nvqc_proxy.py" - p1 = subprocess.Popen([sys.executable, nvqc_proxy]) - p2 = subprocess.Popen([sys.executable, cudaq_qpud, '--port', '3031']) - cudaq.set_target("remote-mqpu", url="localhost:3030") - proxy_up = wait_until_port_active(3030) - qpud_up = wait_until_port_active(3031) - - # Shutdown servers if either one fails to come up. The tests will fail - # downstream. - if not proxy_up or not qpud_up: - kill_proc_and_child_processes(p1) - kill_proc_and_child_processes(p2) - - yield - cudaq.reset_target() - kill_proc_and_child_processes(p1) - kill_proc_and_child_processes(p2) - - -@pytest.fixture(autouse=True) -def do_something(): - yield - cudaq.__clearKernelRegistries() - - -@skipIfModulesNotInstalled -def test_setup(): - target = cudaq.get_target() - numQpus = target.num_qpus() - assert numQpus == 1 - - -@skipIfModulesNotInstalled -def test_optimizer(): - hamiltonian = 5.907 - 2.1433 * spin.x(0) * spin.x(1) - 2.1433 * spin.y( - 0) * spin.y(1) + .21829 * spin.z(0) - 6.125 * spin.z(1) - - # Verify that variables can be captured by kernels - testVar = 0 - - @cudaq.kernel - def kernel(angles: list[float]): - qvector = cudaq.qvector(2) - x(qvector[0]) - ry(angles[0] + testVar, qvector[1]) - x.ctrl(qvector[1], qvector[0]) - - optimizer = cudaq.optimizers.Adam() - gradient = cudaq.gradients.CentralDifference() - - def objective_function(parameter_vector: list[float], - hamiltonian=hamiltonian, - gradient_strategy=gradient, - kernel=kernel) -> tuple[float, list[float]]: - get_result = lambda parameter_vector: cudaq.observe( - kernel, hamiltonian, parameter_vector).expectation() - cost = get_result(parameter_vector) - gradient_vector = gradient_strategy.compute(parameter_vector, - get_result, cost) - return cost, gradient_vector - - energy, parameter = optimizer.optimize(dimensions=1, - function=objective_function) - print(f"\nminimized = {round(energy,16)}") - print(f"optimal theta = {round(parameter[0],16)}") - assert assert_close(energy, -1.7483830311526454, 1e-3) - assert assert_close(parameter[0], 0.5840908448487905, 1e-3) - - -@skipIfModulesNotInstalled -def test_optimizer_nested_kernels(): - hamiltonian = 5.907 - 2.1433 * spin.x(0) * spin.x(1) - 2.1433 * spin.y( - 0) * spin.y(1) + .21829 * spin.z(0) - 6.125 * spin.z(1) - - @cudaq.kernel - def kernelA(qvector: cudaq.qview): - x(qvector[0]) - - @cudaq.kernel - def kernelB(angles: list[float]): - qvector = cudaq.qvector(2) - # This x() is done in a nested kernel - # x(qvector[0]) - kernelA(qvector) - ry(angles[0], qvector[1]) - x.ctrl(qvector[1], qvector[0]) - - optimizer = cudaq.optimizers.Adam() - gradient = cudaq.gradients.CentralDifference() - - def nested_obj_func(): - print('Calling nested_obj_func') - - def objective_function(parameter_vector: list[float], - hamiltonian=hamiltonian, - gradient_strategy=gradient, - kernel=kernelB) -> tuple[float, list[float]]: - nested_obj_func() # gratuitous call to a nested function (for testing) - - def another_nested_obj_func(): - print('I am in another_nested_obj_func') - - another_nested_obj_func() - get_result = lambda parameter_vector: cudaq.observe( - kernel, hamiltonian, parameter_vector).expectation() - cost = get_result(parameter_vector) - gradient_vector = gradient_strategy.compute(parameter_vector, - get_result, cost) - return cost, gradient_vector - - energy, parameter = optimizer.optimize(dimensions=1, - function=objective_function) - print(f"\nminimized = {round(energy,16)}") - print(f"optimal theta = {round(parameter[0],16)}") - assert assert_close(energy, -1.7483830311526454, 1e-3) - assert assert_close(parameter[0], 0.5840908448487905, 1e-3) - - -@skipIfModulesNotInstalled -@pytest.mark.parametrize( - "optimizer", [cudaq.optimizers.COBYLA(), - cudaq.optimizers.NelderMead()]) -def test_simple_vqe(optimizer): - hamiltonian = 5.907 - 2.1433 * spin.x(0) * spin.x(1) - 2.1433 * spin.y( - 0) * spin.y(1) + .21829 * spin.z(0) - 6.125 * spin.z(1) - - @cudaq.kernel - def kernel(angles: list[float]): - qvector = cudaq.qvector(2) - x(qvector[0]) - ry(angles[0], qvector[1]) - x.ctrl(qvector[1], qvector[0]) - - energy, parameter = cudaq.vqe(kernel=kernel, - spin_operator=hamiltonian, - optimizer=optimizer, - parameter_count=1) - - print(f"\nminimized = {round(energy,16)}") - print(f"optimal theta = {round(parameter[0],16)}") - want_expectation_value = -1.7487948611472093 - want_optimal_parameters = [0.59] - assert assert_close(want_expectation_value, energy, tolerance=1e-2) - assert all( - assert_close(want_parameter, got_parameter, tolerance=1e-2) - for want_parameter, got_parameter in zip(want_optimal_parameters, - parameter)) - - -@skipIfModulesNotInstalled -@pytest.mark.parametrize( - "optimizer", [cudaq.optimizers.COBYLA(), - cudaq.optimizers.NelderMead()]) -def test_simple_vqe_nested_kernels(optimizer): - hamiltonian = 5.907 - 2.1433 * spin.x(0) * spin.x(1) - 2.1433 * spin.y( - 0) * spin.y(1) + .21829 * spin.z(0) - 6.125 * spin.z(1) - - @cudaq.kernel - def kernelA(qvector: cudaq.qview): - x(qvector[0]) - - @cudaq.kernel - def kernelB(angles: list[float]): - qvector = cudaq.qvector(2) - # This x() is done in a nested kernel - # x(qvector[0]) - kernelA(qvector) - ry(angles[0], qvector[1]) - x.ctrl(qvector[1], qvector[0]) - - energy, parameter = cudaq.vqe(kernel=kernelB, - spin_operator=hamiltonian, - optimizer=optimizer, - parameter_count=1) - - print(f"\nminimized = {round(energy,16)}") - print(f"optimal theta = {round(parameter[0],16)}") - want_expectation_value = -1.7487948611472093 - want_optimal_parameters = [0.59] - assert assert_close(want_expectation_value, energy, tolerance=1e-2) - assert all( - assert_close(want_parameter, got_parameter, tolerance=1e-2) - for want_parameter, got_parameter in zip(want_optimal_parameters, - parameter)) - - -@skipIfModulesNotInstalled -def test_complex_vqe_inline_lambda(): - hamiltonian = 5.907 - 2.1433 * spin.x(0) * spin.x(1) - 2.1433 * spin.y( - 0) * spin.y(1) + .21829 * spin.z(0) - 6.125 * spin.z(1) - - @cudaq.kernel - def kernel(angles: list[float], num_qubits: int): - qvector = cudaq.qvector(num_qubits) - x(qvector[0]) - ry(angles[0], qvector[1]) - x.ctrl(qvector[1], qvector[0]) - - optimizer = cudaq.optimizers.Adam() - grad = cudaq.gradients.CentralDifference() - - num_qubits = 2 - energy, parameter = cudaq.vqe(kernel=kernel, - gradient_strategy=grad, - spin_operator=hamiltonian, - optimizer=optimizer, - argument_mapper=lambda x: (x, num_qubits), - parameter_count=1) - - print(f"\nminimized = {round(energy,16)}") - print(f"optimal theta = {round(parameter[0],16)}") - assert assert_close(energy, -1.7488648395275948, 1e-3) - assert assert_close(parameter[0], 0.5840908448487905, 1e-3) - - -@skipIfModulesNotInstalled -def test_vqe_perf_warning(): - hamiltonian = 5.907 - 2.1433 * spin.x(0) * spin.x(1) - 2.1433 * spin.y( - 0) * spin.y(1) + .21829 * spin.z(0) - 6.125 * spin.z(1) - - @cudaq.kernel - def kernel(num_qubits: int, angles: list[float]): - qvector = cudaq.qvector(num_qubits) - x(qvector[0]) - ry(angles[0], qvector[1]) - x.ctrl(qvector[1], qvector[0]) - - optimizer = cudaq.optimizers.Adam() - grad = cudaq.gradients.CentralDifference() - - num_qubits = 2 - with pytest.raises(RuntimeError) as error: - energy, parameter = cudaq.vqe(kernel=kernel, - gradient_strategy=grad, - spin_operator=hamiltonian, - optimizer=optimizer, - argument_mapper=lambda x: (num_qubits, x), - parameter_count=1) - - -# This is a helper function used by parameterized tests below. -@skipIfModulesNotInstalled -@pytest.mark.skip -def test_complex_vqe_named_lambda(optimizer, gradient): - hamiltonian = 5.907 - 2.1433 * spin.x(0) * spin.x(1) - 2.1433 * spin.y( - 0) * spin.y(1) + .21829 * spin.z(0) - 6.125 * spin.z(1) - - @cudaq.kernel - def kernel(angles: list[float], num_qubits: int): - qvector = cudaq.qvector(num_qubits) - x(qvector[0]) - ry(angles[0], qvector[1]) - x.ctrl(qvector[1], qvector[0]) - - num_qubits = 2 - arg_mapper = lambda x: (x, num_qubits) - energy, parameter = cudaq.vqe(kernel=kernel, - gradient_strategy=gradient, - spin_operator=hamiltonian, - optimizer=optimizer, - argument_mapper=arg_mapper, - parameter_count=1) - - print(f"\nminimized = {round(energy,16)}") - print(f"optimal theta = {round(parameter[0],16)}") - want_expectation_value = -1.7487948611472093 - want_optimal_parameters = [0.59] - assert assert_close(want_expectation_value, energy, tolerance=1e-2) - assert all( - assert_close(want_parameter, got_parameter, tolerance=1e-2) - for want_parameter, got_parameter in zip(want_optimal_parameters, - parameter)) - - -@skipIfModulesNotInstalled -@pytest.mark.parametrize("optimizer", [ - cudaq.optimizers.LBFGS(), - cudaq.optimizers.Adam(), - cudaq.optimizers.GradientDescent(), - cudaq.optimizers.SGD(), -]) -def test_complex_vqe_named_lambda_sweep_opt(optimizer): - test_complex_vqe_named_lambda(optimizer, - cudaq.gradients.CentralDifference()) - - -@skipIfModulesNotInstalled -@pytest.mark.parametrize("gradient", [ - cudaq.gradients.CentralDifference(), - cudaq.gradients.ParameterShift(), - cudaq.gradients.ForwardDifference() -]) -def test_complex_vqe_named_lambda_sweep_grad(gradient): - test_complex_vqe_named_lambda(cudaq.optimizers.Adam(), gradient) - - -@skipIfModulesNotInstalled -def test_state_preparation(): - - @cudaq.kernel - def kernel(vec: list[complex]): - qubits = cudaq.qvector(vec) - - state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] - counts = cudaq.sample(kernel, state) - assert '00' in counts - assert '10' in counts - assert not '01' in counts - assert not '11' in counts - - -@skipIfModulesNotInstalled -def test_state_preparation_builder(): - kernel, state = cudaq.make_kernel(list[complex]) - qubits = kernel.qalloc(state) - - state = [1. / np.sqrt(2.), 1. / np.sqrt(2.), 0., 0.] - counts = cudaq.sample(kernel, state) - assert '00' in counts - assert '10' in counts - assert not '01' in counts - assert not '11' in counts - - -@skipIfModulesNotInstalled -@pytest.mark.skip(reason="https://github.com/NVIDIA/cuda-quantum/issues/1924") -def test_arbitrary_unitary_synthesis(): - cudaq.register_operation("custom_h", - 1. / np.sqrt(2.) * np.array([1, 1, 1, -1])) - cudaq.register_operation("custom_x", np.array([0, 1, 1, 0])) - - @cudaq.kernel - def bell(angles: list[float]): - qubits = cudaq.qvector(2) - custom_h(qubits[0]) - custom_x.ctrl(qubits[0], qubits[1]) - ry(angles[0], qubits[1]) - - hamiltonian = 5.907 - 2.1433 * spin.x(0) * spin.x(1) - 2.1433 * spin.y( - 0) * spin.y(1) + .21829 * spin.z(0) - 6.125 * spin.z(1) - - optimizer = cudaq.optimizers.Adam() - energy, parameter = cudaq.vqe(kernel=bell, - spin_operator=hamiltonian, - optimizer=optimizer, - parameter_count=1) - print(f"\nminimized = {round(energy,16)}") - print(f"optimal theta = {round(parameter[0],16)}") - - -# leave for gdb debugging -if __name__ == "__main__": - loc = os.path.abspath(__file__) - pytest.main([loc, "-rP"]) diff --git a/runtime/common/BaseRemoteSimulatorQPU.h b/runtime/common/BaseRemoteSimulatorQPU.h index 090d5a00f86..ce9a9fd9cf4 100644 --- a/runtime/common/BaseRemoteSimulatorQPU.h +++ b/runtime/common/BaseRemoteSimulatorQPU.h @@ -13,7 +13,6 @@ #include "common/RemoteKernelExecutor.h" #include "common/Resources.h" #include "common/RuntimeMLIR.h" -#include "common/SerializedCodeExecutionContext.h" #include "cudaq.h" #include "cudaq/Optimizer/Builder/Runtime.h" #include "cudaq/Optimizer/Transforms/Passes.h" @@ -105,9 +104,9 @@ class BaseRemoteSimulatorQPU : public cudaq::QPU { std::string errorMsg; const bool requestOkay = m_client->sendRequest( - *m_mlirContext, *executionContextPtr, /*serializedCodeContext=*/nullptr, - gradient, &optimizer, n_params, m_simName, name, /*kernelFunc=*/nullptr, - kernelArgs, /*argSize=*/0, &errorMsg); + *m_mlirContext, *executionContextPtr, gradient, &optimizer, n_params, + m_simName, name, /*kernelFunc=*/nullptr, kernelArgs, /*argSize=*/0, + &errorMsg); if (!requestOkay) throw std::runtime_error("Failed to launch VQE. Error: " + errorMsg); } @@ -188,7 +187,7 @@ class BaseRemoteSimulatorQPU : public cudaq::QPU { std::string errorMsg; const bool requestOkay = m_client->sendRequest( - *m_mlirContext, executionContext, /*serializedCodeContext=*/nullptr, + *m_mlirContext, executionContext, /*vqe_gradient=*/nullptr, /*vqe_optimizer=*/nullptr, /*vqe_n_params=*/0, m_simName, name, make_degenerate_kernel_type(kernelFunc), args, voidStarSize, &errorMsg, rawArgs); @@ -204,8 +203,7 @@ class BaseRemoteSimulatorQPU : public cudaq::QPU { " bytes overflows the argument buffer."); // Currently, we only support result buffer serialization on LittleEndian // CPUs (x86, ARM, PPC64LE). - // Note: NVQC service will always be using LE. If - // the client (e.g., compiled from source) is built for big-endian, we + // If the client (e.g., compiled from source) is built for big-endian, we // will throw an error if result buffer data is returned. if (llvm::sys::IsBigEndianHost) throw std::runtime_error( @@ -223,40 +221,6 @@ class BaseRemoteSimulatorQPU : public cudaq::QPU { return {}; } - void - launchSerializedCodeExecution(const std::string &name, - cudaq::SerializedCodeExecutionContext - &serializeCodeExecutionObject) override { - CUDAQ_INFO( - "BaseRemoteSimulatorQPU: Launch remote code named '{}' remote QPU {} " - "(simulator = {})", - name, qpu_id, m_simName); - - cudaq::ExecutionContext *executionContextPtr = - getExecutionContextForMyThread(); - - if (executionContextPtr && executionContextPtr->name == "tracer") { - return; - } - - // Default context for a 'fire-and-ignore' kernel launch; i.e., no context - // was set before launching the kernel. Use a static variable per thread to - // set up a single-shot execution context for this case. - static thread_local cudaq::ExecutionContext defaultContext("sample", - /*shots=*/1); - cudaq::ExecutionContext &executionContext = - executionContextPtr ? *executionContextPtr : defaultContext; - - std::string errorMsg; - const bool requestOkay = m_client->sendRequest( - *m_mlirContext, executionContext, &serializeCodeExecutionObject, - /*vqe_gradient=*/nullptr, /*vqe_optimizer=*/nullptr, /*vqe_n_params=*/0, - m_simName, name, /*kernelFunc=*/nullptr, /*args=*/nullptr, - /*voidStarSize=*/0, &errorMsg); - if (!requestOkay) - throw std::runtime_error("Failed to launch kernel. Error: " + errorMsg); - } - void setExecutionContext(cudaq::ExecutionContext *context) override { CUDAQ_INFO("BaseRemoteSimulatorQPU::setExecutionContext QPU {}", qpu_id); std::scoped_lock lock(m_contextMutex); @@ -274,127 +238,4 @@ class BaseRemoteSimulatorQPU : public cudaq::QPU { } }; -/// Implementation of base QPU subtype that submits simulation request to -/// NVCF. -class BaseNvcfSimulatorQPU : public cudaq::BaseRemoteSimulatorQPU { -public: - BaseNvcfSimulatorQPU() : BaseRemoteSimulatorQPU() { - m_client = cudaq::registry::get("NVCF"); - } - - // Encapsulates Nvcf configurations that we need. - // Empty strings mean no config available. - struct NvcfConfig { - std::string apiKey; - std::string functionId; - std::string versionId; - }; - - virtual void setTargetBackend(const std::string &backend) override { - auto parts = cudaq::split(backend, ';'); - if (parts.size() % 2 != 0) - throw std::invalid_argument("Unexpected backend configuration string. " - "Expecting a ';'-separated key-value pairs."); - std::string apiKey, functionId, versionId, ngpus; - - for (std::size_t i = 0; i < parts.size(); i += 2) { - if (parts[i] == "simulator") - m_simName = parts[i + 1]; - // First, check if api key or function Id is provided as target options. - if (parts[i] == "function_id") - functionId = parts[i + 1]; - if (parts[i] == "api_key") - apiKey = parts[i + 1]; - if (parts[i] == "version_id") - versionId = parts[i + 1]; - if (parts[i] == "ngpus") - ngpus = parts[i + 1]; - } - // If none provided, look for them in environment variables or the config - // file. - const auto config = searchNvcfConfig(); - if (apiKey.empty()) - apiKey = config.apiKey; - if (functionId.empty()) - functionId = config.functionId; - if (versionId.empty()) - versionId = config.versionId; - - // API key and function Id are required. - if (apiKey.empty()) - throw std::runtime_error( - "Cannot find NVQC API key. Please refer to the documentation for " - "information about obtaining and using your NVQC API key."); - - if (!apiKey.starts_with("nvapi-")) - std::runtime_error( - "An invalid NVQC API key is provided. Please check your settings."); - std::unordered_map clientConfigs{ - {"api-key", apiKey}}; - if (!functionId.empty()) - clientConfigs.emplace("function-id", functionId); - if (!versionId.empty()) - clientConfigs.emplace("version-id", versionId); - if (!ngpus.empty()) - clientConfigs.emplace("ngpus", ngpus); - - m_client->setConfig(clientConfigs); - } - - // The NVCF version of this function needs to dynamically fetch the remote - // capabilities from the currently deployed servers. - virtual RemoteCapabilities getRemoteCapabilities() const override { - return m_client->getRemoteCapabilities(); - } - -protected: - // Helper to search NVQC config from environment variable or config file. - NvcfConfig searchNvcfConfig() { - NvcfConfig config; - // Search from environment variable - if (auto apiKey = std::getenv("NVQC_API_KEY")) - config.apiKey = std::string(apiKey); - - if (auto funcIdEnv = std::getenv("NVQC_FUNCTION_ID")) - config.functionId = std::string(funcIdEnv); - - if (auto versionIdEnv = std::getenv("NVQC_FUNCTION_VERSION_ID")) - config.versionId = std::string(versionIdEnv); - - std::string nvqcConfig; - // Allow someone to tweak this with an environment variable - if (auto creds = std::getenv("CUDAQ_NVQC_CREDENTIALS")) - nvqcConfig = std::string(creds); - else - nvqcConfig = std::string(getenv("HOME")) + std::string("/.nvqc_config"); - if (cudaq::fileExists(nvqcConfig)) { - std::ifstream stream(nvqcConfig); - std::string contents((std::istreambuf_iterator(stream)), - std::istreambuf_iterator()); - std::vector lines; - lines = cudaq::split(contents, '\n'); - for (const std::string &l : lines) { - std::vector keyAndValue = cudaq::split(l, ':'); - if (keyAndValue.size() != 2) - throw std::runtime_error("Ill-formed configuration file (" + - nvqcConfig + - "). Key-value pairs must be in ` : " - "` format. (One per line)"); - cudaq::trim(keyAndValue[0]); - cudaq::trim(keyAndValue[1]); - if (config.apiKey.empty() && - (keyAndValue[0] == "key" || keyAndValue[0] == "apikey")) - config.apiKey = keyAndValue[1]; - if (config.functionId.empty() && (keyAndValue[0] == "function-id" || - keyAndValue[0] == "Function ID")) - config.functionId = keyAndValue[1]; - if (config.versionId.empty() && - (keyAndValue[0] == "version-id" || keyAndValue[0] == "Version ID")) - config.versionId = keyAndValue[1]; - } - } - return config; - } -}; - } // namespace cudaq diff --git a/runtime/common/BaseRestRemoteClient.h b/runtime/common/BaseRestRemoteClient.h index 70d483f665f..54821313251 100644 --- a/runtime/common/BaseRestRemoteClient.h +++ b/runtime/common/BaseRestRemoteClient.h @@ -12,7 +12,6 @@ #include "common/Environment.h" #include "common/JsonConvert.h" #include "common/Logger.h" -#include "common/NvqcConfig.h" #include "common/RemoteKernelExecutor.h" #include "common/RestClient.h" #include "common/RuntimeMLIR.h" @@ -319,14 +318,11 @@ class BaseRemoteRestRuntimeClient : public RemoteRuntimeClient { cudaq::RestRequest constructJobRequest( mlir::MLIRContext &mlirContext, cudaq::ExecutionContext &io_context, - cudaq::SerializedCodeExecutionContext *serializedCodeContext, const std::string &backendSimName, const std::string &kernelName, void (*kernelFunc)(void *), const void *kernelArgs, std::uint64_t argsSize, const std::vector *rawArgs) { cudaq::RestRequest request(io_context, version()); - if (serializedCodeContext) - request.serializedCodeExecutionContext = *serializedCodeContext; request.entryPoint = kernelName; request.passes = serverPasses; request.format = cudaq::CodeFormat::MLIR; @@ -362,7 +358,7 @@ class BaseRemoteRestRuntimeClient : public RemoteRuntimeClient { request.entryPoint = stateIrPayload1.entryPoint; // Second kernel of the overlap calculation request.overlapKernel = stateIrPayload2; - } else if (serializedCodeContext == nullptr) { + } else { request.code = constructKernelPayload(mlirContext, kernelName, kernelArgs, argsSize, /*startingArgIdx=*/0, rawArgs); @@ -389,7 +385,6 @@ class BaseRemoteRestRuntimeClient : public RemoteRuntimeClient { virtual bool sendRequest(mlir::MLIRContext &mlirContext, cudaq::ExecutionContext &io_context, - cudaq::SerializedCodeExecutionContext *serializedCodeContext, cudaq::gradient *vqe_gradient, cudaq::optimizer *vqe_optimizer, const int vqe_n_params, const std::string &backendSimName, const std::string &kernelName, void (*kernelFunc)(void *), @@ -406,13 +401,12 @@ class BaseRemoteRestRuntimeClient : public RemoteRuntimeClient { return constructVQEJobRequest(mlirContext, io_context, backendSimName, kernelName, kernelArgs, vqe_gradient, *vqe_optimizer, vqe_n_params, rawArgs); - return constructJobRequest(mlirContext, io_context, serializedCodeContext, - backendSimName, kernelName, kernelFunc, - kernelArgs, argsSize, rawArgs); + return constructJobRequest(mlirContext, io_context, backendSimName, + kernelName, kernelFunc, kernelArgs, argsSize, + rawArgs); }(); - if (request.code.empty() && (serializedCodeContext == nullptr || - serializedCodeContext->source_code.empty())) { + if (request.code.empty()) { if (optionalErrorMsg) *optionalErrorMsg = std::string( @@ -485,833 +479,4 @@ class BaseRemoteRestRuntimeClient : public RemoteRuntimeClient { } }; -/// Base class for the REST client submitting jobs to NVCF-hosted `cudaq-qpud` -/// service. -class BaseNvcfRuntimeClient : public cudaq::BaseRemoteRestRuntimeClient { -protected: - // None: Don't log; Info: basic info; Trace: Timing data per invocation. - enum class LogLevel : int { None = 0, Info, Trace }; - // NVQC logging level - // Enabled high-level info log by default (can be set by an environment - // variable) - LogLevel m_logLevel = LogLevel::Info; - // API key for authentication - std::string m_apiKey; - // Rest client to send HTTP request - cudaq::RestClient m_restClient; - // NVCF function Id to use - std::string m_functionId; - // NVCF version Id of that function to use - std::string m_functionVersionId; - // Information about function deployment from environment variable info. - struct FunctionEnvironments { - // These configs should be positive numbers. - int majorVersion{-1}; - int minorVersion{-1}; - int numGpus{-1}; - int timeoutSecs{-1}; - int hasSerializedCodeExec{-1}; // -1 means unknown; 0 = false, 1 = true - std::string name; - }; - // Available functions: function Id to info mapping - using DeploymentInfo = std::unordered_map; - DeploymentInfo m_availableFuncs; - const std::string CUDAQ_NCA_ID = cudaq::getNvqcNcaId(); - // Base URL for NVCF APIs - static inline const std::string m_baseUrl = "api.nvcf.nvidia.com/v2"; - // Return the URL to invoke the function specified in this client - std::string nvcfInvocationUrl() const { - return fmt::format("https://{}/nvcf/exec/functions/{}/versions/{}", - m_baseUrl, m_functionId, m_functionVersionId); - } - // Return the URL to request an Asset upload link - std::string nvcfAssetUrl() const { - return fmt::format("https://{}/nvcf/assets", m_baseUrl); - } - // Return the URL to retrieve status/result of an NVCF request. - std::string - nvcfInvocationStatus(const std::string &invocationRequestId) const { - return fmt::format("https://{}/nvcf/exec/status/{}", m_baseUrl, - invocationRequestId); - } - // Construct the REST headers for calling NVCF REST APIs - std::map getHeaders() const { - std::map header{ - {"Authorization", fmt::format("Bearer {}", m_apiKey)}, - {"Content-type", "application/json"}}; - return header; - }; - // Helper to retrieve the list of all available versions of the specified - // function Id. - std::vector getFunctionVersions() { - auto headers = getHeaders(); - auto versionDataJs = m_restClient.get( - fmt::format("https://{}/nvcf/functions/{}", m_baseUrl, m_functionId), - "/versions", headers, /*enableSsl=*/true); - CUDAQ_INFO("Version data: {}", versionDataJs.dump()); - std::vector versions; - versionDataJs["functions"].get_to(versions); - return versions; - } - DeploymentInfo - getAllAvailableDeployments(const std::string &functionOverride, - const std::string &versionOverride) { - auto headers = getHeaders(); - auto allVisibleFunctions = - m_restClient.get(fmt::format("https://{}/nvcf/functions", m_baseUrl), - "", headers, /*enableSsl=*/true); - const std::string cudaqNvcfFuncNamePrefix = "cuda_quantum"; - DeploymentInfo info; - - // NCA ID Precedence order is: - // 1. CUDAQ_NCA_ID if it was specifically overriden - // 2. Dev org NCA ID if active dev functions are visible with selected key - // 3. Production NCA ID - const std::string ncaIdToSearch = [&]() { - // Check for override - if (isNvqcNcaIdOverridden()) - return CUDAQ_NCA_ID; - // Check to see if dev NCA ID functions are available - for (auto funcInfo : allVisibleFunctions["functions"]) { - if (funcInfo["ncaId"].get() == - std::string(DEV_NVQC_NCA_ID) && - funcInfo["status"].get() == "ACTIVE" && - funcInfo["name"].get().starts_with( - cudaqNvcfFuncNamePrefix)) { - return std::string(DEV_NVQC_NCA_ID); - } - } - // Fallback on production NCA ID - return CUDAQ_NCA_ID; - }(); - - // Only add functions that are the latest minor version for the major - // version matched by the client. - // I.e. If client 1.x sees server 1.2 and 1.3, choose 1.3. - int highestMinorVersion = 0; - for (auto funcInfo : allVisibleFunctions["functions"]) { - bool matchesOverride = - funcInfo["id"].get() == functionOverride || - funcInfo["versionId"].get() == versionOverride; - bool matchesWithoutOverride = - funcInfo["ncaId"].get() == ncaIdToSearch && - funcInfo["status"].get() == "ACTIVE" && - funcInfo["name"].get().starts_with( - cudaqNvcfFuncNamePrefix); - if (matchesOverride || matchesWithoutOverride) { - const auto containerEnvs = [&]() -> FunctionEnvironments { - FunctionEnvironments envs; - // Function name convention: - // Example: cuda_quantum_v1_t3600_8x - // ------------ - ---- - - // Prefix | | | - // Version __| | | - // Timeout (secs) __| | - // Number of GPUs __| - // Also supported: cuda_quantum_v1-1_t3600_8x - // Also supported: cuda_quantum_suffix_v1-1_t3600_8x - const std::regex funcNameRegex( - R"(^cuda_quantum_.*v([\d\-]+)_t(\d+)_(\d+)x$)"); - // The first match is the whole string. - constexpr std::size_t expectedNumMatches = 4; - std::smatch baseMatch; - const std::string fname = funcInfo["name"].get(); - auto getMajorMinorVersion = [](const std::string &versionStr) { - std::size_t pos = versionStr.find('-'); - int majorVersion = 0; - int minorVersion = 0; - if (pos != std::string::npos) { - majorVersion = std::stoi(versionStr.substr(0, pos)); - minorVersion = std::stoi(versionStr.substr(pos + 1)); - } else { - // If it doesn't say x.y, then assume it is x.0 - majorVersion = std::stoi(versionStr); - minorVersion = 0; - } - return std::make_pair(majorVersion, minorVersion); - }; - // If the function name matches 'Production' naming convention, - // retrieve deployment information from the name. - envs.name = fname; - if (std::regex_match(fname, baseMatch, funcNameRegex) && - baseMatch.size() == expectedNumMatches) { - std::tie(envs.majorVersion, envs.minorVersion) = - getMajorMinorVersion(baseMatch[1].str()); - envs.timeoutSecs = std::stoi(baseMatch[2].str()); - envs.numGpus = std::stoi(baseMatch[3].str()); - envs.hasSerializedCodeExec = - fname.starts_with("cuda_quantum_remote_py") ? 1 : 0; - } else if (funcInfo.contains("containerEnvironment")) { - // Otherwise, retrieve the info from deployment configurations. - // TODO: at some point, we may want to consolidate these two paths - // (name vs. meta-data). We keep it here since function metadata - // (similar to `containerEnvironment`) will be supported in the near - // future. - // Convert to unordered_map - std::unordered_map containerEnvironment; - for (auto it : funcInfo["containerEnvironment"]) - containerEnvironment[it["key"].get()] = - it["value"].get(); - // Fetch values - const auto getIntIfFound = [&](const std::string &envKey, - int &varToSet) { - if (auto it = containerEnvironment.find(envKey); - it != containerEnvironment.end()) - varToSet = std::stoi(it->second); - }; - getIntIfFound("NUM_GPUS", envs.numGpus); - getIntIfFound("WATCHDOG_TIMEOUT_SEC", envs.timeoutSecs); - getIntIfFound("CUDAQ_SER_CODE_EXEC", envs.hasSerializedCodeExec); - if (auto it = - containerEnvironment.find("NVQC_REST_PAYLOAD_VERSION"); - it != containerEnvironment.end()) - std::tie(envs.majorVersion, envs.minorVersion) = - getMajorMinorVersion(it->second); - } - - // Note: invalid/uninitialized FunctionEnvironments will be - // discarded, i.e., not added to the valid deployment list, since the - // API version number will not match. - return envs; - }(); - - // Only add functions that match client version, unless overridden - if (matchesOverride || containerEnvs.majorVersion == version()) { - info[funcInfo["id"].get()] = containerEnvs; - highestMinorVersion = - std::max(highestMinorVersion, containerEnvs.minorVersion); - } - } - } - - // Now make a pass through info and remove all the lower minor versions. - if (functionOverride.empty()) { - std::vector funcsToRemove; - for (auto &iter : info) - if (iter.second.minorVersion != highestMinorVersion) - funcsToRemove.push_back(iter.first); - for (auto &funcToRemove : funcsToRemove) - info.erase(funcToRemove); - } - - return info; - } - - std::optional getQueueDepth(const std::string &funcId, - const std::string &verId) { - auto headers = getHeaders(); - try { - auto queueDepthInfo = m_restClient.get( - fmt::format("https://{}/nvcf/queues/functions/{}/versions/{}", - m_baseUrl, funcId, verId), - "", headers, /*enableSsl=*/true); - - if (queueDepthInfo.contains("functionId") && - queueDepthInfo["functionId"] == funcId && - queueDepthInfo.contains("queues")) { - for (auto queueInfo : queueDepthInfo["queues"]) { - if (queueInfo.contains("functionVersionId") && - queueInfo["functionVersionId"] == verId && - queueInfo.contains("queueDepth")) { - return queueInfo["queueDepth"].get(); - } - } - } - return std::nullopt; - } catch (...) { - // Make this non-fatal. Returns null, i.e., unknown. - return std::nullopt; - } - } - - // Fetch the queue position of the given request ID. If the job has already - // begun execution, it will return `std::nullopt`. - std::optional getQueuePosition(const std::string &requestId) { - auto headers = getHeaders(); - try { - auto queuePos = - m_restClient.get(fmt::format("https://{}/nvcf/queues/{}/position", - m_baseUrl, requestId), - "", headers, /*enableSsl=*/true); - if (queuePos.contains("positionInQueue")) - return queuePos["positionInQueue"].get(); - // When the job enters execution, it returns "status": 400 and "title": - // "Bad Request", so translate that to `std::nullopt`. - return std::nullopt; - } catch (...) { - // Make this non-fatal. Returns null, i.e., unknown. - return std::nullopt; - } - } - -public: - virtual void setConfig( - const std::unordered_map &configs) override { - { - // Check if user set a specific log level (e.g., disable logging) - if (auto logConfigEnv = std::getenv("NVQC_LOG_LEVEL")) { - auto logConfig = std::string(logConfigEnv); - std::transform(logConfig.begin(), logConfig.end(), logConfig.begin(), - [](unsigned char c) { return std::tolower(c); }); - if (logConfig == "0" || logConfig == "off" || logConfig == "false" || - logConfig == "no" || logConfig == "none") - m_logLevel = LogLevel::None; - if (logConfig == "trace") - m_logLevel = LogLevel::Trace; - if (logConfig == "info") - m_logLevel = LogLevel::Info; - } - } - { - const auto apiKeyIter = configs.find("api-key"); - if (apiKeyIter != configs.end()) - m_apiKey = apiKeyIter->second; - if (m_apiKey.empty()) - throw std::runtime_error("No NVQC API key is provided."); - } - - // Save some iterators to be used later - const auto funcIdIter = configs.find("function-id"); - const auto versionIdIter = configs.find("version-id"); - const auto nGpusIter = configs.find("ngpus"); - // Default is 1 GPU if none specified - const int numGpusRequested = - (nGpusIter != configs.end()) ? std::stoi(nGpusIter->second) : 1; - - // Override strings for function id and function version - const auto functionOverride = [&]() -> std::string { - if (funcIdIter == configs.end()) - return ""; - return funcIdIter->second; - }(); - const auto versionOverride = [&]() -> std::string { - if (versionIdIter == configs.end()) - return ""; - return versionIdIter->second; - }(); - - // Pass the optional overrides to getAllAvailableDeployments so that it will - // return information about functions if they are manually specified by the - // user, even if they don't conform to naming conventions. - m_availableFuncs = - getAllAvailableDeployments(functionOverride, versionOverride); - for (const auto &[funcId, info] : m_availableFuncs) - CUDAQ_INFO("Function Id {} (API version {}.{}) has {} GPUs.", funcId, - info.majorVersion, info.minorVersion, info.numGpus); - { - if (funcIdIter != configs.end()) { - // User overrides a specific function Id. - m_functionId = funcIdIter->second; - if (m_logLevel > LogLevel::None) { - // Print out the configuration - cudaq::log("Submitting jobs to NVQC using function Id {}.", - m_functionId); - } - } else { - // Output an error message if no deployments can be found. - if (m_availableFuncs.empty()) - throw std::runtime_error( - "Unable to find any active NVQC deployments for this key. Check " - "if you see any active functions on ngc.nvidia.com in the cloud " - "functions tab, or try to regenerate the key."); - - // Determine the function Id based on the number of GPUs - CUDAQ_INFO("Looking for an NVQC deployment that has {} GPUs.", - numGpusRequested); - for (const auto &[funcId, info] : m_availableFuncs) { - if (info.numGpus == numGpusRequested) { - m_functionId = funcId; - if (m_logLevel > LogLevel::None) { - // Print out the configuration - cudaq::log( - "Submitting jobs to NVQC service with {} GPU(s). Max " - "execution time: {} seconds (excluding queue wait time).", - info.numGpus, info.timeoutSecs); - } - break; - } - } - if (m_functionId.empty()) { - // Make sure that we sort the GPU count list - std::set gpuCounts; - for (const auto &[funcId, info] : m_availableFuncs) { - gpuCounts.emplace(info.numGpus); - } - std::stringstream ss; - ss << "Unable to find NVQC deployment with " << numGpusRequested - << " GPUs.\nAvailable deployments have "; - ss << fmt::format("{}", gpuCounts) << " GPUs.\n"; - ss << "Please check your 'ngpus' value (Python) or `--nvqc-ngpus` " - "value (C++).\n"; - throw std::runtime_error(ss.str()); - } - } - } - { - auto versions = getFunctionVersions(); - // Check if a version Id is set - if (versionIdIter != configs.end()) { - m_functionVersionId = versionIdIter->second; - // Do a sanity check that this is an active version (i.e., usable). - const auto versionInfoIter = - std::find_if(versions.begin(), versions.end(), - [&](const cudaq::NvcfFunctionVersionInfo &info) { - return info.versionId == m_functionVersionId; - }); - // Invalid version Id. - if (versionInfoIter == versions.end()) - throw std::runtime_error( - fmt::format("Version Id '{}' is not valid for NVQC function Id " - "'{}'. Please check your NVQC configurations.", - m_functionVersionId, m_functionId)); - // The version is not active/deployed. - if (versionInfoIter->status != cudaq::FunctionStatus::ACTIVE) - throw std::runtime_error( - fmt::format("Version Id '{}' of NVQC function Id " - "'{}' is not ACTIVE. Please check your NVQC " - "configurations or contact support.", - m_functionVersionId, m_functionId)); - } else { - // No version Id is set. Just pick the latest version of the function - // Id. The timestamp is an ISO 8601 string, e.g., - // 2024-01-25T04:14:46.360Z. To sort it from latest to oldest, we can - // use string sorting. - std::sort(versions.begin(), versions.end(), - [](const auto &a, const auto &b) { - return a.createdAt > b.createdAt; - }); - for (const auto &versionInfo : versions) - CUDAQ_INFO("Found version Id {}, created at {}", - versionInfo.versionId, versionInfo.createdAt); - - auto activeVersions = - versions | - std::ranges::views::filter( - [](const cudaq::NvcfFunctionVersionInfo &info) { - return info.status == cudaq::FunctionStatus::ACTIVE; - }); - - if (activeVersions.empty()) - throw std::runtime_error( - fmt::format("No active version available for NVQC function Id " - "'{}'. Please check your function Id.", - m_functionId)); - - m_functionVersionId = activeVersions.front().versionId; - CUDAQ_INFO("Selected the latest version Id {} for function Id {}", - m_functionVersionId, m_functionId); - } - } - } - - // The NVCF version of this function needs to dynamically determine the remote - // capabilities based on the servers currently deployed. - virtual RemoteCapabilities getRemoteCapabilities() const override { - // Allow the user to override to all true. - if (getEnvBool("CUDAQ_CLIENT_REMOTE_CAPABILITY_OVERRIDE", false)) - return RemoteCapabilities(/*initValues=*/true); - // Else determine capabilities based on server deployment info. - RemoteCapabilities capabilities(/*initValues=*/false); - if (!m_availableFuncs.contains(m_functionId)) { - // The user has manually overridden an NVQC function selection, but it - // wasn't found in m_availableFuncs. - CUDAQ_INFO( - "Function id overriden ({}) but cannot retrieve its remote " - "capabilities because a deployment for it was not found. Will assume " - "all optional remote capabilities are unsupported. You can set " - "CUDAQ_CLIENT_REMOTE_CAPABILITY_OVERRIDE=1 if you wish to override " - "this.", - m_functionId); - return capabilities; - } - const auto &funcEnv = m_availableFuncs.at(m_functionId); - capabilities.serializedCodeExec = funcEnv.hasSerializedCodeExec > 0; - capabilities.stateOverlap = - funcEnv.majorVersion > 1 || - (funcEnv.majorVersion >= 1 && funcEnv.minorVersion >= 1); - capabilities.vqe = funcEnv.majorVersion > 1 || - (funcEnv.majorVersion >= 1 && funcEnv.minorVersion >= 1); - capabilities.isRemoteSimulator = true; - return capabilities; - } - - virtual bool - sendRequest(mlir::MLIRContext &mlirContext, - cudaq::ExecutionContext &io_context, - cudaq::SerializedCodeExecutionContext *serializedCodeContext, - cudaq::gradient *vqe_gradient, cudaq::optimizer *vqe_optimizer, - const int vqe_n_params, const std::string &backendSimName, - const std::string &kernelName, void (*kernelFunc)(void *), - const void *kernelArgs, std::uint64_t argsSize, - std::string *optionalErrorMsg, - const std::vector *rawArgs) override { - if (isDisallowed(io_context.name)) - throw std::runtime_error( - io_context.name + - " operation is not supported with cudaq target nvqc!"); - - static const std::vector MULTI_GPU_BACKENDS = { - "tensornet", "nvidia-mgpu", "nvidia-mqpu"}; - { - // Print out a message if users request a multi-GPU deployment while - // setting the backend to a single-GPU one. Only print once in case this - // is a execution loop. - static bool printOnce = false; - if (m_availableFuncs[m_functionId].numGpus > 1 && - std::find(MULTI_GPU_BACKENDS.begin(), MULTI_GPU_BACKENDS.end(), - backendSimName) == MULTI_GPU_BACKENDS.end() && - !printOnce) { - std::cout << "The requested backend simulator (" << backendSimName - << ") is not capable of using all " - << m_availableFuncs[m_functionId].numGpus - << " GPUs requested.\n"; - std::cout << "Only one GPU will be used for simulation.\n"; - std::cout << "Please refer to CUDA-Q documentation for a list of " - "multi-GPU capable simulator backends.\n"; - printOnce = true; - } - } - // Construct the base `cudaq-qpud` request payload. - cudaq::RestRequest request = [&]() { - if (vqe_n_params > 0) - return constructVQEJobRequest(mlirContext, io_context, backendSimName, - kernelName, kernelArgs, vqe_gradient, - *vqe_optimizer, vqe_n_params, rawArgs); - return constructJobRequest(mlirContext, io_context, serializedCodeContext, - backendSimName, kernelName, kernelFunc, - kernelArgs, argsSize, rawArgs); - }(); - - if (request.code.empty() && (serializedCodeContext == nullptr || - serializedCodeContext->source_code.empty())) { - if (optionalErrorMsg) - *optionalErrorMsg = - std::string( - "Failed to construct/retrieve kernel IR for kernel named ") + - kernelName; - return false; - } - - if (request.format != cudaq::CodeFormat::MLIR && - serializedCodeContext == nullptr) { - // The `.config` file may have been tampered with. - std::cerr << "Internal error: unsupported kernel IR detected.\nThis may " - "indicate a corrupted CUDA-Q installation."; - std::abort(); - } - - // Max message size that we can send in the body - constexpr std::size_t MAX_SIZE_BYTES = 250000; // 250 KB - json requestJson; - auto jobHeader = getHeaders(); - std::optional assetId; - // Make sure that we delete the asset that we've uploaded when this - // `sendRequest` function exits (success or not). - ScopeExit deleteAssetOnExit([&]() { - if (assetId.has_value()) { - CUDAQ_INFO("Deleting NVQC Asset Id {}", assetId.value()); - auto headers = getHeaders(); - m_restClient.del(nvcfAssetUrl(), std::string("/") + assetId.value(), - headers, /*enableLogging=*/false, /*enableSsl=*/true); - } - }); - - // Upload this request as an NVCF asset if needed. - // Note: The majority of the payload is the IR code. Hence, first checking - // if it exceed the size limit. Otherwise, if the code is small, make sure - // that the total payload doesn't exceed that limit as well by constructing - // a temporary JSON object of the full payload. - if (request.code.size() > MAX_SIZE_BYTES || - json(request).dump().size() > MAX_SIZE_BYTES) { - assetId = uploadRequest(request); - if (!assetId.has_value()) { - if (optionalErrorMsg) - *optionalErrorMsg = "Failed to upload request to NVQC as NVCF assets"; - return false; - } - json requestBody; - // Use NVCF `inputAssetReferences` field to specify the asset that needs - // to be pulled in when invoking this function. - requestBody["inputAssetReferences"] = - std::vector{assetId.value()}; - requestJson["requestBody"] = requestBody; - requestJson["requestHeader"] = requestBody; - } else { - requestJson["requestBody"] = request; - } - - try { - // Making the request - CUDAQ_DBG("Sending NVQC request to {}", nvcfInvocationUrl()); - auto lastQueuePos = std::numeric_limits::max(); - - if (m_logLevel > LogLevel::Info) - cudaq::log("Posting NVQC request now"); - auto resultJs = - m_restClient.post(nvcfInvocationUrl(), "", requestJson, jobHeader, - /*enableLogging=*/false, /*enableSsl=*/true); - CUDAQ_DBG("Response: {}", resultJs.dump()); - - // Call getQueuePosition() until we're at the front of the queue. If log - // level is "none", then skip all this because we don't need to show the - // status to the user, and we don't need to know the precise - // requestStartTime. - if (m_logLevel > LogLevel::None) { - if (resultJs.contains("status") && - resultJs["status"] == "pending-evaluation") { - const std::string reqId = resultJs["reqId"]; - auto queuePos = getQueuePosition(reqId); - while (queuePos.has_value() && queuePos.value() > 0) { - if (queuePos.value() != lastQueuePos) { - // Position in queue has changed. - if (lastQueuePos == std::numeric_limits::max()) { - // If lastQueuePos hasn't been populated with a true value yet, - // it means we have not fetched the queue depth or displayed - // anything to the user yet. - cudaq::log("Number of jobs ahead of yours in the NVQC queue: " - "{}. Your job will start executing once it gets to " - "the head of the queue.", - queuePos.value()); - } else { - cudaq::log("Position in queue for request {} has changed from " - "{} to {}", - reqId, lastQueuePos, queuePos.value()); - } - lastQueuePos = queuePos.value(); - } - std::this_thread::sleep_for(std::chrono::seconds(1)); - queuePos = getQueuePosition(reqId); - } - } - if (lastQueuePos != std::numeric_limits::max()) - cudaq::log("Your job is finished waiting in the queue and will now " - "begin execution."); - } - - const auto requestStartTime = std::chrono::system_clock::now(); - bool needToPrintNewline = false; - while (resultJs.contains("status") && - resultJs["status"] == "pending-evaluation") { - const std::string reqId = resultJs["reqId"]; - const int elapsedTimeSecs = - std::chrono::duration_cast( - std::chrono::system_clock::now() - requestStartTime) - .count(); - // Warns if the remaining time is less than this threshold. - constexpr int TIMEOUT_WARNING_SECS = 5 * 60; // 5 minutes. - const int remainingSecs = - m_availableFuncs[m_functionId].timeoutSecs - elapsedTimeSecs; - std::string additionalInfo; - if (remainingSecs < 0) - fmt::format_to(std::back_inserter(additionalInfo), - ". Exceeded wall time limit ({} seconds), but time " - "spent waiting in queue is not counted. Proceeding.", - m_availableFuncs[m_functionId].timeoutSecs); - else if (remainingSecs < TIMEOUT_WARNING_SECS) - fmt::format_to(std::back_inserter(additionalInfo), - ". Approaching the wall time limit ({} seconds). " - "Remaining time: {} seconds.", - m_availableFuncs[m_functionId].timeoutSecs, - remainingSecs); - // If NVQC log level is high enough or if we have additional info to - // print, then print the full message; else print a simple "." - if (m_logLevel > LogLevel::Info || !additionalInfo.empty()) { - if (needToPrintNewline) - std::cout << "\n"; - needToPrintNewline = false; - cudaq::log("Polling NVQC result data for Request Id {}{}", reqId, - additionalInfo); - } else if (m_logLevel > LogLevel::None) { - std::cout << "."; - std::cout.flush(); - needToPrintNewline = true; - } - // Wait 1 sec then poll the result - std::this_thread::sleep_for(std::chrono::seconds(1)); - resultJs = m_restClient.get(nvcfInvocationStatus(reqId), "", jobHeader, - /*enableSsl=*/true); - } - - if (needToPrintNewline) - std::cout << "\n"; - - if (!resultJs.contains("status") || resultJs["status"] != "fulfilled") { - if (optionalErrorMsg) - *optionalErrorMsg = - std::string( - "Failed to complete the simulation request. Status: ") + - (resultJs.contains("status") ? std::string(resultJs["status"]) - : std::string("unknown")); - return false; - } - - // If there is a `responseReference` field, this is a large response. - // Hence, need to download result .zip file from the provided URL. - if (resultJs.contains("responseReference")) { - // This is a large response that needs to be downloaded - const std::string downloadUrl = resultJs["responseReference"]; - const std::string reqId = resultJs["reqId"]; - CUDAQ_INFO("Download result for Request Id {} at {}", reqId, - downloadUrl); - llvm::SmallString<32> tempDir; - llvm::sys::path::system_temp_directory(/*ErasedOnReboot*/ true, - tempDir); - std::filesystem::path resultFilePath = - std::filesystem::path(tempDir.c_str()) / (reqId + ".zip"); - m_restClient.download(downloadUrl, resultFilePath.string(), - /*enableLogging=*/false, /*enableSsl=*/true); - CUDAQ_INFO("Downloaded zip file {}", resultFilePath.string()); - std::filesystem::path unzipDir = - std::filesystem::path(tempDir.c_str()) / reqId; - // Unzip the response - cudaq::utils::unzip(resultFilePath, unzipDir); - std::filesystem::path resultJsonFile = - unzipDir / (reqId + "_result.json"); - if (!std::filesystem::exists(resultJsonFile)) { - if (optionalErrorMsg) - *optionalErrorMsg = - "Unexpected response file: missing the result JSON file."; - return false; - } - std::ifstream t(resultJsonFile.string()); - std::string resultJsonFromFile((std::istreambuf_iterator(t)), - std::istreambuf_iterator()); - try { - resultJs["response"] = json::parse(resultJsonFromFile); - } catch (...) { - if (optionalErrorMsg) - *optionalErrorMsg = - fmt::format("Failed to parse the response JSON from file '{}'.", - resultJsonFile.string()); - return false; - } - CUDAQ_INFO( - "Delete response zip file {} and its inflated contents in {}", - resultFilePath.c_str(), unzipDir.c_str()); - std::filesystem::remove(resultFilePath); - std::filesystem::remove_all(unzipDir); - } - - if (!resultJs.contains("response")) { - if (optionalErrorMsg) - *optionalErrorMsg = "Unexpected response from the NVQC invocation. " - "Missing the 'response' field."; - return false; - } - if (!resultJs["response"].contains("executionContext")) { - if (optionalErrorMsg) { - if (resultJs["response"].contains("errorMessage")) { - *optionalErrorMsg = fmt::format( - "NVQC failed to handle request. Server error: {}", - resultJs["response"]["errorMessage"].get()); - } else { - *optionalErrorMsg = - "Unexpected response from the NVQC response. " - "Missing the required field 'executionContext'."; - } - } - return false; - } - if (m_logLevel > LogLevel::None && - resultJs["response"].contains("executionInfo")) { - try { - // We only print GPU device info once if logging is not disabled. - static bool printDeviceInfoOnce = false; - cudaq::NvcfExecutionInfo info; - resultJs["response"]["executionInfo"].get_to(info); - if (!printDeviceInfoOnce) { - std::size_t totalWidth = 50; - std::string message = "NVQC Device Info"; - auto strLen = message.size() + 2; // Account for surrounding spaces - auto leftSize = (totalWidth - strLen) / 2; - auto rightSize = (totalWidth - strLen) - leftSize; - std::string leftSide(leftSize, '='); - std::string rightSide(rightSize, '='); - auto &platform = cudaq::get_platform(); - std::ostream &os = - platform.getLogStream() ? *platform.getLogStream() : std::cout; - os << fmt::format("\n{} {} {}\n", leftSide, message, rightSide); - os << fmt::format("GPU Device Name: \"{}\"\n", - info.deviceProps.deviceName); - os << fmt::format( - "CUDA Driver Version / Runtime Version: {}.{} / {}.{}\n", - info.deviceProps.driverVersion / 1000, - (info.deviceProps.driverVersion % 100) / 10, - info.deviceProps.runtimeVersion / 1000, - (info.deviceProps.runtimeVersion % 100) / 10); - os << fmt::format("Total global memory (GB): {:.1f}\n", - (float)(info.deviceProps.totalGlobalMemMbytes) / - 1024.0); - os << fmt::format("Memory Clock Rate (MHz): {:.3f}\n", - info.deviceProps.memoryClockRateMhz); - os << fmt::format("GPU Clock Rate (MHz): {:.3f}\n", - info.deviceProps.clockRateMhz); - os << fmt::format("{}\n", std::string(totalWidth, '=')); - // Only print this device info once. - printDeviceInfoOnce = true; - } - - // If trace logging mode is enabled, log timing data for each request. - if (m_logLevel == LogLevel::Trace) { - fmt::print("\n===== NVQC Execution Timing ======\n"); - fmt::print(" - Pre-processing: {} milliseconds \n", - info.simulationStart - info.requestStart); - fmt::print(" - Execution: {} milliseconds \n", - info.simulationEnd - info.simulationStart); - fmt::print("==================================\n"); - } - } catch (...) { - fmt::print("Unable to parse NVQC execution info metadata.\n"); - } - } - resultJs["response"]["executionContext"].get_to(io_context); - return true; - } catch (std::exception &e) { - if (optionalErrorMsg) - *optionalErrorMsg = e.what(); - return false; - } catch (...) { - std::string exType = __cxxabiv1::__cxa_current_exception_type()->name(); - auto demangledPtr = - __cxxabiv1::__cxa_demangle(exType.c_str(), nullptr, nullptr, nullptr); - if (demangledPtr && optionalErrorMsg) { - std::string demangledName(demangledPtr); - *optionalErrorMsg = "Unhandled exception of type " + demangledName; - } else if (optionalErrorMsg) { - *optionalErrorMsg = "Unhandled exception of unknown type"; - } - return false; - } - } - - // Upload a job request as an NVCF asset. - // Return asset Id on success. Otherwise, return null. - std::optional - uploadRequest(const cudaq::RestRequest &jobRequest) { - json requestJson; - requestJson["contentType"] = "application/json"; - requestJson["description"] = "cudaq-nvqc-job"; - try { - auto headers = getHeaders(); - auto resultJs = - m_restClient.post(nvcfAssetUrl(), "", requestJson, headers, - /*enableLogging=*/false, /*enableSsl=*/true); - const std::string uploadUrl = resultJs["uploadUrl"]; - const std::string assetId = resultJs["assetId"]; - CUDAQ_INFO("Upload NVQC job request as NVCF Asset Id {} to {}", assetId, - uploadUrl); - std::map uploadHeader; - // This must match the request to create the upload link - uploadHeader["Content-Type"] = "application/json"; - uploadHeader["x-amz-meta-nvcf-asset-description"] = "cudaq-nvqc-job"; - json jobRequestJs = jobRequest; - m_restClient.put(uploadUrl, "", jobRequestJs, uploadHeader, - /*enableLogging=*/false, /*enableSsl=*/true); - return assetId; - } catch (...) { - return {}; - } - } -}; - } // namespace cudaq diff --git a/runtime/common/JsonConvert.h b/runtime/common/JsonConvert.h index 3340f2965dc..56de1bd3718 100644 --- a/runtime/common/JsonConvert.h +++ b/runtime/common/JsonConvert.h @@ -10,7 +10,6 @@ #include "GPUInfo.h" #include "common/ExecutionContext.h" #include "common/FmtCore.h" -#include "common/SerializedCodeExecutionContext.h" #include "cudaq/Support/Version.h" #include "cudaq/gradients.h" #include "cudaq/optimizers.h" @@ -573,8 +572,6 @@ class RestRequest { // (2) Breaking changes in the runtime, which make JIT execution incompatible, // e.g., changing the simulator names (.so files), changing signatures of // QIR functions, etc. - // IMPORTANT: When a new version is defined, a new NVQC deployment will be - // needed. static constexpr std::size_t REST_PAYLOAD_VERSION = 1; static constexpr std::size_t REST_PAYLOAD_MINOR_VERSION = 1; RestRequest(ExecutionContext &context, int versionNumber) @@ -613,10 +610,6 @@ class RestRequest { std::size_t version; // Version of the runtime client submitting the request. std::string clientVersion; - // The SerializedCodeExecutionContext to compile and to execute a limited - // subset of Python source code. The server will execute serialized code in - // this context - std::optional serializedCodeExecutionContext; friend void to_json(json &j, const RestRequest &p) { TO_JSON_HELPER(version); @@ -631,7 +624,6 @@ class RestRequest { TO_JSON_HELPER(seed); TO_JSON_HELPER(passes); TO_JSON_HELPER(clientVersion); - TO_JSON_OPT_HELPER(serializedCodeExecutionContext); } friend void from_json(const json &j, RestRequest &p) { @@ -647,53 +639,7 @@ class RestRequest { FROM_JSON_HELPER(seed); FROM_JSON_HELPER(passes); FROM_JSON_HELPER(clientVersion); - FROM_JSON_OPT_HELPER(serializedCodeExecutionContext); } }; -/// NVCF function version status -enum class FunctionStatus { ACTIVE, DEPLOYING, ERROR, INACTIVE, DELETED }; -NLOHMANN_JSON_SERIALIZE_ENUM(FunctionStatus, - { - {FunctionStatus::ACTIVE, "ACTIVE"}, - {FunctionStatus::DEPLOYING, "DEPLOYING"}, - {FunctionStatus::ERROR, "ERROR"}, - {FunctionStatus::INACTIVE, "INACTIVE"}, - {FunctionStatus::DELETED, "DELETED"}, - }); - -// Encapsulates a function version info -// Note: we only parse a subset of required fields (always present). There may -// be other fields, which are not required. -struct NvcfFunctionVersionInfo { - // Function Id - std::string id; - // NVIDIA NGC Org Id (NCA Id) - std::string ncaId; - // Version Id - std::string versionId; - // Function name - std::string name; - // Status of this particular function version - FunctionStatus status; - // Function version creation timestamp (ISO 8601 string) - // e.g., "2024-02-05T00:09:51.154Z" - std::string createdAt; - NLOHMANN_DEFINE_TYPE_INTRUSIVE(NvcfFunctionVersionInfo, id, ncaId, versionId, - name, status, createdAt); -}; - -// NVCF execution metadata. -struct NvcfExecutionInfo { - // Time point (milliseconds since epoch) when the request handling starts. - std::size_t requestStart; - // Time point (milliseconds since epoch) when the execution starts (JIT - // completed). - std::size_t simulationStart; - // Time point (milliseconds since epoch) when the execution finishes. - std::size_t simulationEnd; - CudaDeviceProperties deviceProps; - NLOHMANN_DEFINE_TYPE_INTRUSIVE(NvcfExecutionInfo, requestStart, - simulationStart, simulationEnd, deviceProps); -}; } // namespace cudaq diff --git a/runtime/common/NvqcConfig.h b/runtime/common/NvqcConfig.h deleted file mode 100644 index 1019e2a2201..00000000000 --- a/runtime/common/NvqcConfig.h +++ /dev/null @@ -1,30 +0,0 @@ -/****************************************************************-*- C++ -*-**** - * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * - * All rights reserved. * - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. * - ******************************************************************************/ - -#pragma once -#include -#include -namespace cudaq { - -static constexpr const char *NVQC_NCA_ID_ENV_VAR = "NVQC_NCA_ID"; -static constexpr const char *DEV_NVQC_NCA_ID = - "mZraB3k06kOd8aPhD6MVXJwBVZ67aXDLsfmDo4MYXDs"; -static constexpr const char *PROD_NVQC_NCA_ID = - "audj0Ow_82RT0BbiewKaIryIdZWiSrOqiiDSaA8w7a8"; - -inline bool isNvqcNcaIdOverridden() { - return std::getenv(NVQC_NCA_ID_ENV_VAR) != nullptr; -} - -inline std::string getNvqcNcaId() { - // Allows runtime override by environment variable. - if (auto ncaIdVar = std::getenv(NVQC_NCA_ID_ENV_VAR)) - return std::string(ncaIdVar); - return PROD_NVQC_NCA_ID; -} -} // namespace cudaq diff --git a/runtime/common/RemoteKernelExecutor.h b/runtime/common/RemoteKernelExecutor.h index e703a989cb5..18b980e0d42 100644 --- a/runtime/common/RemoteKernelExecutor.h +++ b/runtime/common/RemoteKernelExecutor.h @@ -28,7 +28,6 @@ namespace cudaq { class ExecutionContext; class gradient; class optimizer; -class SerializedCodeExecutionContext; /// Base interface encapsulating a CUDA-Q runtime server capable of /// running kernel IR code. @@ -103,7 +102,6 @@ class RemoteRuntimeClient // if this was a local execution. virtual bool sendRequest(mlir::MLIRContext &mlirContext, ExecutionContext &io_context, - SerializedCodeExecutionContext *serializedCodeContext, cudaq::gradient *vqe_gradient, cudaq::optimizer *vqe_optimizer, const int vqe_n_params, const std::string &backendSimName, const std::string &kernelName, void (*kernelFunc)(void *), diff --git a/runtime/common/SerializedCodeExecutionContext.h b/runtime/common/SerializedCodeExecutionContext.h deleted file mode 100644 index 9a17591d50d..00000000000 --- a/runtime/common/SerializedCodeExecutionContext.h +++ /dev/null @@ -1,38 +0,0 @@ -/****************************************************************-*- C++ -*-**** - * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. * - * All rights reserved. * - * * - * This source code and the accompanying materials are made available under * - * the terms of the Apache License 2.0 which accompanies this distribution. * - ******************************************************************************/ - -#pragma once - -#include "nlohmann/json.hpp" -#include -#include -#include - -using json = nlohmann::json; - -namespace cudaq { - -/// @brief The SerializedCodeExecutionContext is an abstraction to indicate -/// how a serialized code should be executed. -class SerializedCodeExecutionContext { -public: - /// @brief All variables visible to the Python \p source_code to execute, as a - /// JSON-like string object. - std::string scoped_var_dict; - - /// @brief The source code of the objective function and its call as a string. - std::string source_code; - - SerializedCodeExecutionContext() = default; - ~SerializedCodeExecutionContext() = default; - - // Serialization - NLOHMANN_DEFINE_TYPE_INTRUSIVE(SerializedCodeExecutionContext, - scoped_var_dict, source_code); -}; -} // namespace cudaq diff --git a/runtime/cudaq/algorithms/gradient.h b/runtime/cudaq/algorithms/gradient.h index d2c6a13a5a7..73998fa811c 100644 --- a/runtime/cudaq/algorithms/gradient.h +++ b/runtime/cudaq/algorithms/gradient.h @@ -76,7 +76,7 @@ class gradient { /// Take the quantum kernel and concrete arguments for all arguments except /// the first std::vector argument, which is used for the variational /// parameters for the gradient. Serialize and save those arguments into this - /// object. (Useful for NVQC.) + /// object. template void setArgs(QuantumKernel &kernel, Args &&...args) { static_assert( diff --git a/runtime/cudaq/platform/default/rest_server/helpers/RestRemoteClient.cpp b/runtime/cudaq/platform/default/rest_server/helpers/RestRemoteClient.cpp index d52eadb0ecf..4315c30e236 100644 --- a/runtime/cudaq/platform/default/rest_server/helpers/RestRemoteClient.cpp +++ b/runtime/cudaq/platform/default/rest_server/helpers/RestRemoteClient.cpp @@ -17,14 +17,6 @@ class RemoteRestRuntimeClient : public cudaq::BaseRemoteRestRuntimeClient { RemoteRestRuntimeClient() : BaseRemoteRestRuntimeClient() {} }; -/// REST client submitting jobs to NVCF-hosted `cudaq-qpud` service. -class NvcfRuntimeClient : public cudaq::BaseNvcfRuntimeClient { -public: - /// @brief The constructor - NvcfRuntimeClient() : BaseNvcfRuntimeClient() {} -}; - } // namespace CUDAQ_REGISTER_TYPE(cudaq::RemoteRuntimeClient, RemoteRestRuntimeClient, rest) -CUDAQ_REGISTER_TYPE(cudaq::RemoteRuntimeClient, NvcfRuntimeClient, NVCF) diff --git a/runtime/cudaq/platform/default/rest_server/helpers/RestRemoteServer.cpp b/runtime/cudaq/platform/default/rest_server/helpers/RestRemoteServer.cpp index 91127b05e83..21454199df1 100644 --- a/runtime/cudaq/platform/default/rest_server/helpers/RestRemoteServer.cpp +++ b/runtime/cudaq/platform/default/rest_server/helpers/RestRemoteServer.cpp @@ -169,68 +169,11 @@ class RemoteRestRuntimeServer : public cudaq::RemoteRuntimeServer { std::string mutableReq; for (const auto &[k, v] : headers) CUDAQ_INFO("Request Header: {} : {}", k, v); - // Checking if this request has its body sent on as NVCF assets. - const auto dirIter = headers.find("NVCF-ASSET-DIR"); - const auto assetIdIter = headers.find("NVCF-FUNCTION-ASSET-IDS"); - if (dirIter != headers.end() && assetIdIter != headers.end()) { - const std::string dir = dirIter->second; - const auto ids = cudaq::split(assetIdIter->second, ','); - if (ids.size() != 1) { - json js; - js["status"] = - fmt::format("Invalid asset Id data: {}", assetIdIter->second); - return js; - } - // Load the asset file - std::filesystem::path assetFile = - std::filesystem::path(dir) / ids[0]; - if (!std::filesystem::exists(assetFile)) { - json js; - js["status"] = fmt::format("Unable to find the asset file {}", - assetFile.string()); - return js; - } - std::ifstream t(assetFile); - std::string requestFromFile((std::istreambuf_iterator(t)), - std::istreambuf_iterator()); - mutableReq = requestFromFile; - } else { - mutableReq = reqBody; - } + mutableReq = reqBody; if (m_hasMpi) cudaq::mpi::broadcast(mutableReq, 0); auto resultJs = processRequest(mutableReq); - // Check whether we have a limit in terms of response size. - if (headers.contains("NVCF-MAX-RESPONSE-SIZE-BYTES")) { - const std::size_t maxResponseSizeBytes = std::stoll( - headers.find("NVCF-MAX-RESPONSE-SIZE-BYTES")->second); - if (resultJs.dump().size() > maxResponseSizeBytes) { - // If the response size is larger than the limit, write it to the - // large output directory rather than sending it back as an HTTP - // response. - const auto outputDirIter = headers.find("NVCF-LARGE-OUTPUT-DIR"); - const auto reqIdIter = headers.find("NVCF-REQID"); - if (outputDirIter == headers.end() || - reqIdIter == headers.end()) { - json js; - js["status"] = - "Failed to locate output file location for large response."; - return js; - } - - const std::string outputDir = outputDirIter->second; - const std::string fileName = reqIdIter->second + "_result.json"; - const std::filesystem::path outputFile = - std::filesystem::path(outputDir) / fileName; - std::ofstream file(outputFile.string()); - file << resultJs.dump(); - file.flush(); - json js; - js["resultFile"] = fileName; - return js; - } - } return resultJs; }); @@ -835,64 +778,6 @@ class RemoteRestRuntimeServer : public cudaq::RemoteRuntimeServer { } }; -// Runtime server for NVCF -class NvcfRuntimeServer : public RemoteRestRuntimeServer { -public: - NvcfRuntimeServer() : RemoteRestRuntimeServer() { exitAfterJob = true; } - -protected: - virtual bool filterRequest(const cudaq::RestRequest &in_request, - std::string &outValidationMessage) const override { - // We only support MLIR payload on the NVCF server. - if (in_request.format != cudaq::CodeFormat::MLIR) { - outValidationMessage = - "Unsupported input format: only CUDA-Q MLIR data is allowed."; - return false; - } - - if (!in_request.passes.empty()) { - outValidationMessage = - "Unsupported passes: server-side compilation passes are not allowed."; - return false; - } - - return true; - } - -protected: - virtual json processRequest(const std::string &reqBody, - bool forceLog = false) override { - // When calling RemoteRestRuntimeServer::processRequest, set forceLog=true - // so that incoming requests are always logged, regardless of what log level - // we're running the server at. - auto executionResult = - RemoteRestRuntimeServer::processRequest(reqBody, /*forceLog=*/true); - // Amend execution information - executionResult["executionInfo"] = constructExecutionInfo(); - return executionResult; - } - -private: - cudaq::NvcfExecutionInfo constructExecutionInfo() { - cudaq::NvcfExecutionInfo info; - const auto optionalTimePointToInt = - [](const auto &optionalTimePoint) -> std::size_t { - return optionalTimePoint.has_value() - ? std::chrono::duration_cast( - optionalTimePoint.value().time_since_epoch()) - .count() - : 0; - }; - info.requestStart = optionalTimePointToInt(requestStart); - info.simulationStart = optionalTimePointToInt(simulationStart); - info.simulationEnd = optionalTimePointToInt(simulationEnd); - const auto deviceProps = cudaq::getCudaProperties(); - if (deviceProps.has_value()) - info.deviceProps = deviceProps.value(); - return info; - } -}; } // namespace CUDAQ_REGISTER_TYPE(cudaq::RemoteRuntimeServer, RemoteRestRuntimeServer, rest) -CUDAQ_REGISTER_TYPE(cudaq::RemoteRuntimeServer, NvcfRuntimeServer, nvcf) diff --git a/runtime/cudaq/platform/mqpu/CMakeLists.txt b/runtime/cudaq/platform/mqpu/CMakeLists.txt index 47ef0ab14b3..29da3c61e33 100644 --- a/runtime/cudaq/platform/mqpu/CMakeLists.txt +++ b/runtime/cudaq/platform/mqpu/CMakeLists.txt @@ -45,4 +45,3 @@ endif() install(TARGETS ${LIBRARY_NAME} DESTINATION lib) add_target_config(remote-mqpu) -add_target_config(nvqc) diff --git a/runtime/cudaq/platform/mqpu/MultiQPUPlatform.cpp b/runtime/cudaq/platform/mqpu/MultiQPUPlatform.cpp index a1f28ec5f8c..e8cb0bb7620 100644 --- a/runtime/cudaq/platform/mqpu/MultiQPUPlatform.cpp +++ b/runtime/cudaq/platform/mqpu/MultiQPUPlatform.cpp @@ -154,53 +154,7 @@ class MultiQPUQuantumPlatform : public cudaq::quantum_platform { fmt::format("Unable to retrieve {} QPU implementation. Please " "check your installation.", qpuSubType)); - if (qpuSubType == "NvcfSimulatorQPU") { - platformQPUs.clear(); - threadToQpuId.clear(); - platformCurrentQPU = 0; - auto simName = getOpt(description, "backend"); - if (simName.empty()) - simName = "custatevec-fp32"; - std::string configStr = - fmt::format("target;nvqc;simulator;{}", simName); - auto getOptAndSetConfig = [&](const std::string &key) { - auto val = getOpt(description, key); - if (!val.empty()) - configStr += fmt::format(";{};{}", key, val); - }; - getOptAndSetConfig("api_key"); - getOptAndSetConfig("function_id"); - getOptAndSetConfig("version_id"); - - auto numQpusStr = getOpt(description, "nqpus"); - int numQpus = numQpusStr.empty() ? 1 : std::stoi(numQpusStr); - - if (simName.find("nvidia-mqpu") != std::string::npos && numQpus > 1) { - // If the backend simulator is an MQPU simulator (like nvidia-mqpu), - // then use "nqpus" to determine the number of GPUs to request for the - // backend. This allows us to seamlessly translate requests for MQPU - // requests to the NVQC platform. - configStr += fmt::format(";{};{}", "ngpus", numQpus); - // Now change numQpus to 1 for the downstream code, which will make a - // single NVQC QPU. - numQpus = 1; - } else { - getOptAndSetConfig("ngpus"); - } - - if (numQpus < 1) - throw std::invalid_argument("Number of QPUs must be greater than 0."); - for (int qpuId = 0; qpuId < numQpus; ++qpuId) { - // Populate the information and add the QPUs - auto qpu = cudaq::registry::get("NvcfSimulatorQPU"); - qpu->setId(qpuId); - qpu->setTargetBackend(configStr); - threadToQpuId[std::hash{}( - qpu->getExecutionThreadId())] = qpuId; - platformQPUs.emplace_back(std::move(qpu)); - } - platformNumQPUs = platformQPUs.size(); - } else if (qpuSubType == "orca") { + if (qpuSubType == "orca") { auto urls = cudaq::split(getOpt(description, "url"), ','); platformQPUs.clear(); threadToQpuId.clear(); diff --git a/runtime/cudaq/platform/mqpu/nvqc.yml b/runtime/cudaq/platform/mqpu/nvqc.yml deleted file mode 100644 index b983f1679d7..00000000000 --- a/runtime/cudaq/platform/mqpu/nvqc.yml +++ /dev/null @@ -1,51 +0,0 @@ -# ============================================================================ # -# Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. # -# All rights reserved. # -# # -# This source code and the accompanying materials are made available under # -# the terms of the Apache License 2.0 which accompanies this distribution. # -# ============================================================================ # - -name: nvqc -description: - "The NVQC Target provides access to simulated QPU services hosted on the NVIDIA Quantum Cloud." - -target-arguments: - - key: backend - required: false - type: string - platform-arg: backend - help-string: "Specify the remote simulator backend." - - key: nqpus - required: false - type: integer - platform-arg: nqpus - help-string: "Specify the number of virtual NVQC QPUs." - - key: ngpus - required: false - type: integer - platform-arg: ngpus - help-string: "Specify the number of GPUs required." - - key: function-id - required: false - type: uuid - platform-arg: function_id - help-string: "Specify the NVQC function Id." - - key: function-version-id - required: false - type: uuid - platform-arg: version_id - help-string: "Specify the NVQC function version Id." - - key: api-key - required: false - type: string - platform-arg: api_key - help-string: "Specify NVQC API key." - -config: - gen-target-backend: true - platform-library: mqpu - platform-qpu: NvcfSimulatorQPU - library-mode: false - preprocessor-defines: ["-D CUDAQ_REMOTE_SIM"] - link-libs: ["-lcudaq-remote-simulator-qpu"] diff --git a/runtime/cudaq/platform/mqpu/remote/RemoteSimulatorQPU.cpp b/runtime/cudaq/platform/mqpu/remote/RemoteSimulatorQPU.cpp index a15d318c8c4..b49c138e949 100644 --- a/runtime/cudaq/platform/mqpu/remote/RemoteSimulatorQPU.cpp +++ b/runtime/cudaq/platform/mqpu/remote/RemoteSimulatorQPU.cpp @@ -25,17 +25,6 @@ class RemoteSimulatorQPU : public cudaq::BaseRemoteSimulatorQPU { virtual ~RemoteSimulatorQPU() = default; }; -/// Implementation of QPU subtype that submits simulation request to NVCF. -class NvcfSimulatorQPU : public cudaq::BaseNvcfSimulatorQPU { -public: - NvcfSimulatorQPU() : BaseNvcfSimulatorQPU() { - m_mlirContext = cudaq::initializeMLIR(); - } - - NvcfSimulatorQPU(NvcfSimulatorQPU &&) = delete; - virtual ~NvcfSimulatorQPU() = default; -}; } // namespace CUDAQ_REGISTER_TYPE(cudaq::QPU, RemoteSimulatorQPU, RemoteSimulatorQPU) -CUDAQ_REGISTER_TYPE(cudaq::QPU, NvcfSimulatorQPU, NvcfSimulatorQPU) diff --git a/runtime/cudaq/platform/qpu.h b/runtime/cudaq/platform/qpu.h index a40b745bb18..0db13ec46c6 100644 --- a/runtime/cudaq/platform/qpu.h +++ b/runtime/cudaq/platform/qpu.h @@ -22,7 +22,6 @@ namespace cudaq { class gradient; class optimizer; -class SerializedCodeExecutionContext; /// Expose the function that will return the current ExecutionManager ExecutionManager *getExecutionManager(); @@ -193,12 +192,6 @@ class QPU : public registry::RegisteredType { "simulated QPU. This is not supported."); } - /// Launch serialized code for remote execution. Subtypes that support this - /// should override this function. - virtual void launchSerializedCodeExecution( - const std::string &name, - cudaq::SerializedCodeExecutionContext &serializeCodeExecutionObject) {} - /// @brief Notify the QPU that a new random seed value is set. /// By default do nothing, let subclasses override. virtual void onRandomSeedSet(std::size_t seed) {} diff --git a/runtime/cudaq/platform/quantum_platform.cpp b/runtime/cudaq/platform/quantum_platform.cpp index e0a2a2319ea..133482dbff5 100644 --- a/runtime/cudaq/platform/quantum_platform.cpp +++ b/runtime/cudaq/platform/quantum_platform.cpp @@ -223,22 +223,6 @@ void quantum_platform::launchKernel(const std::string &kernelName, qpu->launchKernel(kernelName, rawArgs); } -void quantum_platform::launchSerializedCodeExecution( - const std::string &name, - SerializedCodeExecutionContext &serializeCodeExecutionObject) { - std::size_t qpu_id = 0; - - auto tid = std::hash{}(std::this_thread::get_id()); - { - std::shared_lock lock(threadToQpuIdMutex); - auto iter = threadToQpuId.find(tid); - if (iter != threadToQpuId.end()) - qpu_id = iter->second; - } - auto &qpu = platformQPUs[qpu_id]; - qpu->launchSerializedCodeExecution(name, serializeCodeExecutionObject); -} - void quantum_platform::onRandomSeedSet(std::size_t seed) { // Send on the notification to all QPUs. for (auto &qpu : platformQPUs) diff --git a/runtime/cudaq/platform/quantum_platform.h b/runtime/cudaq/platform/quantum_platform.h index 6e4fdb4cec9..30cc5f85da6 100644 --- a/runtime/cudaq/platform/quantum_platform.h +++ b/runtime/cudaq/platform/quantum_platform.h @@ -29,7 +29,6 @@ namespace cudaq { class QPU; class gradient; class optimizer; -class SerializedCodeExecutionContext; struct RuntimeTarget; /// Typedefs for defining the connectivity structure of a QPU @@ -164,12 +163,6 @@ class quantum_platform { std::uint64_t resultOffset, const std::vector &rawArgs); void launchKernel(const std::string &kernelName, const std::vector &); - // This method is the hook for executing SerializedCodeExecutionContext - // objects. - void launchSerializedCodeExecution( - const std::string &name, - SerializedCodeExecutionContext &serializeCodeExecutionObject); - /// List all available platforms static std::vector list_platforms(); diff --git a/runtime/cudaq/remote_capabilities.h b/runtime/cudaq/remote_capabilities.h index ef96f7d6cfd..32795102333 100644 --- a/runtime/cudaq/remote_capabilities.h +++ b/runtime/cudaq/remote_capabilities.h @@ -15,9 +15,6 @@ namespace cudaq { struct RemoteCapabilities { /// True if the remote can perform state overlap operations. bool stateOverlap = false; - /// True if the remote can perform serialized code execution (raw Python - /// commands). - bool serializedCodeExec = false; /// True if the remote can perform an entire VQE operation without and /// back-and-forth client/server communications. bool vqe = false; @@ -26,8 +23,7 @@ struct RemoteCapabilities { bool isRemoteSimulator = false; /// Constructor that broadcasts \p initValue to all fields. RemoteCapabilities(bool initValue) - : stateOverlap(initValue), serializedCodeExec(initValue), vqe(initValue), - isRemoteSimulator(initValue) {} + : stateOverlap(initValue), vqe(initValue), isRemoteSimulator(initValue) {} }; } // namespace cudaq diff --git a/scripts/install_prerequisites.sh b/scripts/install_prerequisites.sh index 957ec64be70..2db42c8a1dd 100644 --- a/scripts/install_prerequisites.sh +++ b/scripts/install_prerequisites.sh @@ -35,7 +35,6 @@ toolchain='' exclude_prereq='' install_all=true -lock_file="" this_file_dir=`dirname "$(readlink -f "${BASH_SOURCE[0]}")"` __optind__=$OPTIND OPTIND=1 @@ -47,19 +46,6 @@ while getopts ":e:t:ml:-:" opt; do ;; m) install_all=false ;; - l) lock_file="$OPTARG" - ;; - -) case $OPTARG in - lock-file) - lock_file="${!OPTIND}" - OPTIND=$((OPTIND + 1)) - ;; - *) - echo "Invalid long option --$OPTARG" >&2 - (return 0 2>/dev/null) && return 1 || exit 1 - ;; - esac - ;; :) echo "Option -$OPTARG requires an argument." (return 0 2>/dev/null) && return 1 || exit 1 ;; @@ -70,63 +56,6 @@ while getopts ":e:t:ml:-:" opt; do done OPTIND=$__optind__ -lookup_tpls_sha() { - local path="$1" - - # Using lock file - if [[ -f $lock_file ]]; then - awk -v p="$path" '$2==p{print $1}' "$lock_file" && return 0 - fi -} - -# Clone the third-party libraries to include its source code in the NVQC docker image. -if [ -n "$lock_file" ]; then - if [ ! -f "$lock_file" ]; then - echo "Lock file $lock_file not found." - (return 0 2>/dev/null) && return 1 || exit 1 - fi - - echo "Using lock file: $lock_file" - - tpls_root="${CUDAQ_INSTALL_PREFIX:-/opt/cuda}" - tpls_dir="$tpls_root/tpls" - mkdir -p "$tpls_dir" - this_file_dir=`dirname "$(readlink -f "${BASH_SOURCE[0]}")"` - - echo "Cloning additional third-party libraries into $tpls_dir..." - mkdir -p "$tpls_dir" - # make sure we are at the repo root - cd "$this_file_dir" - - # for each submodule..url in .gitmodules - git config --file .gitmodules --get-regexp 'submodule\..*\.url' | \ - while read -r key url; do - # key = "submodule.tpls/foo.url" - sub=${key#submodule.} # -> "tpls/foo.url" - sub=${sub%.url} # -> "tpls/foo" - path=$(git config --file .gitmodules --get "submodule.$sub.path") - lib=$(basename "$path") # -> "foo" - dest="$tpls_dir/$lib" - - echo "Processing submodule $lib at path $path ..." - repo="$(git config --file=.gitmodules submodule.$path.url)" - echo "Repository URL: $repo" - - commit="$(lookup_tpls_sha "$path")" || { - echo "ERROR: could not resolve pinned commit for $path. Aborting $lib." >&2 - exit 1 - } - echo "Using commit $commit for $lib." - - echo "Cloning $lib@$commit from $repo into $dest ..." - git clone --no-checkout --filter=tree:0 "$repo" "$dest" \ - && git -C "$dest" fetch --depth 1 origin "$commit" \ - && git -C "$dest" checkout --detach FETCH_HEAD \ - || { echo "Failed to clone $lib"; continue; } - done - (return 0 2>/dev/null) && return 0 || exit 0 -fi - if $install_all; then LLVM_INSTALL_PREFIX=${LLVM_INSTALL_PREFIX:-/opt/llvm} PYBIND11_INSTALL_PREFIX=${PYBIND11_INSTALL_PREFIX:-/usr/local/pybind11} diff --git a/scripts/nvqc_launch.sh b/scripts/nvqc_launch.sh deleted file mode 100644 index 81b5ea45691..00000000000 --- a/scripts/nvqc_launch.sh +++ /dev/null @@ -1,39 +0,0 @@ -# ============================================================================ # -# Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. # -# All rights reserved. # -# # -# This source code and the accompanying materials are made available under # -# the terms of the Apache License 2.0 which accompanies this distribution. # -# ============================================================================ # - -cat /opt/nvidia/cudaq/build_info.txt - -# Launch script: launch cudaq-qpud (nvcf mode) with MPI ranks == Number of NVIDIA GPUs -# IMPORTANT: -# (1) NVCF function must set container environment variable `NUM_GPUS` -# equal to the number of GPUs on the target platform. This will allow clients to query -# the function capability (number of GPUs) by looking at function info. The below -# entry point script helps prevent mis-configuration by checking that functions are -# created and deployed appropriately. -# (2) NVCF function must set container environment variable `NVQC_REST_PAYLOAD_VERSION` equal -# to the RestRequest payload version with which `cudaq-qpud` in the deployment Docker image was compiled. -# Failure to do so will result in early exits of the entry point command, thus deployment failure. -EXPECTED_REST_PAYLOAD_VERSION="$(cudaq-qpud --type nvcf --schema-version | grep -o "CUDA-Q REST API version: \S*" | cut -d ":" -f 2 | tr -d " ")" -if [[ "$NVQC_REST_PAYLOAD_VERSION" != "$EXPECTED_REST_PAYLOAD_VERSION" ]]; then - echo "Invalid Deployment: NVQC_REST_PAYLOAD_VERSION environment variable ($NVQC_REST_PAYLOAD_VERSION) does not match cudaq-qpud (expected $EXPECTED_REST_PAYLOAD_VERSION)." - exit 1 -fi - -python3 /nvqc_scripts/nvqc_proxy.py & - -NUM_ACTUAL_GPUS=$(nvidia-smi --list-gpus | wc -l) -if [[ "$NUM_GPUS" == "$NUM_ACTUAL_GPUS" ]]; then - cd /tmp - CMDSTR="mpiexec -np $NUM_ACTUAL_GPUS cudaq-qpud --type nvcf --port 3031" - while true; do - echo "export PATH=${PATH}; $CMDSTR" | sudo su -s /bin/bash nobody - done -else - echo "Invalid Deployment: Number of GPUs does not match the hardware" - exit 1 -fi diff --git a/scripts/release.sh b/scripts/release.sh index 9980f0f808a..429dc68fadc 100644 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -51,7 +51,7 @@ done # Check that all three workflows above completed successfully. # Once the publishing completes, you should see a draft release on GitHub for the new version. -# Check that all nightly integration tests and NVQC regression tests are enabled and run successfully with the release image. +# Check that all nightly integration tests are enabled and run successfully with the release image. # Work with QA to get the release candidate fully validated. # Go to the draft release on GitHub and download the python wheels and metapackages in the draft release. diff --git a/scripts/validate_container.sh b/scripts/validate_container.sh index 301cbc62454..53149eaadbf 100644 --- a/scripts/validate_container.sh +++ b/scripts/validate_container.sh @@ -81,7 +81,7 @@ available_backends=`\ qpu=${platform##* } requirements=$(cat $file | grep "gpu-requirements:") gpus=${requirements##* } - if [ "${qpu}" != "remote_rest" ] && [ "${qpu}" != "NvcfSimulatorQPU" ] \ + if [ "${qpu}" != "remote_rest" ] \ && [ "${qpu}" != "fermioniq" ] && [ "${qpu}" != "orca" ] \ && [ "${qpu}" != "pasqal" ] && [ "${qpu}" != "quera" ] \ && ($gpu_available || [ -z "$gpus" ] || [ "${gpus,,}" == "false" ]); then \ @@ -217,11 +217,6 @@ do echo "Skipping $t target due to incomplete MPI installation."; echo ":white_flag: $filename: Incomplete MPI installation. Test skipped." >> "${tmpFile}_$(echo $t | tr - _)" continue - - else - # TODO: remove this once the nvqc backend is part of the validation - # tracked in https://github.com/NVIDIA/cuda-quantum/issues/1283 - target_flag+=" --enable-mlir" fi fi @@ -409,8 +404,8 @@ fi # Python snippet validation if [ -d "snippets/" ]; then - # Skip NVQC and multi-GPU snippets. - for ex in `find snippets/ -name '*.py' -not -path '*/nvqc/*' -not -path '*/multi_gpu_workflows/*' | sort`; + # Skip multi-GPU snippets. + for ex in `find snippets/ -name '*.py' -not -path '*/multi_gpu_workflows/*' | sort`; do filename=$(basename -- "$ex") filename="${filename%.*}" diff --git a/scripts/validate_pycudaq.sh b/scripts/validate_pycudaq.sh index d13bac2068f..145085de35e 100644 --- a/scripts/validate_pycudaq.sh +++ b/scripts/validate_pycudaq.sh @@ -32,7 +32,6 @@ # Note: To run the target tests, make sure to set all necessary API keys: # COPY docs/sphinx/targets/python /tmp/targets/ -# ENV NVQC_API_KEY=... # ENV ... __optind__=$OPTIND @@ -177,7 +176,7 @@ fi # Run snippets in docs # Some snippets generate plots python3 -m pip install --user matplotlib -for ex in `find "$root_folder/snippets" -name '*.py' -not -path '*/nvqc/*'`; do +for ex in `find "$root_folder/snippets" -name '*.py'`; do echo "Executing $ex" python3 "$ex" if [ ! $? -eq 0 ]; then @@ -185,16 +184,6 @@ for ex in `find "$root_folder/snippets" -name '*.py' -not -path '*/nvqc/*'`; do status_sum=$((status_sum+1)) fi done -if [ -n "${NVQC_API_KEY}" ]; then - for ex in `find "$root_folder/snippets" -name '*.py' -path '*/nvqc/*'`; do - echo "Executing $ex" - python3 "$ex" - if [ ! $? -eq 0 ]; then - echo -e "\e[01;31mFailed to execute $ex.\e[0m" >&2 - status_sum=$((status_sum+1)) - fi - done -fi # Run examples # Some examples generate plots @@ -208,9 +197,6 @@ for ex in `find "$root_folder/examples" -name '*.py'`; do # to submit a (paid) job to Amazon Braket (includes QuEra). echo -e "\e[01;31mWarning: Explicitly set target braket or quera in $ex; skipping validation due to paid submission.\e[0m" >&2 skip_example=true - elif [ "$t" == "nvqc" ] && [ -z "${NVQC_API_KEY}" ]; then - echo -e "\e[01;31mWarning: Explicitly set target nvqc in $ex; skipping validation due to missing API key.\e[0m" >&2 - skip_example=true elif [ "$t" == "pasqal" ] && [ -z "${PASQAL_PASSWORD}" ]; then echo -e "\e[01;31mWarning: Explicitly set target pasqal in $ex; skipping validation due to missing token.\e[0m" >&2 skip_example=true @@ -246,9 +232,6 @@ if [ -d "$root_folder/targets" ]; then elif [ "$t" == "oqc" ] && [ -z "${OQC_URL}" ]; then echo -e "\e[01;31mWarning: Explicitly set target oqc in $ex; skipping validation due to missing URL.\e[0m" >&2 skip_example=true - elif [ "$t" == "nvqc" ] && [ -z "${NVQC_API_KEY}" ]; then - echo -e "\e[01;31mWarning: Explicitly set target nvqc in $ex; skipping validation due to missing API key.\e[0m" >&2 - skip_example=true elif [ "$t" == "pasqal" ] && [ -z "${PASQAL_PASSWORD}" ]; then echo -e "\e[01;31mWarning: Explicitly set target pasqal in $ex; skipping validation due to missing token.\e[0m" >&2 skip_example=true diff --git a/targettests/Remote-Sim/args_synthesis.cpp b/targettests/Remote-Sim/args_synthesis.cpp index bf09524ce2d..f7e3b7134d0 100644 --- a/targettests/Remote-Sim/args_synthesis.cpp +++ b/targettests/Remote-Sim/args_synthesis.cpp @@ -13,8 +13,7 @@ // clang-format on // This is a comprehensive set of tests for kernel argument synthesis for remote -// platforms. Note: we use the remote-mqpu platform in MLIR mode as a mock -// environment for NVQC. +// platforms. #include #include diff --git a/tools/cudaq-qpud/CMakeLists.txt b/tools/cudaq-qpud/CMakeLists.txt index 7fc3769ad58..4c74b7a1cea 100644 --- a/tools/cudaq-qpud/CMakeLists.txt +++ b/tools/cudaq-qpud/CMakeLists.txt @@ -33,5 +33,3 @@ export_executable_symbols_for_plugins(${TOOL_NAME}) install(TARGETS ${TOOL_NAME} DESTINATION bin) install(FILES ${TOOL_NAME}.py DESTINATION bin) -install(FILES nvqc_proxy.py DESTINATION bin) -install(FILES json_request_runner.py DESTINATION bin) diff --git a/tools/cudaq-qpud/README.md b/tools/cudaq-qpud/README.md index 9e43f25d738..e399cd1d8f4 100644 --- a/tools/cudaq-qpud/README.md +++ b/tools/cudaq-qpud/README.md @@ -1,19 +1,18 @@ -# `remote-mqpu` / `nvqc` Debugging Tips +# `remote-mqpu` Debugging Tips This file contains tips and tricks for when you are performing manual testing/ -debugging for `remote-mqpu` or `nvqc` targets. This file is primarily intended +debugging for `remote-mqpu` targets. This file is primarily intended for **CUDA-Q developers, not end users**. See the user-facing docs here: - [`remote-mqpu`](https://nvidia.github.io/cuda-quantum/latest/using/backends/platform.html#remote-mqpu-platform) -- [`nvqc`](https://nvidia.github.io/cuda-quantum/latest/using/backends/nvqc.html) ## Fully local within `cuda-quantum-dev` container The first step is usually to run the server in a separate window from the client by disabling any sort of auto-launch capabilities. -1. In one window, launch `cudaq-qpud --port 3030 --type nvcf`. You may also +1. In one window, launch `cudaq-qpud --port 3030`. You may also prefix this command with `CUDAQ_LOG_LEVEL=info` to turn on additional logging in the server. 2. @@ -21,82 +20,3 @@ client by disabling any sort of auto-launch capabilities. something like this: `cudaq.set_target('remote-mqpu', url='localhost:3030')`. - If you are using C++, change your `nvq++` command to something like this: `nvq++ --target remote-mqpu --remote-mqpu-url localhost:3030`. - -Note: when you run the server with `--type nvcf`, that means that the -`cudaq-qpud` process will shut down after every invocation, so you will have to -manually restart it if you want to invoke it again. - -## Fully local testing using a true NVQC image - -If you want to run the server in a fully "contained" environment like it is run -for NVQC, then you can perform the following steps. - -_Note: the following steps use a Docker image tag that is primarily intended -for CUDA-Q developers, not end users. End users can still use these -instructions to do any testing they would like, but it is recommended that they -choose a different Docker tag name._ - -1. Build your NVQC server Docker container using this command: `docker build -t nvcr.io/pnyjrcojiblh/cuda-quantum/cuda-quantum:custom -f docker/release/cudaq.nvqc.Dockerfile .` -2. Launch the server on your local machine: `docker run -it --rm --gpus all --network=host -e NVQC_REST_PAYLOAD_VERSION=1.1 -e NUM_GPUS=1 -e WATCHDOG_TIMEOUT_SEC=3600 -e RUN_AS_NOBODY=1 nvcr.io/pnyjrcojiblh/cuda-quantum/cuda-quantum:custom` - - Note: You need to set the environment variables as intended for your - environment. If you are running on a multi-GPU machine, you may - want to set `NUM_GPUS=4` (updating `4` to the correct number for your - machine) and use something like `--gpus '"device=0,1,2,4"'` (in case your - machine has more GPUs than you want activated with `NUM_GPUS`.) -3. Get the IP address of your host by running `ifconfig`. For the following - steps, let's assume your IP address is `172.31.123.45`. -4. Similar to step 2 in the section above, but changing `localhost` to the - correct IP address obtained in step 3: - - If you are using Python, change your `cudaq.set_target` line to be - something like this: `cudaq.set_target('remote-mqpu', url='172.31.123.45:3030')`. - - If you are using C++, change your `nvq++` command to something like this: - `nvq++ --target remote-mqpu --remote-mqpu-url 172.31.123.45:3030`. - -## Running your own image on the NVQC server - -_Note: the following steps use a Docker image tag that is primarily intended -for CUDA-Q developers, not end users. End users can still use these -instructions to do any testing they would like, but it is recommended that they -choose a different Docker tag name._ - -1. After building your `nvcr.io/pnyjrcojiblh/cuda-quantum/cuda-quantum:custom` - image, you can `docker push` it (assuming you have authorized credentials). -2. Either use the `ngc.nvidia.com` Web GUI to deploy your function, or use the - `ngc-cli`. Both are documented [here](https://docs.nvidia.com/cloud-functions/user-guide/latest/cloud-function/function-deployment.html#deploying-a-function). -3. When done, un-deploy your function and remove your custom image using a command like this: `ngc registry image remove nvcr.io/pnyjrcojiblh/cuda-quantum/cuda-quantum:custom`. - -## Special notes about running Python code on the server (`CUDAQ_SER_CODE_EXEC` / `serializedCodeExecutionContext`) - -Remote Python execution does not actually use `cudaq-qpud` to execute Python -code. That is actually achieved by `tools/cudaq-qpud/nvqc_proxy.py` and -`tools/cudaq-qpud/json_request_runner.py`. `nvqc_proxy.py` is a proxy that sits -at the front end of the NVQC server. It runs on port `3030` and redirects all -`cudaq-qpud`-bound traffic to port `3031`. However, if the JSON request contains -a `serializedCodeExecutionContext` field, then the request is sent to -`json_request_runner.py`, not `cudaq-qpud`. - -The following diagram shows the high-level sequence for these operations. - -```mermaid -sequenceDiagram -User Program ->> CUDA-Q : Job request -CUDA-Q ->> CUDA-Q : Serialize data -CUDA-Q ->> nvqc_proxy : Submit program and data -alt If request contains 'serializedCodeExecutionContext' - nvqc_proxy ->> nvqc_proxy : Write Python code to temp file - nvqc_proxy ->> json_request_runner : subprocess.run - json_request_runner ->> json_request_runner : Deserialize data - json_request_runner ->> json_request_runner : Execute program - json_request_runner ->> json_request_runner : Write results to temp file - json_request_runner ->> json_request_runner : Shutdown - nvqc_proxy ->> nvqc_proxy : Read results from temp file -else 'serializedCodeExecutionContext' does not exist - nvqc_proxy ->> cudaq-qpud : Request - cudaq-qpud ->> cudaq-qpud : Process - cudaq-qpud ->> nvqc_proxy : Results - cudaq-qpud ->> cudaq-qpud : Shutdown and restart -end -nvqc_proxy ->> CUDA-Q : Results -CUDA-Q ->> CUDA-Q : Deserialize data -CUDA-Q ->> User Program : Results -``` diff --git a/tools/cudaq-qpud/RestServerMain.cpp b/tools/cudaq-qpud/RestServerMain.cpp index 9065e6bc871..1e287ca4bed 100644 --- a/tools/cudaq-qpud/RestServerMain.cpp +++ b/tools/cudaq-qpud/RestServerMain.cpp @@ -81,9 +81,6 @@ int main(int argc, char **argv) { cudaq::registry::get(serverSubType); if (printRestPayloadVersion) { - // IMPORTANT: Don't change this message without updating - // `scripts/nvqc_launch.sh`, which relies on the this information to perform - // deployment sanity check. printf("\nCUDA-Q REST API version: %d.%d\n", restServer->version().first, restServer->version().second); return 0; diff --git a/tools/cudaq-qpud/json_request_runner.py b/tools/cudaq-qpud/json_request_runner.py deleted file mode 100644 index fa0db9d1f9b..00000000000 --- a/tools/cudaq-qpud/json_request_runner.py +++ /dev/null @@ -1,173 +0,0 @@ -# ============================================================================ # -# Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. # -# All rights reserved. # -# # -# This source code and the accompanying materials are made available under # -# the terms of the Apache License 2.0 which accompanies this distribution. # -# ============================================================================ # - -# Set `MPLCONFIGDIR` if running as nobody in order to prevent a warning message -# that is telling the truth about extended loading times. -import os -if 'nonexistent' in os.environ['HOME']: - os.environ['MPLCONFIGDIR'] = os.getcwd() - -import cudaq -import sys -import json -import subprocess -import importlib -from datetime import datetime -import re - -# Pattern to detect ANSI escape color code in the error message -ANSI_PATTERN = re.compile(r'\x1B[@-_][0-?]*[ -/]*[@-~]') - - -def get_deserialized_dict(scoped_dict): - deserialized_dict = {} - - # If the scoped_dict is one big JSON string, then load it into a - # dictionary-like object. - if isinstance(scoped_dict, str): - scoped_dict = json.loads(scoped_dict) - - # Do two passes. Save the unpacking of cudaq.kernels for the second pass so - # that they can see and utilize global variables unpacked in the first pass. - for p in range(2): - isFirstPass = (p == 0) - for key, val in scoped_dict.items(): - isKernel = "/" in key and ".PyKernelDecorator" in key - try: - if "/" in key and ((isFirstPass and not isKernel) or - (not isFirstPass is isKernel)): - key, val_type = key.split('/') - if val_type.startswith('cudaq.'): - module_name, type_name = val_type.rsplit('.', 1) - module = importlib.import_module(module_name) - type_class = getattr(module, type_name) - if isFirstPass: - result = type_class.from_json(json.dumps(val)) - else: - result = type_class.from_json( - json.dumps(val), deserialized_dict) - deserialized_dict[key] = result - else: - raise Exception(f'Invalid val_type in key: {val_type}') - elif isFirstPass: - deserialized_dict[key] = val - except Exception as e: - raise Exception(f"Error deserializing key '{key}': {e}") - - return deserialized_dict - - -if __name__ == "__main__": - try: - requestStart = int(datetime.now().timestamp() * 1000) - - # Expected command-line arguments: - # `sys.argv[0] = json_request_runner.py` - # `sys.argv[1] = ` - # `sys.argv[2] = --use-mpi=<0|1>` - if '--use-mpi=1' in sys.argv: - cudaq.mpi.initialize() - - # Read request - if len(sys.argv) < 3: - raise (Exception('Too few command-line arguments')) - jsonFile = sys.argv[1] - with open(jsonFile, 'rb') as fp: - request = json.load(fp) - - serialized_ctx = request['serializedCodeExecutionContext'] - source_code = serialized_ctx['source_code'] - - # Limit imports for the user code to a small subset of possible imports. - imports_code = '\n'.join([ - 'import cudaq', 'from cudaq import spin', 'import math', - 'import numpy', 'import numpy as np', - 'from typing import List, Tuple' - ]) - - # Be sure to do this before running any code from `serialized_ctx` - globals_dict = get_deserialized_dict(serialized_ctx['scoped_var_dict']) - - # Determine which target to set - sim2target = { - 'qpp': 'qpp-cpu', - 'custatevec_fp32': 'nvidia', - 'custatevec_fp64': 'nvidia-fp64', - 'tensornet': 'tensornet', - 'tensornet_mps': 'tensornet-mps', - 'dm': 'density-matrix-cpu', - 'nvidia_mgpu': 'nvidia-mgpu', - 'nvidia_mqpu': 'nvidia-mqpu', - 'nvidia_mqpu-fp64': 'nvidia-mqpu-fp64' - } - simulator_name = request['simulator'] - simulator_name = simulator_name.replace('-', '_') - target_name = sim2target[simulator_name] - - # Validate the full source code - full_source = f'{imports_code}\n{source_code}' - # TODO: validate - - # Execute imports - exec(imports_code, globals_dict) - - # Perform setup - exec(f'cudaq.set_target("{target_name}")', globals_dict) - seed_num = int(request['seed']) - if seed_num > 0: - exec(f'cudaq.set_random_seed({seed_num})', globals_dict) - - # Initialize output dictionary - result = { - "status": "success", - "executionContext": { - "shots": 0, - "hasConditionalsOnMeasureResults": False - } - } - globals_dict['_json_request_result'] = result - - # Execute main source_code - simulationStart = int(datetime.now().timestamp() * 1000) - if target_name == 'nvidia-mgpu' or ( - not cudaq.mpi.is_initialized()) or cudaq.mpi.rank() == 0: - exec(source_code, globals_dict) - simulationEnd = int(datetime.now().timestamp() * 1000) - - # Collect results - result = globals_dict['_json_request_result'] - try: - cmd_result = subprocess.run(['cudaq-qpud', '--cuda-properties'], - capture_output=True, - text=True) - deviceProps = json.loads(cmd_result.stdout) - except: - deviceProps = dict() - - executionInfo = { - 'requestStart': requestStart, - 'simulationStart': simulationStart, - 'simulationEnd': simulationEnd, - 'deviceProps': deviceProps - } - result['executionInfo'] = executionInfo - except Exception as e: - error_message = ANSI_PATTERN.sub('', str(e)) - result = { - 'status': 'Failed to process incoming request', - 'errorMessage': error_message - } - finally: - # Only rank 0 prints the result - if not (cudaq.mpi.is_initialized()) or (cudaq.mpi.rank() == 0): - with open(jsonFile, 'w') as fp: - json.dump(result, fp) - fp.flush() - - if cudaq.mpi.is_initialized(): - cudaq.mpi.finalize() diff --git a/tools/cudaq-qpud/nvqc_proxy.py b/tools/cudaq-qpud/nvqc_proxy.py deleted file mode 100644 index ea9933e537a..00000000000 --- a/tools/cudaq-qpud/nvqc_proxy.py +++ /dev/null @@ -1,283 +0,0 @@ -# ============================================================================ # -# Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates. # -# All rights reserved. # -# # -# This source code and the accompanying materials are made available under # -# the terms of the Apache License 2.0 which accompanies this distribution. # -# ============================================================================ # - -from http import HTTPStatus -import http.server -import json -import requests -import socketserver -import sys -import time -import json -import subprocess -import os -import tempfile -import shutil -import pathlib - -# This reverse proxy application is needed to span the small gaps when -# `cudaq-qpud` is shutting down and starting up again. This small reverse proxy -# allows the NVCF port (3030) to remain up while allowing the main `cudaq-qpud` -# application to restart if necessary. -PROXY_PORT = 3030 -QPUD_PORT = 3031 # see `scripts/nvqc_launch.sh` - -NUM_GPUS = 0 -MPI_FOUND = False -WATCHDOG_TIMEOUT_SEC = 0 -RUN_AS_NOBODY = False # Expect this to be overridden to true for NVQC deployment -SUDO_FOUND = False -CUDAQ_SER_CODE_EXEC = False - - -def build_command_list(temp_file_name: str) -> list[str]: - """ - Build the command essentially from right to left, pre-pending wrapper - commands as necessary for this invocation. - """ - current_script_path = os.path.abspath(__file__) - json_req_path = os.path.join(os.path.dirname(current_script_path), - 'json_request_runner.py') - cmd_list = [sys.executable, json_req_path, temp_file_name] - if NUM_GPUS > 1 and MPI_FOUND: - cmd_list = ['mpiexec', '--allow-run-as-root', '-np', - str(NUM_GPUS)] + cmd_list - cmd_list += ['--use-mpi=1'] # `--use-mpi` must come at the end - else: - cmd_list += ['--use-mpi=0'] # `--use-mpi` must come at the end - # The timeout must be inside the `su`/`sudo` commands in order to function. - if WATCHDOG_TIMEOUT_SEC > 0: - cmd_list = ['timeout', str(WATCHDOG_TIMEOUT_SEC)] + cmd_list - if RUN_AS_NOBODY: - cmd_list = ['su', '-s', '/bin/bash', 'nobody', '-c', ' '.join(cmd_list)] - if SUDO_FOUND: - cmd_list = ['sudo'] + cmd_list - - return cmd_list - - -class ThreadedHTTPServer(socketserver.ThreadingMixIn, http.server.HTTPServer): - """Handle requests in a separate thread.""" - - -class Server(http.server.SimpleHTTPRequestHandler): - protocol_version = 'HTTP/1.1' - default_request_version = 'HTTP/1.1' - - # Override this function because we seem to be getting a lot of - # ConnectionResetError exceptions in the health monitoring endpoint, - # producing lots of ugly stack traces in the logs. Hopefully this will - # reduce them. - def handle_one_request(self): - try: - super().handle_one_request() - except ConnectionResetError as e: - if self.path != '/': - print(f"Connection was reset by peer: {e}") - except Exception as e: - print(f"Unhandled exception: {e}") - - def log_message(self, format, *args): - # Don't log the health endpoint queries - if len(args) > 0 and args[0] != "GET / HTTP/1.1": - super().log_message(format, *args) - - def do_GET(self): - # Allow the proxy to automatically handle the health endpoint. The proxy - # will exit if the application's /job endpoint is down. - if self.path == '/': - self.send_response(HTTPStatus.OK) - self.send_header('Content-Type', 'application/json') - message = json.dumps({"status": "OK"}).encode('utf-8') - self.send_header("Content-Length", str(len(message))) - self.end_headers() - self.wfile.write(message) - else: - self.send_response(HTTPStatus.NOT_FOUND) - self.send_header("Content-Length", "0") - self.end_headers() - - def is_serialized_code_execution_request(self, request_json): - return 'serializedCodeExecutionContext' in request_json and 'source_code' in request_json[ - 'serializedCodeExecutionContext'] and request_json[ - 'serializedCodeExecutionContext']['source_code'] != '' - - def write_asset_if_necessary(self, message): - """ - If the output message is too large, and if the proxy is servicing NVCF - requests, then write the original message to a file and modify the - outgoing message to reference that new file. - """ - if 'NVCF-MAX-RESPONSE-SIZE-BYTES' in self.headers: - max_response_len = int(self.headers['NVCF-MAX-RESPONSE-SIZE-BYTES']) - if len(message) > max_response_len: - try: - outputDir = self.headers['NVCF-LARGE-OUTPUT-DIR'] - reqId = self.headers['NVCF-REQID'] - resultFile = f'{outputDir}/{reqId}_result.json' - with open(resultFile, 'wb') as fp: - fp.write(message) - fp.flush() - result = {'resultFile': resultFile} - message = json.dumps(result).encode('utf-8') - except Exception as e: - result = { - 'status': 'Exception during output processing', - 'errorMessage': str(e) - } - message = json.dumps(result).encode('utf-8') - return message - - def read_asset_if_necessary(self, request_data): - """ - If there is an asset ID in the headers, replace the incoming message - with the contents of a file read from disk. - """ - asset_id = self.headers.get('NVCF-FUNCTION-ASSET-IDS', '') - if len(asset_id) > 0: - try: - asset_dir = self.headers['NVCF-ASSET-DIR'] - filename = f'{asset_dir}/{asset_id}' - with open(filename, 'rb') as f: - request_data = f.read() - except Exception: - # If something failed, simply forward the original message - pass - return request_data - - def do_POST(self): - if self.path == '/job': - qpud_up = False - retries = 0 - qpud_url = 'http://localhost:' + str(QPUD_PORT) - while (not qpud_up): - try: - ping_response = requests.get(qpud_url) - qpud_up = (ping_response.status_code == HTTPStatus.OK) - except: - qpud_up = False - if not qpud_up: - retries += 1 - if retries > 100: - print("PROXY EXIT: TOO MANY RETRIES!") - sys.exit() - print( - "Main application is down, retrying (retry_count = {})..." - .format(retries)) - time.sleep(0.1) - - content_length = int(self.headers['Content-Length']) - if content_length: - # Look for any asset references in the job request. If one - # exists, then that means the request is actually in a file. - request_data = self.rfile.read(content_length) - request_data = self.read_asset_if_necessary(request_data) - request_json = json.loads(request_data) - - if self.is_serialized_code_execution_request(request_json): - if CUDAQ_SER_CODE_EXEC: - result = {'status': 'uninitialized', 'errorMessage': ''} - with tempfile.NamedTemporaryFile( - dir=temp_dir, delete=False) as temp_file: - temp_file.write(request_data) - temp_file.flush() - - # Make it world writable so that the `subprocess` can write - # the results to the file. - os.chmod(temp_file.name, 0o666) - - # We also must get to a directory where "nobody" can see (in - # order to make MPI happy) - save_dir = os.getcwd() - os.chdir(pathlib.Path(temp_file.name).parent) - cmd_list = build_command_list(temp_file.name) - cmd_result = subprocess.run(cmd_list, - capture_output=False, - text=True) - - with open(temp_file.name, 'rb') as fp: - result = json.load(fp) - - if cmd_result.returncode == 124: - result = { - 'status': - 'json_request_runner.py time out', - 'errorMessage': - 'Timeout occurred during execution' - } - - # Cleanup - os.chdir(save_dir) - if RUN_AS_NOBODY: - if SUDO_FOUND: - os.system('sudo pkill -9 -u nobody') - else: - os.system('pkill -9 -u nobody') - os.remove(temp_file.name) - else: - result = { - 'status': - 'Invalid Request', - 'errorMessage': - 'Server does not support serializedCodeExecutionContext at this time' - } - - self.send_response(HTTPStatus.OK) - self.send_header('Content-Type', 'application/json') - message = json.dumps(result).encode('utf-8') - message = self.write_asset_if_necessary(message) - self.send_header('Content-Length', str(len(message))) - self.end_headers() - self.wfile.write(message) - else: - res = requests.request(method=self.command, - url=qpud_url + self.path, - headers=self.headers, - data=request_data) - self.send_response(HTTPStatus.OK) - self.send_header('Content-Type', 'application/json') - message = json.dumps(res.json()).encode('utf-8') - self.send_header("Content-Length", str(len(message))) - self.end_headers() - self.wfile.write(message) - else: - self.send_response(HTTPStatus.BAD_REQUEST) - self.send_header("Content-Length", "0") - self.end_headers() - else: - self.send_response(HTTPStatus.NOT_FOUND) - self.send_header("Content-Length", "0") - self.end_headers() - - -if __name__ == "__main__": - try: - NUM_GPUS = int(subprocess.getoutput('nvidia-smi --list-gpus | wc -l')) - if 'NUM_GPUS' in os.environ: - NUM_GPUS = min(NUM_GPUS, int(os.environ['NUM_GPUS'])) - except: - NUM_GPUS = 0 - MPI_FOUND = (shutil.which('mpiexec') != None) - SUDO_FOUND = (shutil.which('sudo') != None) - WATCHDOG_TIMEOUT_SEC = int( - os.environ.get('WATCHDOG_TIMEOUT_SEC', WATCHDOG_TIMEOUT_SEC)) - RUN_AS_NOBODY = int(os.environ.get('RUN_AS_NOBODY', 0)) > 0 - CUDAQ_SER_CODE_EXEC = int( - os.environ.get('CUDAQ_SER_CODE_EXEC', CUDAQ_SER_CODE_EXEC)) > 0 - - temp_dir = tempfile.gettempdir() - if RUN_AS_NOBODY: - temp_dir = os.path.join(temp_dir, 'nvqc_proxy') - os.makedirs(temp_dir, exist_ok=True) - os.chmod(temp_dir, 0o777) # Allow "nobody" to write to this directory. - - Handler = Server - with ThreadedHTTPServer(("", PROXY_PORT), Handler) as httpd: - print("Serving at port", PROXY_PORT) - print("Forward to port", QPUD_PORT) - httpd.serve_forever() diff --git a/tools/nvqpp/nvq++.in b/tools/nvqpp/nvq++.in index e0f3c98774e..26420c824b7 100644 --- a/tools/nvqpp/nvq++.in +++ b/tools/nvqpp/nvq++.in @@ -646,13 +646,6 @@ COMPILER_FLAGS="${CPPSTD} ${COMPILER_FLAGS}" # the resultant binary to target that specified backend. OBJS_TO_MERGE="" if [ -n "${TARGET_CONFIG}" ]; then - # Disable compilation on non-x86 machines when targetting NVQC. - # See https://github.com/NVIDIA/cuda-quantum/issues/1345 for current status. - if [ "${TARGET_CONFIG}" == "nvqc" ]; then - if [ "${HOST_TARGET:0:6}" != "x86_64" ]; then - error_exit "Cannot use nvqc target from non-x86_64 client at this time" - fi - fi TARGET_CONFIG_YML_FILE="${install_dir}/targets/${TARGET_CONFIG}.yml" GEN_TARGET_BACKEND=false if [ -f "${TARGET_CONFIG_YML_FILE}" ]; then