From b4b06192b01dcffb57318e710f8f38eaebbca9c1 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 5 Mar 2025 16:17:06 -0600 Subject: [PATCH 01/17] cuml-cpu: enable conda import tests, update dependencies to match cuml --- ci/build_python.sh | 5 ++--- conda/recipes/cuml-cpu/meta.yaml | 13 +++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/ci/build_python.sh b/ci/build_python.sh index 3730d826c5..4f5e094516 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -30,14 +30,13 @@ RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry build \ sccache --show-adv-stats -# Build cuml-cpu only in CUDA 11 jobs since it only depends on python +# Build cuml-cpu only in CUDA 12 jobs since it only depends on python # version RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" -if [[ ${RAPIDS_CUDA_MAJOR} == "11" ]]; then +if [[ ${RAPIDS_CUDA_MAJOR} == "12" ]]; then sccache --zero-stats RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry build \ - --no-test \ conda/recipes/cuml-cpu sccache --show-adv-stats diff --git a/conda/recipes/cuml-cpu/meta.yaml b/conda/recipes/cuml-cpu/meta.yaml index a4c6950171..00d63ad7ea 100644 --- a/conda/recipes/cuml-cpu/meta.yaml +++ b/conda/recipes/cuml-cpu/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. 
{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set py_version = environ['CONDA_PY'] %} @@ -33,14 +33,15 @@ requirements: - python x.x - numpy>=1.23,<3.0a0 - pandas - - scikit-learn=1.2 - - hdbscan>=0.8.38,<0.8.39 + - scikit-learn=1.5.* + - hdbscan>=0.8.39,<0.8.40 - umap-learn=0.5.6 - nvtx -tests: # [linux64] - imports: # [linux64] - - cuml # [linux64] +tests: + # test that the package is installable and these modules are importable + imports: + - cuml about: home: https://rapids.ai/ From 2c8f9071efe49a4085ab4e382af21ffc8b263338 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 5 Mar 2025 17:09:40 -0600 Subject: [PATCH 02/17] test key is 'test:' not 'tests:' --- conda/recipes/cuml-cpu/meta.yaml | 2 +- conda/recipes/cuml/meta.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/recipes/cuml-cpu/meta.yaml b/conda/recipes/cuml-cpu/meta.yaml index 00d63ad7ea..a08b698150 100644 --- a/conda/recipes/cuml-cpu/meta.yaml +++ b/conda/recipes/cuml-cpu/meta.yaml @@ -38,7 +38,7 @@ requirements: - umap-learn=0.5.6 - nvtx -tests: +test: # test that the package is installable and these modules are importable imports: - cuml diff --git a/conda/recipes/cuml/meta.yaml b/conda/recipes/cuml/meta.yaml index b76b05e060..ae083b738e 100644 --- a/conda/recipes/cuml/meta.yaml +++ b/conda/recipes/cuml/meta.yaml @@ -95,7 +95,7 @@ requirements: - treelite {{ treelite_version }} - rapids-logger =0.1 -tests: +test: requirements: - cuda-version ={{ cuda_version }} imports: From 796c15e1627f9f515c8085ac674869ebc23998ef Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 6 Mar 2025 11:24:50 -0600 Subject: [PATCH 03/17] declare dependency on 'packaging' --- conda/recipes/cuml-cpu/meta.yaml | 1 + conda/recipes/cuml/meta.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/conda/recipes/cuml-cpu/meta.yaml b/conda/recipes/cuml-cpu/meta.yaml index a08b698150..37fb5b0b61 100644 --- a/conda/recipes/cuml-cpu/meta.yaml
+++ b/conda/recipes/cuml-cpu/meta.yaml @@ -32,6 +32,7 @@ requirements: run: - python x.x - numpy>=1.23,<3.0a0 + - packaging - pandas - scikit-learn=1.5.* - hdbscan>=0.8.39,<0.8.40 diff --git a/conda/recipes/cuml/meta.yaml b/conda/recipes/cuml/meta.yaml index ae083b738e..0c97e0e56c 100644 --- a/conda/recipes/cuml/meta.yaml +++ b/conda/recipes/cuml/meta.yaml @@ -88,6 +88,7 @@ requirements: - libcuml ={{ version }} - libcumlprims ={{ minor_version }} - numpy >=1.23,<3.0a0 + - packaging - pylibraft ={{ minor_version }} - python x.x - raft-dask ={{ minor_version }} From dbd1a316832fb8c948065ec301ab9e532dc31434 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 7 Mar 2025 16:48:13 -0600 Subject: [PATCH 04/17] try putting cupy import in a TYPE_CHECKING block --- python/cuml/cuml/model_selection/_split.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/python/cuml/cuml/model_selection/_split.py b/python/cuml/cuml/model_selection/_split.py index 227f0eb297..6de2cb4a9e 100644 --- a/python/cuml/cuml/model_selection/_split.py +++ b/python/cuml/cuml/model_selection/_split.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2019-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ # limitations under the License. 
# -from typing import Optional, Union, List, Tuple +from typing import Optional, Union, List, Tuple, TYPE_CHECKING from cuml.common import input_to_cuml_array from cuml.internals.input_utils import ( @@ -31,12 +31,15 @@ cudf = gpu_only_import("cudf") cp = gpu_only_import("cupy") -cupyx = gpu_only_import("cupyx") np = cpu_only_import("numpy") cuda = gpu_only_import_from("numba", "cuda") +if TYPE_CHECKING: + import cupy as cp + + def _compute_stratify_split_indices( indices: cp.ndarray, stratify: CumlArray, From 6cdc2d51b8e06f8f256baecf5471ec02f1be1e6e Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 10 Mar 2025 09:53:25 -0500 Subject: [PATCH 05/17] defer type hint evaluation for cupy types --- python/cuml/cuml/model_selection/_split.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/cuml/cuml/model_selection/_split.py b/python/cuml/cuml/model_selection/_split.py index 6de2cb4a9e..d8e45348aa 100644 --- a/python/cuml/cuml/model_selection/_split.py +++ b/python/cuml/cuml/model_selection/_split.py @@ -41,12 +41,12 @@ def _compute_stratify_split_indices( - indices: cp.ndarray, + indices: "cp.ndarray", stratify: CumlArray, n_train: int, n_test: int, - random_state: cp.random.RandomState, -) -> Tuple[cp.ndarray]: + random_state: "cp.random.RandomState", +) -> "Tuple[cp.ndarray]": """ Compute the indices for stratified split based on stratify keys. Based on scikit-learn stratified split implementation. 
@@ -186,7 +186,7 @@ def train_test_split( train_size: Optional[Union[float, int]] = None, shuffle: bool = True, random_state: Optional[ - Union[int, cp.random.RandomState, np.random.RandomState] + Union[int, "cp.random.RandomState", np.random.RandomState] ] = None, stratify=None, ): From 9313e50a1a522164ccb3e2bcd9eb65dd02324eb1 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 10 Mar 2025 10:57:51 -0500 Subject: [PATCH 06/17] use deferred evaluation of type hints, make cupyx a GPU-only import --- .github/workflows/pr.yaml | 313 +++++++++--------- python/cuml/cuml/model_selection/_split.py | 18 +- .../cuml/cuml/preprocessing/LabelEncoder.py | 2 +- .../cuml/cuml/thirdparty_adapters/adapters.py | 6 +- 4 files changed, 168 insertions(+), 171 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 26b181f86f..b7f65b4645 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -13,124 +13,123 @@ jobs: # Please keep pr-builder as the top job here pr-builder: needs: - - check-nightly-ci - - changed-files - - checks - - clang-tidy + # - check-nightly-ci + # - changed-files + # - checks + # - clang-tidy - conda-cpp-build - - conda-cpp-tests - - conda-cpp-checks + # - conda-cpp-tests + # - conda-cpp-checks - conda-python-build - conda-python-tests-singlegpu - conda-python-tests-dask - conda-notebook-tests - docs-build - - telemetry-setup - - wheel-build-libcuml - - wheel-build-cuml - - wheel-tests-cuml - - devcontainer + # - telemetry-setup + # - wheel-build-libcuml + # - wheel-build-cuml + # - wheel-tests-cuml + # - devcontainer secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.04 if: always() with: needs: ${{ toJSON(needs) }} - telemetry-setup: - runs-on: ubuntu-latest - continue-on-error: true - env: - OTEL_SERVICE_NAME: "pr-cuml" - steps: - - name: Telemetry setup - if: ${{ vars.TELEMETRY_ENABLED == 'true' }} - uses: 
rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main - check-nightly-ci: - # Switch to ubuntu-latest once it defaults to a version of Ubuntu that - # provides at least Python 3.11 (see - # https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat) - runs-on: ubuntu-24.04 - env: - RAPIDS_GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - steps: - - name: Check if nightly CI is passing - uses: rapidsai/shared-actions/check_nightly_success/dispatch@main - with: - repo: cuml - # TODO: remove this once upstream issues in RAFT are resolved on 11.4. - # The limit was temporarily increased to unblock work. - max_days_without_success: 30 - changed-files: - secrets: inherit - needs: telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.04 - with: - files_yaml: | - test_cpp: - - '**' - - '!.devcontainer/**' - - '!.pre-commit-config.yaml' - - '!CONTRIBUTING.md' - - '!README.md' - - '!docs/**' - - '!img/**' - - '!notebooks/**' - - '!python/**' - - '!thirdparty/LICENSES/**' - test_notebooks: - - '**' - - '!.devcontainer/**' - - '!.pre-commit-config.yaml' - - '!CONTRIBUTING.md' - - '!README.md' - - '!thirdparty/LICENSES/**' - test_python: - - '**' - - '!.devcontainer/**' - - '!.pre-commit-config.yaml' - - '!CONTRIBUTING.md' - - '!README.md' - - '!docs/**' - - '!img/**' - - '!notebooks/**' - - '!thirdparty/LICENSES/**' - checks: - secrets: inherit - needs: telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.04 - with: - enable_check_generated_files: false - ignored_pr_jobs: >- - optional-job-conda-python-tests-cudf-pandas-integration telemetry-summarize - clang-tidy: - needs: checks - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.04 - with: - build_type: pull-request - node_type: "cpu8" - arch: "amd64" - container_image: "rapidsai/ci-conda:cuda11.8.0-ubuntu22.04-py3.10" - run_script: "ci/run_clang_tidy.sh" + # telemetry-setup: + # 
runs-on: ubuntu-latest + # continue-on-error: true + # env: + # OTEL_SERVICE_NAME: "pr-cuml" + # steps: + # - name: Telemetry setup + # if: ${{ vars.TELEMETRY_ENABLED == 'true' }} + # uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main + # check-nightly-ci: + # # Switch to ubuntu-latest once it defaults to a version of Ubuntu that + # # provides at least Python 3.11 (see + # # https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat) + # runs-on: ubuntu-24.04 + # env: + # RAPIDS_GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # steps: + # - name: Check if nightly CI is passing + # uses: rapidsai/shared-actions/check_nightly_success/dispatch@main + # with: + # repo: cuml + # # TODO: remove this once upstream issues in RAFT are resolved on 11.4. + # # The limit was temporarily increased to unblock work. + # max_days_without_success: 30 + # changed-files: + # secrets: inherit + # needs: telemetry-setup + # uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.04 + # with: + # files_yaml: | + # test_cpp: + # - '**' + # - '!.devcontainer/**' + # - '!.pre-commit-config.yaml' + # - '!CONTRIBUTING.md' + # - '!README.md' + # - '!docs/**' + # - '!img/**' + # - '!notebooks/**' + # - '!python/**' + # - '!thirdparty/LICENSES/**' + # test_notebooks: + # - '**' + # - '!.devcontainer/**' + # - '!.pre-commit-config.yaml' + # - '!CONTRIBUTING.md' + # - '!README.md' + # - '!thirdparty/LICENSES/**' + # test_python: + # - '**' + # - '!.devcontainer/**' + # - '!.pre-commit-config.yaml' + # - '!CONTRIBUTING.md' + # - '!README.md' + # - '!docs/**' + # - '!img/**' + # - '!notebooks/**' + # - '!thirdparty/LICENSES/**' + # checks: + # secrets: inherit + # needs: telemetry-setup + # uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.04 + # with: + # enable_check_generated_files: false + # ignored_pr_jobs: >- + # optional-job-conda-python-tests-cudf-pandas-integration telemetry-summarize + # clang-tidy: + # needs: 
checks + # secrets: inherit + # uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.04 + # with: + # build_type: pull-request + # node_type: "cpu8" + # arch: "amd64" + # container_image: "rapidsai/ci-conda:cuda11.8.0-ubuntu22.04-py3.10" + # run_script: "ci/run_clang_tidy.sh" conda-cpp-build: - needs: checks secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.04 with: build_type: pull-request - conda-cpp-tests: - needs: [conda-cpp-build, changed-files] - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.04 - if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp - with: - build_type: pull-request - conda-cpp-checks: - needs: conda-cpp-build - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.04 - with: - build_type: pull-request - enable_check_symbols: true + # conda-cpp-tests: + # needs: [conda-cpp-build, changed-files] + # secrets: inherit + # uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.04 + # if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp + # with: + # build_type: pull-request + # conda-cpp-checks: + # needs: conda-cpp-build + # secrets: inherit + # uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.04 + # with: + # build_type: pull-request + # enable_check_symbols: true conda-python-build: needs: conda-cpp-build secrets: inherit @@ -138,7 +137,7 @@ jobs: with: build_type: pull-request conda-python-tests-singlegpu: - needs: [conda-python-build, changed-files] + needs: [conda-python-build] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.04 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python @@ -146,7 +145,7 @@ jobs: build_type: pull-request script: "ci/test_python_singlegpu.sh" 
optional-job-conda-python-tests-cudf-pandas-integration: - needs: [conda-python-build, changed-files] + needs: [conda-python-build] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.04 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python @@ -155,7 +154,7 @@ jobs: build_type: pull-request script: "ci/test_python_integration.sh" conda-python-tests-dask: - needs: [conda-python-build, changed-files] + needs: [conda-python-build] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.04 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python @@ -163,7 +162,7 @@ jobs: build_type: pull-request script: "ci/test_python_dask.sh" conda-notebook-tests: - needs: [conda-python-build, changed-files] + needs: [conda-python-build] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.04 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_notebooks @@ -183,55 +182,55 @@ jobs: arch: "amd64" container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_docs.sh" - wheel-build-libcuml: - needs: checks - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.04 - with: - build_type: pull-request - branch: ${{ inputs.branch }} - sha: ${{ inputs.sha }} - date: ${{ inputs.date }} - script: ci/build_wheel_libcuml.sh - extra-repo: rapidsai/cumlprims_mg - extra-repo-sha: branch-25.04 - extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY - # build for every combination of arch and CUDA version, but only for the latest Python - matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) - wheel-build-cuml: - needs: [checks, wheel-build-libcuml] - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.04 - with: - build_type: pull-request - script: 
ci/build_wheel_cuml.sh - wheel-tests-cuml: - needs: [wheel-build-cuml, changed-files] - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.04 - if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python - with: - build_type: pull-request - script: ci/test_wheel.sh - devcontainer: - needs: telemetry-setup - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.04 - with: - arch: '["amd64"]' - cuda: '["12.8"]' - extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY - build_command: | - sccache -z; - build-all --verbose; - sccache -s; + # wheel-build-libcuml: + # needs: checks + # secrets: inherit + # uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.04 + # with: + # build_type: pull-request + # branch: ${{ inputs.branch }} + # sha: ${{ inputs.sha }} + # date: ${{ inputs.date }} + # script: ci/build_wheel_libcuml.sh + # extra-repo: rapidsai/cumlprims_mg + # extra-repo-sha: branch-25.04 + # extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY + # # build for every combination of arch and CUDA version, but only for the latest Python + # matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) + # wheel-build-cuml: + # needs: [checks, wheel-build-libcuml] + # secrets: inherit + # uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.04 + # with: + # build_type: pull-request + # script: ci/build_wheel_cuml.sh + # wheel-tests-cuml: + # needs: [wheel-build-cuml, changed-files] + # secrets: inherit + # uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.04 + # if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python + # with: + # build_type: pull-request + # script: ci/test_wheel.sh + # devcontainer: + # needs: telemetry-setup + # secrets: inherit + # uses: 
rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.04 + # with: + # arch: '["amd64"]' + # cuda: '["12.8"]' + # extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY + # build_command: | + # sccache -z; + # build-all --verbose; + # sccache -s; - telemetry-summarize: - # This job must use a self-hosted runner to record telemetry traces. - runs-on: linux-amd64-cpu4 - needs: pr-builder - if: ${{ vars.TELEMETRY_ENABLED == 'true' && !cancelled() }} - continue-on-error: true - steps: - - name: Telemetry summarize - uses: rapidsai/shared-actions/telemetry-dispatch-summarize@main + # telemetry-summarize: + # # This job must use a self-hosted runner to record telemetry traces. + # runs-on: linux-amd64-cpu4 + # needs: pr-builder + # if: ${{ vars.TELEMETRY_ENABLED == 'true' && !cancelled() }} + # continue-on-error: true + # steps: + # - name: Telemetry summarize + # uses: rapidsai/shared-actions/telemetry-dispatch-summarize@main diff --git a/python/cuml/cuml/model_selection/_split.py b/python/cuml/cuml/model_selection/_split.py index d8e45348aa..6e452c93ab 100644 --- a/python/cuml/cuml/model_selection/_split.py +++ b/python/cuml/cuml/model_selection/_split.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +from __future__ import annotations -from typing import Optional, Union, List, Tuple, TYPE_CHECKING +from typing import Optional, Union, List, Tuple from cuml.common import input_to_cuml_array from cuml.internals.input_utils import ( determine_array_type, determine_df_obj_type, - output_to_df_obj_like, ) +from cuml.internals.output_utils import output_to_df_obj_like from cuml.internals.mem_type import MemoryType from cuml.internals.array import array_to_memory_order, CumlArray from cuml.internals.safe_imports import ( @@ -31,22 +32,19 @@ cudf = gpu_only_import("cudf") cp = gpu_only_import("cupy") +cupyx = gpu_only_import("cupyx") np = cpu_only_import("numpy") cuda = gpu_only_import_from("numba", "cuda") -if TYPE_CHECKING: - import cupy as cp - - def _compute_stratify_split_indices( - indices: "cp.ndarray", + indices: cp.ndarray, stratify: CumlArray, n_train: int, n_test: int, - random_state: "cp.random.RandomState", -) -> "Tuple[cp.ndarray]": + random_state: cp.random.RandomState, +) -> Tuple[cp.ndarray]: """ Compute the indices for stratified split based on stratify keys. Based on scikit-learn stratified split implementation. @@ -186,7 +184,7 @@ def train_test_split( train_size: Optional[Union[float, int]] = None, shuffle: bool = True, random_state: Optional[ - Union[int, "cp.random.RandomState", np.random.RandomState] + Union[int, cp.random.RandomState, np.random.RandomState] ] = None, stratify=None, ): diff --git a/python/cuml/cuml/preprocessing/LabelEncoder.py b/python/cuml/cuml/preprocessing/LabelEncoder.py index ebc25c7041..9b2d0e2a40 100644 --- a/python/cuml/cuml/preprocessing/LabelEncoder.py +++ b/python/cuml/cuml/preprocessing/LabelEncoder.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +from __future__ import annotations from typing import TYPE_CHECKING @@ -21,7 +22,6 @@ from cuml.common.exceptions import NotFittedError from cuml.internals.safe_imports import ( cpu_only_import, - cpu_only_import_from, gpu_only_import, ) diff --git a/python/cuml/cuml/thirdparty_adapters/adapters.py b/python/cuml/cuml/thirdparty_adapters/adapters.py index 8963a7a497..6b6d818fed 100644 --- a/python/cuml/cuml/thirdparty_adapters/adapters.py +++ b/python/cuml/cuml/thirdparty_adapters/adapters.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,12 +14,10 @@ # limitations under the License. # -from cupyx.scipy import sparse as gpu_sparse from scipy import sparse as cpu_sparse from scipy.sparse import csc_matrix as cpu_coo_matrix from scipy.sparse import csc_matrix as cpu_csc_matrix from cuml.internals.safe_imports import cpu_only_import_from -from cupyx.scipy.sparse import csc_matrix as gpu_coo_matrix from cuml.internals.safe_imports import gpu_only_import_from from cuml.internals.global_settings import GlobalSettings from cuml.internals.input_utils import input_to_cupy_array, input_to_host_array @@ -28,6 +26,8 @@ np = cpu_only_import("numpy") cp = gpu_only_import("cupy") +gpu_sparse = gpu_only_import("cupyx.scipy.sparse") +gpu_coo_matrix = gpu_only_import_from("cupyx.scipy.sparse", "coo_matrix") gpu_csr_matrix = gpu_only_import_from("cupyx.scipy.sparse", "csr_matrix") gpu_csc_matrix = gpu_only_import_from("cupyx.scipy.sparse", "csc_matrix") cpu_csr_matrix = cpu_only_import_from("scipy.sparse", "csr_matrix") From a75cb8a1ff30bb82c6c70a0cd4c5f6180aa1f401 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 10 Mar 2025 12:20:14 -0500 Subject: [PATCH 07/17] try deferred evaluation with strings instead --- python/cuml/cuml/preprocessing/LabelEncoder.py | 8 +++----- 1 
file changed, 3 insertions(+), 5 deletions(-) diff --git a/python/cuml/cuml/preprocessing/LabelEncoder.py b/python/cuml/cuml/preprocessing/LabelEncoder.py index 9b2d0e2a40..5b43085311 100644 --- a/python/cuml/cuml/preprocessing/LabelEncoder.py +++ b/python/cuml/cuml/preprocessing/LabelEncoder.py @@ -13,8 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from __future__ import annotations - from typing import TYPE_CHECKING from cuml import Base @@ -209,7 +207,7 @@ def fit(self, y, _classes=None): self.dtype = y.dtype if y.dtype != cp.dtype("O") else str return self - def transform(self, y) -> cudf.Series: + def transform(self, y) -> "cudf.Series": """ Transform an input into its categorical keys. @@ -244,7 +242,7 @@ def transform(self, y) -> cudf.Series: return encoded - def fit_transform(self, y, z=None) -> cudf.Series: + def fit_transform(self, y, z=None) -> "cudf.Series": """ Simultaneously fit and transform an input @@ -260,7 +258,7 @@ def fit_transform(self, y, z=None) -> cudf.Series: return y.cat.codes - def inverse_transform(self, y: cudf.Series) -> cudf.Series: + def inverse_transform(self, y: cudf.Series) -> "cudf.Series": """ Revert ordinal label to original label From ea05d3440fc54c63bcc1ea1cbd8f52dfb9539a64 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 10 Mar 2025 13:31:48 -0500 Subject: [PATCH 08/17] one more hint --- python/cuml/cuml/preprocessing/LabelEncoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/cuml/preprocessing/LabelEncoder.py b/python/cuml/cuml/preprocessing/LabelEncoder.py index 5b43085311..37e2099171 100644 --- a/python/cuml/cuml/preprocessing/LabelEncoder.py +++ b/python/cuml/cuml/preprocessing/LabelEncoder.py @@ -258,7 +258,7 @@ def fit_transform(self, y, z=None) -> "cudf.Series": return y.cat.codes - def inverse_transform(self, y: cudf.Series) -> "cudf.Series": + def inverse_transform(self, y: "cudf.Series") -> "cudf.Series": 
""" Revert ordinal label to original label From ed28c4077c3c5af2e797964745ca762fd3e70898 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 10 Mar 2025 15:38:32 -0500 Subject: [PATCH 09/17] get a more informative error --- python/cuml/cuml/internals/base_return_types.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/cuml/cuml/internals/base_return_types.py b/python/cuml/cuml/internals/base_return_types.py index 00c796a64c..7e2f96ffbc 100644 --- a/python/cuml/cuml/internals/base_return_types.py +++ b/python/cuml/cuml/internals/base_return_types.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -97,7 +97,8 @@ def _get_base_return_type(class_name, attr): if attr.__annotations__["return"].replace("'", "") == class_name: return "base" except Exception: - assert False, "Shouldn't get here" - return None + raise AssertionError( + f"Failed to determine return type for {class_name}.{attr}. This is a bug in cuML, please report it." + ) return None From 35a47e0af0bbe913a7426888a9370d4751581519 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 10 Mar 2025 17:34:57 -0500 Subject: [PATCH 10/17] try removing return type hints with cuDF types --- python/cuml/cuml/internals/base_return_types.py | 2 +- python/cuml/cuml/preprocessing/LabelEncoder.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/cuml/cuml/internals/base_return_types.py b/python/cuml/cuml/internals/base_return_types.py index 7e2f96ffbc..fca75abca9 100644 --- a/python/cuml/cuml/internals/base_return_types.py +++ b/python/cuml/cuml/internals/base_return_types.py @@ -98,7 +98,7 @@ def _get_base_return_type(class_name, attr): return "base" except Exception: raise AssertionError( - f"Failed to determine return type for {class_name}.{attr}. 
This is a bug in cuML, please report it." + f"Failed to determine return type for {attr} (class = '${class_name}'). This is a bug in cuML, please report it." ) return None diff --git a/python/cuml/cuml/preprocessing/LabelEncoder.py b/python/cuml/cuml/preprocessing/LabelEncoder.py index 37e2099171..9d4c50e38c 100644 --- a/python/cuml/cuml/preprocessing/LabelEncoder.py +++ b/python/cuml/cuml/preprocessing/LabelEncoder.py @@ -207,7 +207,7 @@ def fit(self, y, _classes=None): self.dtype = y.dtype if y.dtype != cp.dtype("O") else str return self - def transform(self, y) -> "cudf.Series": + def transform(self, y): """ Transform an input into its categorical keys. @@ -242,7 +242,7 @@ def transform(self, y) -> "cudf.Series": return encoded - def fit_transform(self, y, z=None) -> "cudf.Series": + def fit_transform(self, y, z=None): """ Simultaneously fit and transform an input @@ -258,7 +258,7 @@ def fit_transform(self, y, z=None) -> "cudf.Series": return y.cat.codes - def inverse_transform(self, y: "cudf.Series") -> "cudf.Series": + def inverse_transform(self, y: "cudf.Series"): """ Revert ordinal label to original label From d62060a83c46ce471b46b47ffdef7db766da843d Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 11 Mar 2025 09:27:36 -0500 Subject: [PATCH 11/17] remove more unconditional imports of cudf --- python/cuml/cuml/dask/common/input_utils.py | 3 +-- .../cuml/feature_extraction/_vectorizers.py | 5 ++-- python/cuml/cuml/preprocessing/encoders.py | 27 ++++++++++--------- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/python/cuml/cuml/dask/common/input_utils.py b/python/cuml/cuml/dask/common/input_utils.py index 40650f96a0..4d93ceeece 100644 --- a/python/cuml/cuml/dask/common/input_utils.py +++ b/python/cuml/cuml/dask/common/input_utils.py @@ -27,7 +27,6 @@ from dask_cudf import Series as dcSeries from dask.dataframe import Series as daskSeries from dask.dataframe import DataFrame as daskDataFrame -from cudf import Series from 
cuml.internals.safe_imports import gpu_only_import_from from collections import OrderedDict from cuml.internals.memory_utils import with_cupy_rmm @@ -197,7 +196,7 @@ def _get_datatype_from_inputs(data): @with_cupy_rmm def concatenate(objs, axis=0): - if isinstance(objs[0], DataFrame) or isinstance(objs[0], Series): + if isinstance(objs[0], DataFrame) or isinstance(objs[0], cudf.Series): if len(objs) == 1: return objs[0] else: diff --git a/python/cuml/cuml/feature_extraction/_vectorizers.py b/python/cuml/cuml/feature_extraction/_vectorizers.py index 0133195b20..bf4ccd7308 100644 --- a/python/cuml/cuml/feature_extraction/_vectorizers.py +++ b/python/cuml/cuml/feature_extraction/_vectorizers.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ # from cuml.internals.safe_imports import cpu_only_import import cuml.internals.logger as logger -from cudf.utils.dtypes import min_signed_type from cuml.internals.type_utils import CUPY_SPARSE_DTYPES import numbers from cuml.internals.safe_imports import gpu_only_import @@ -256,7 +255,7 @@ def _compute_empty_doc_ids(self, count_df, n_doc): of documents. 
""" remaining_docs = count_df["doc_id"].unique() - dtype = min_signed_type(n_doc) + dtype = cudf.utils.dtypes.min_signed_type(n_doc) doc_ids = cudf.DataFrame( data={"all_ids": cp.arange(0, n_doc, dtype=dtype)}, dtype=dtype ) diff --git a/python/cuml/cuml/preprocessing/encoders.py b/python/cuml/cuml/preprocessing/encoders.py index 0a75f54b0e..04d5e3ac72 100644 --- a/python/cuml/cuml/preprocessing/encoders.py +++ b/python/cuml/cuml/preprocessing/encoders.py @@ -16,7 +16,6 @@ from typing import List, Optional, TypeVar import cuml.internals.logger as logger -from cudf import DataFrame, Series from cuml import Base from cuml.common.doc_utils import generate_docstring from cuml.common.exceptions import NotFittedError @@ -95,7 +94,7 @@ def _check_input(self, X, is_categories=False): self._set_input_type("array") if is_categories: X = X.transpose() - return DataFrame(X) + return cudf.DataFrame(X) else: self._set_input_type("df") return X @@ -346,7 +345,7 @@ def _compute_drop_idx(self): ) drop_idx = dict() for feature in self.drop.keys(): - self.drop[feature] = Series(self.drop[feature]) + self.drop[feature] = cudf.Series(self.drop[feature]) if len(self.drop[feature]) != 1: msg = ( "Trying to drop multiple values for feature {}, " @@ -361,7 +360,7 @@ def _compute_drop_idx(self): "categories.".format(feature) ) raise ValueError(msg) - cats = Series(cats) + cats = cudf.Series(cats) idx = cats.isin(self.drop[feature]) drop_idx[feature] = cp.asarray(cats[idx].index) return drop_idx @@ -517,7 +516,7 @@ def inverse_transform(self, X): # if close: `and not cupyx.scipy.sparse.issparsecsc(X)` # and change the following line by `X = X.tocsc()` X = X.toarray() - result = DataFrame(columns=self._encoders.keys()) + result = cudf.DataFrame(columns=self._encoders.keys()) j = 0 for feature in self._encoders.keys(): feature_enc = self._encoders[feature] @@ -525,10 +524,12 @@ def inverse_transform(self, X): if self.drop is not None: # Remove dropped categories - dropped_class_idx = 
Series(self.drop_idx_[feature]) - dropped_class_mask = Series(cats).isin(cats[dropped_class_idx]) + dropped_class_idx = cudf.Series(self.drop_idx_[feature]) + dropped_class_mask = cudf.Series(cats).isin( + cats[dropped_class_idx] + ) if len(cats) == 1: - inv = Series(Index([cats[0]]).repeat(X.shape[0])) + inv = cudf.Series(Index([cats[0]]).repeat(X.shape[0])) result[feature] = inv continue cats = cats[~dropped_class_mask] @@ -536,7 +537,7 @@ def inverse_transform(self, X): enc_size = len(cats) x_feature = X[:, j : j + enc_size] idx = cp.argmax(x_feature, axis=1) - inv = Series(cats.iloc[idx]).reset_index(drop=True) + inv = cudf.Series(cats.iloc[idx]).reset_index(drop=True) if self.handle_unknown == "ignore": not_null_idx = x_feature.any(axis=1) @@ -548,7 +549,7 @@ def inverse_transform(self, X): dropped_mask = cp.asarray(x_feature.sum(axis=1) == 0).flatten() if dropped_mask.any(): inv[dropped_mask] = feature_enc.inverse_transform( - Series(self.drop_idx_[feature]) + cudf.Series(self.drop_idx_[feature]) )[0] result[feature] = inv @@ -624,7 +625,7 @@ def _slice_feat(X, i): def _get_output( output_type: Optional[str], input_type: Optional[str], - out: DataFrame, + out: "cudf.DataFrame", dtype, ): if output_type == "input": @@ -729,7 +730,7 @@ def transform(self, X): col_idx = self._encoders[feature].transform(Xi) result[feature] = col_idx - r = DataFrame(result) + r = cudf.DataFrame(result) return _get_output(self.output_type, self.input_type, r, self.dtype) @generate_docstring( @@ -766,7 +767,7 @@ def inverse_transform(self, X): inv = self._encoders[feature].inverse_transform(Xi) result[feature] = inv - r = DataFrame(result) + r = cudf.DataFrame(result) return _get_output(self.output_type, self.input_type, r, self.dtype) @classmethod From 3b68d76db4aca73a13e98cdf05f71bd36ead3102 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 11 Mar 2025 10:58:57 -0500 Subject: [PATCH 12/17] switch to context-manager form of cudf.core.buffer.acquire_spill_lock --- 
.../stem/porter_stemmer_utils/suffix_utils.py | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/python/cuml/cuml/preprocessing/text/stem/porter_stemmer_utils/suffix_utils.py b/python/cuml/cuml/preprocessing/text/stem/porter_stemmer_utils/suffix_utils.py index d030345676..a81e87c5da 100644 --- a/python/cuml/cuml/preprocessing/text/stem/porter_stemmer_utils/suffix_utils.py +++ b/python/cuml/cuml/preprocessing/text/stem/porter_stemmer_utils/suffix_utils.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -84,23 +84,25 @@ def subtract_valid(input_array, valid_bool_array, sub_val): input_array[pos] = input_array[pos] - sub_val -@cudf.core.buffer.acquire_spill_lock() def get_stem_series(word_str_ser, suffix_len, can_replace_mask): """ word_str_ser: input string column suffix_len: length of suffix to replace can_repalce_mask: bool array marking strings where to replace """ - NTHRD = 1024 - NBLCK = int(np.ceil(float(len(word_str_ser)) / float(NTHRD))) - - start_series = cudf.Series(cp.zeros(len(word_str_ser), dtype=cp.int32)) - end_ser = word_str_ser.str.len() - - end_ar = end_ser._column.data_array_view(mode="read") - can_replace_mask_ar = can_replace_mask._column.data_array_view(mode="read") - - subtract_valid[NBLCK, NTHRD](end_ar, can_replace_mask_ar, suffix_len) - return word_str_ser.str.slice_from( - starts=start_series, stops=end_ser.fillna(0) - ) + with cudf.core.buffer.acquire_spill_lock(): + NTHRD = 1024 + NBLCK = int(np.ceil(float(len(word_str_ser)) / float(NTHRD))) + + start_series = cudf.Series(cp.zeros(len(word_str_ser), dtype=cp.int32)) + end_ser = word_str_ser.str.len() + + end_ar = end_ser._column.data_array_view(mode="read") + can_replace_mask_ar = can_replace_mask._column.data_array_view( + mode="read" + ) + + 
subtract_valid[NBLCK, NTHRD](end_ar, can_replace_mask_ar, suffix_len) + return word_str_ser.str.slice_from( + starts=start_series, stops=end_ser.fillna(0) + ) From e3ef66c6589e1e862fb5b0b6fe0c4f2921d38ea5 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 11 Mar 2025 11:55:27 -0500 Subject: [PATCH 13/17] make ufunc lazy --- .../_thirdparty/sklearn/utils/sparsefuncs.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py b/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py index 7bea44a366..7d5d71054b 100644 --- a/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py +++ b/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py @@ -148,17 +148,18 @@ def mean_variance_axis(X, axis): else: _raise_typeerror(X) - -ufunc_dic = { - 'min': np.min, - 'max': np.max, - 'nanmin': np.nanmin, - 'nanmax': np.nanmax -} +# this is function to avoid attribute access at import time in CPU-only environments +def ufunc_dic(): + return { + 'min': np.min, + 'max': np.max, + 'nanmin': np.nanmin, + 'nanmax': np.nanmax + } def _minor_reduce(X, min_or_max): - fminmax = ufunc_dic[min_or_max] + fminmax = ufunc_dic()[min_or_max] major_index = np.flatnonzero(np.diff(X.indptr)) values = cpu_np.zeros(major_index.shape[0], dtype=X.dtype) @@ -209,7 +210,7 @@ def _sparse_min_or_max(X, axis, min_or_max): raise ValueError("zero-size array to reduction operation") if X.nnz == 0: return X.dtype.type(0) - fminmax = ufunc_dic[min_or_max] + fminmax = ufunc_dic()[min_or_max] m = fminmax(X.data) if np.isnan(m): if 'nan' in min_or_max: From f53a77784a2d7b29a39f56a8ab27c5fb7bdbc7d9 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 11 Mar 2025 13:39:34 -0500 Subject: [PATCH 14/17] np. 
references in function signatures --- .../cuml/cuml/_thirdparty/sklearn/preprocessing/_imputation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/cuml/_thirdparty/sklearn/preprocessing/_imputation.py b/python/cuml/cuml/_thirdparty/sklearn/preprocessing/_imputation.py index 2be0c81ff0..e7b171b8be 100644 --- a/python/cuml/cuml/_thirdparty/sklearn/preprocessing/_imputation.py +++ b/python/cuml/cuml/_thirdparty/sklearn/preprocessing/_imputation.py @@ -30,7 +30,7 @@ from cuml.internals.safe_imports import cpu_only_import numpy = cpu_only_import('numpy') -np = gpu_only_import('cupy') +np = gpu_only_import('cupy', alt=numpy) sparse = gpu_only_import_from('cupyx.scipy', 'sparse') From 153b21870e8acc370a7446f16dc50cc6ed8ff3f1 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 12 Mar 2025 16:18:16 -0500 Subject: [PATCH 15/17] fix merge conflicts --- .github/workflows/pr.yaml | 313 +++++++++--------- conda/recipes/cuml-cpu/meta.yaml | 1 - .../_thirdparty/sklearn/utils/sparsefuncs.py | 2 +- 3 files changed, 158 insertions(+), 158 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index b7f65b4645..26b181f86f 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -13,123 +13,124 @@ jobs: # Please keep pr-builder as the top job here pr-builder: needs: - # - check-nightly-ci - # - changed-files - # - checks - # - clang-tidy + - check-nightly-ci + - changed-files + - checks + - clang-tidy - conda-cpp-build - # - conda-cpp-tests - # - conda-cpp-checks + - conda-cpp-tests + - conda-cpp-checks - conda-python-build - conda-python-tests-singlegpu - conda-python-tests-dask - conda-notebook-tests - docs-build - # - telemetry-setup - # - wheel-build-libcuml - # - wheel-build-cuml - # - wheel-tests-cuml - # - devcontainer + - telemetry-setup + - wheel-build-libcuml + - wheel-build-cuml + - wheel-tests-cuml + - devcontainer secrets: inherit uses: 
rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.04 if: always() with: needs: ${{ toJSON(needs) }} - # telemetry-setup: - # runs-on: ubuntu-latest - # continue-on-error: true - # env: - # OTEL_SERVICE_NAME: "pr-cuml" - # steps: - # - name: Telemetry setup - # if: ${{ vars.TELEMETRY_ENABLED == 'true' }} - # uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main - # check-nightly-ci: - # # Switch to ubuntu-latest once it defaults to a version of Ubuntu that - # # provides at least Python 3.11 (see - # # https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat) - # runs-on: ubuntu-24.04 - # env: - # RAPIDS_GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # steps: - # - name: Check if nightly CI is passing - # uses: rapidsai/shared-actions/check_nightly_success/dispatch@main - # with: - # repo: cuml - # # TODO: remove this once upstream issues in RAFT are resolved on 11.4. - # # The limit was temporarily increased to unblock work. - # max_days_without_success: 30 - # changed-files: - # secrets: inherit - # needs: telemetry-setup - # uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.04 - # with: - # files_yaml: | - # test_cpp: - # - '**' - # - '!.devcontainer/**' - # - '!.pre-commit-config.yaml' - # - '!CONTRIBUTING.md' - # - '!README.md' - # - '!docs/**' - # - '!img/**' - # - '!notebooks/**' - # - '!python/**' - # - '!thirdparty/LICENSES/**' - # test_notebooks: - # - '**' - # - '!.devcontainer/**' - # - '!.pre-commit-config.yaml' - # - '!CONTRIBUTING.md' - # - '!README.md' - # - '!thirdparty/LICENSES/**' - # test_python: - # - '**' - # - '!.devcontainer/**' - # - '!.pre-commit-config.yaml' - # - '!CONTRIBUTING.md' - # - '!README.md' - # - '!docs/**' - # - '!img/**' - # - '!notebooks/**' - # - '!thirdparty/LICENSES/**' - # checks: - # secrets: inherit - # needs: telemetry-setup - # uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.04 - # with: - # 
enable_check_generated_files: false - # ignored_pr_jobs: >- - # optional-job-conda-python-tests-cudf-pandas-integration telemetry-summarize - # clang-tidy: - # needs: checks - # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.04 - # with: - # build_type: pull-request - # node_type: "cpu8" - # arch: "amd64" - # container_image: "rapidsai/ci-conda:cuda11.8.0-ubuntu22.04-py3.10" - # run_script: "ci/run_clang_tidy.sh" + telemetry-setup: + runs-on: ubuntu-latest + continue-on-error: true + env: + OTEL_SERVICE_NAME: "pr-cuml" + steps: + - name: Telemetry setup + if: ${{ vars.TELEMETRY_ENABLED == 'true' }} + uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main + check-nightly-ci: + # Switch to ubuntu-latest once it defaults to a version of Ubuntu that + # provides at least Python 3.11 (see + # https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat) + runs-on: ubuntu-24.04 + env: + RAPIDS_GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - name: Check if nightly CI is passing + uses: rapidsai/shared-actions/check_nightly_success/dispatch@main + with: + repo: cuml + # TODO: remove this once upstream issues in RAFT are resolved on 11.4. + # The limit was temporarily increased to unblock work. 
+ max_days_without_success: 30 + changed-files: + secrets: inherit + needs: telemetry-setup + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.04 + with: + files_yaml: | + test_cpp: + - '**' + - '!.devcontainer/**' + - '!.pre-commit-config.yaml' + - '!CONTRIBUTING.md' + - '!README.md' + - '!docs/**' + - '!img/**' + - '!notebooks/**' + - '!python/**' + - '!thirdparty/LICENSES/**' + test_notebooks: + - '**' + - '!.devcontainer/**' + - '!.pre-commit-config.yaml' + - '!CONTRIBUTING.md' + - '!README.md' + - '!thirdparty/LICENSES/**' + test_python: + - '**' + - '!.devcontainer/**' + - '!.pre-commit-config.yaml' + - '!CONTRIBUTING.md' + - '!README.md' + - '!docs/**' + - '!img/**' + - '!notebooks/**' + - '!thirdparty/LICENSES/**' + checks: + secrets: inherit + needs: telemetry-setup + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.04 + with: + enable_check_generated_files: false + ignored_pr_jobs: >- + optional-job-conda-python-tests-cudf-pandas-integration telemetry-summarize + clang-tidy: + needs: checks + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.04 + with: + build_type: pull-request + node_type: "cpu8" + arch: "amd64" + container_image: "rapidsai/ci-conda:cuda11.8.0-ubuntu22.04-py3.10" + run_script: "ci/run_clang_tidy.sh" conda-cpp-build: + needs: checks secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.04 with: build_type: pull-request - # conda-cpp-tests: - # needs: [conda-cpp-build, changed-files] - # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.04 - # if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp - # with: - # build_type: pull-request - # conda-cpp-checks: - # needs: conda-cpp-build - # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.04 - # with: - # build_type: 
pull-request - # enable_check_symbols: true + conda-cpp-tests: + needs: [conda-cpp-build, changed-files] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.04 + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp + with: + build_type: pull-request + conda-cpp-checks: + needs: conda-cpp-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-25.04 + with: + build_type: pull-request + enable_check_symbols: true conda-python-build: needs: conda-cpp-build secrets: inherit @@ -137,7 +138,7 @@ jobs: with: build_type: pull-request conda-python-tests-singlegpu: - needs: [conda-python-build] + needs: [conda-python-build, changed-files] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.04 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python @@ -145,7 +146,7 @@ jobs: build_type: pull-request script: "ci/test_python_singlegpu.sh" optional-job-conda-python-tests-cudf-pandas-integration: - needs: [conda-python-build] + needs: [conda-python-build, changed-files] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.04 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python @@ -154,7 +155,7 @@ jobs: build_type: pull-request script: "ci/test_python_integration.sh" conda-python-tests-dask: - needs: [conda-python-build] + needs: [conda-python-build, changed-files] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.04 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python @@ -162,7 +163,7 @@ jobs: build_type: pull-request script: "ci/test_python_dask.sh" conda-notebook-tests: - needs: [conda-python-build] + needs: [conda-python-build, changed-files] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.04 if: 
fromJSON(needs.changed-files.outputs.changed_file_groups).test_notebooks @@ -182,55 +183,55 @@ jobs: arch: "amd64" container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_docs.sh" - # wheel-build-libcuml: - # needs: checks - # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.04 - # with: - # build_type: pull-request - # branch: ${{ inputs.branch }} - # sha: ${{ inputs.sha }} - # date: ${{ inputs.date }} - # script: ci/build_wheel_libcuml.sh - # extra-repo: rapidsai/cumlprims_mg - # extra-repo-sha: branch-25.04 - # extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY - # # build for every combination of arch and CUDA version, but only for the latest Python - # matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) - # wheel-build-cuml: - # needs: [checks, wheel-build-libcuml] - # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.04 - # with: - # build_type: pull-request - # script: ci/build_wheel_cuml.sh - # wheel-tests-cuml: - # needs: [wheel-build-cuml, changed-files] - # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.04 - # if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python - # with: - # build_type: pull-request - # script: ci/test_wheel.sh - # devcontainer: - # needs: telemetry-setup - # secrets: inherit - # uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.04 - # with: - # arch: '["amd64"]' - # cuda: '["12.8"]' - # extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY - # build_command: | - # sccache -z; - # build-all --verbose; - # sccache -s; + wheel-build-libcuml: + needs: checks + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.04 + with: + build_type: pull-request + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} 
+ date: ${{ inputs.date }} + script: ci/build_wheel_libcuml.sh + extra-repo: rapidsai/cumlprims_mg + extra-repo-sha: branch-25.04 + extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY + # build for every combination of arch and CUDA version, but only for the latest Python + matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) + wheel-build-cuml: + needs: [checks, wheel-build-libcuml] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.04 + with: + build_type: pull-request + script: ci/build_wheel_cuml.sh + wheel-tests-cuml: + needs: [wheel-build-cuml, changed-files] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.04 + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python + with: + build_type: pull-request + script: ci/test_wheel.sh + devcontainer: + needs: telemetry-setup + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.04 + with: + arch: '["amd64"]' + cuda: '["12.8"]' + extra-repo-deploy-key: CUMLPRIMS_SSH_PRIVATE_DEPLOY_KEY + build_command: | + sccache -z; + build-all --verbose; + sccache -s; - # telemetry-summarize: - # # This job must use a self-hosted runner to record telemetry traces. - # runs-on: linux-amd64-cpu4 - # needs: pr-builder - # if: ${{ vars.TELEMETRY_ENABLED == 'true' && !cancelled() }} - # continue-on-error: true - # steps: - # - name: Telemetry summarize - # uses: rapidsai/shared-actions/telemetry-dispatch-summarize@main + telemetry-summarize: + # This job must use a self-hosted runner to record telemetry traces. 
+ runs-on: linux-amd64-cpu4 + needs: pr-builder + if: ${{ vars.TELEMETRY_ENABLED == 'true' && !cancelled() }} + continue-on-error: true + steps: + - name: Telemetry summarize + uses: rapidsai/shared-actions/telemetry-dispatch-summarize@main diff --git a/conda/recipes/cuml-cpu/meta.yaml b/conda/recipes/cuml-cpu/meta.yaml index 75e569690c..550b4dc30f 100644 --- a/conda/recipes/cuml-cpu/meta.yaml +++ b/conda/recipes/cuml-cpu/meta.yaml @@ -32,7 +32,6 @@ requirements: run: - python x.x - numpy>=1.23,<3.0a0 - - packaging - pandas - packaging - scikit-learn==1.5.* diff --git a/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py b/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py index 7d5d71054b..5ba1d455a7 100644 --- a/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py +++ b/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py @@ -148,7 +148,7 @@ def mean_variance_axis(X, axis): else: _raise_typeerror(X) -# this is function to avoid attribute access at import time in CPU-only environments +# this is a function to avoid attribute access at import time in CPU-only environments def ufunc_dic(): return { 'min': np.min, From 7cc30ccb256c8227928b56c77dea435604625a72 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 13 Mar 2025 13:09:41 -0500 Subject: [PATCH 16/17] revert unnecessary whitespace change --- python/cuml/cuml/preprocessing/LabelEncoder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cuml/cuml/preprocessing/LabelEncoder.py b/python/cuml/cuml/preprocessing/LabelEncoder.py index 8daaed9d49..7b3d88fd63 100644 --- a/python/cuml/cuml/preprocessing/LabelEncoder.py +++ b/python/cuml/cuml/preprocessing/LabelEncoder.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# + from typing import TYPE_CHECKING from cuml import Base From 6a56ca5980d6626b959d65fcec277da1457ac8c8 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 18 Mar 2025 09:20:59 -0500 Subject: [PATCH 17/17] fall back to numpy in sparsefuncs --- .../_thirdparty/sklearn/utils/sparsefuncs.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py b/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py index 5ba1d455a7..92e2ebdcbe 100644 --- a/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py +++ b/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py @@ -11,6 +11,7 @@ # This code is under BSD 3 clause license. # Authors mentioned above do not endorse or promote this production. +import numpy from ....thirdparty_adapters.sparsefuncs_fast import ( csr_mean_variance_axis0 as _csr_mean_var_axis0, @@ -21,7 +22,7 @@ from cuml.internals.safe_imports import cpu_only_import_from cpu_sp = cpu_only_import_from('scipy', 'sparse') gpu_sp = gpu_only_import_from('cupyx.scipy', 'sparse') -np = gpu_only_import('cupy') +np = gpu_only_import('cupy', alt=numpy) cpu_np = cpu_only_import('numpy') @@ -148,18 +149,17 @@ def mean_variance_axis(X, axis): else: _raise_typeerror(X) -# this is a function to avoid attribute access at import time in CPU-only environments -def ufunc_dic(): - return { - 'min': np.min, - 'max': np.max, - 'nanmin': np.nanmin, - 'nanmax': np.nanmax - } + +ufunc_dic = { + 'min': np.min, + 'max': np.max, + 'nanmin': np.nanmin, + 'nanmax': np.nanmax +} def _minor_reduce(X, min_or_max): - fminmax = ufunc_dic()[min_or_max] + fminmax = ufunc_dic[min_or_max] major_index = np.flatnonzero(np.diff(X.indptr)) values = cpu_np.zeros(major_index.shape[0], dtype=X.dtype) @@ -210,7 +210,7 @@ def _sparse_min_or_max(X, axis, min_or_max): raise ValueError("zero-size array to reduction operation") if X.nnz == 0: return X.dtype.type(0) - fminmax = ufunc_dic()[min_or_max] + fminmax = 
ufunc_dic[min_or_max] m = fminmax(X.data) if np.isnan(m): if 'nan' in min_or_max: