
Commit c111341

[auto-merge] branch-25.08 to branch-25.10 [skip ci] [bot] (#971)
auto-merge triggered by github actions on `branch-25.08` to create a PR keeping `branch-25.10` up-to-date. If this PR cannot be merged due to conflicts, it will remain open until manually fixed.
2 parents: 3d223fc + 4663782

7 files changed (+19, -10 lines)

ci/Dockerfile

Lines changed: 2 additions & 2 deletions

@@ -47,6 +47,6 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
     && conda config --set solver libmamba
 
 # install cuML
-ARG CUML_VER=25.06
-RUN conda install -y -c rapidsai -c conda-forge -c nvidia cuml=$CUML_VER cuvs=$CUML_VER python=3.10 cuda-version=12.0 numpy~=1.0 \
+ARG CUML_VER=25.08
+RUN conda install -y -c rapidsai-nightly -c conda-forge -c nvidia cuml=$CUML_VER cuvs=$CUML_VER python=3.10 cuda-version=12.0 numpy~=1.0 \
     && conda clean --all -f -y
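The image now pins cuML 25.08 from the rapidsai-nightly channel. A quick sanity check inside the built container, as a minimal sketch (the exact nightly version string is an assumption; nightlies usually report something like 25.08.00aNNN, so a prefix match is used):

# Run inside the built image to verify the nightly cuML pin took effect.
import cuml

assert cuml.__version__.startswith("25.08"), cuml.__version__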

python/run_test.sh

Lines changed: 3 additions & 1 deletion

@@ -28,8 +28,10 @@ fi
 python -m spark_rapids_ml tests_no_import_change/test_no_import_change.py 0.2
 # runs on cpu
 python tests_no_import_change/test_no_import_change.py 0.2
-# runs on gpu with spark-submit (note: local[1] for spark-rapids-submit hangs probably due to barrier rdd timer threads. TBD root cause)
+# runs on gpu with spark-submit (note: local[1] and pyspark<3.5.6 for spark-rapids-submit hangs probably due to barrier rdd timer threads. TBD root cause)
+pip install pyspark==3.5.6
 spark-rapids-submit --master local-cluster[1,1,1024] tests_no_import_change/test_no_import_change.py 0.2
+pip install -r requirements_dev.txt
 # runs on cpu with spark-submit
 spark-submit --master local-cluster[1,1,1024] tests_no_import_change/test_no_import_change.py 0.2

python/src/spark_rapids_ml/classification.py

Lines changed: 3 additions & 3 deletions

@@ -1080,9 +1080,9 @@ def _single_fit(init_parameters: Dict[str, Any]) -> Dict[str, Any]:
                 **init_parameters,
             )
 
-            logistic_regression.penalty_normalized = False
-            logistic_regression.lbfgs_memory = 10
-            logistic_regression.linesearch_max_iter = 20
+            logistic_regression.solver_model.penalty_normalized = False
+            logistic_regression.solver_model.lbfgs_memory = 10
+            logistic_regression.solver_model.linesearch_max_iter = 20
 
            if is_sparse and pdesc.partition_max_nnz > nnz_limit_for_int32:  # type: ignore
                logistic_regression._convert_index = np.int64
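This hunk tracks a cuML change: the quasi-Newton solver knobs spark-rapids-ml tunes are now reached through the estimator's solver_model attribute instead of being set on LogisticRegression directly. A minimal sketch of the same configuration against plain cuML (assumes a CUDA GPU with cuML 25.08; the data is synthetic):

import numpy as np
from cuml.linear_model import LogisticRegression

X = np.random.rand(64, 4).astype(np.float32)
y = (X[:, 0] > 0.5).astype(np.float32)

lr = LogisticRegression(fit_intercept=True, penalty="l2", C=1.0)
# Same settings as the diff, applied through solver_model.
lr.solver_model.penalty_normalized = False   # align the objective with the Spark side (assumption)
lr.solver_model.lbfgs_memory = 10            # L-BFGS history size
lr.solver_model.linesearch_max_iter = 20     # line-search steps per iteration
lr.fit(X, y)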

python/src/spark_rapids_ml/clustering.py

Lines changed: 2 additions & 2 deletions

@@ -113,7 +113,7 @@ def _get_cuml_params_default(self) -> Dict[str, Any]:
             "max_iter": 300,
             "tol": 0.0001,
             "verbose": False,
-            "random_state": 1,
+            "random_state": None,
             "init": "scalable-k-means++",
             "n_init": "auto",
             "oversampling_factor": 2.0,

@@ -506,7 +506,7 @@ def _construct_kmeans() -> CumlT:
         def _transform_internal(
             kmeans: CumlT, df: Union[pd.DataFrame, np.ndarray]
         ) -> pd.Series:
-            res = list(kmeans.predict(df, normalize_weights=False).to_numpy())
+            res = list(kmeans.predict(df).to_numpy())
             return pd.Series(res)
 
         return _construct_kmeans, _transform_internal, None
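Two behavioral updates here: the recorded cuML default for random_state is now None, so deterministic centroids require passing a seed explicitly, and predict() is called without the normalize_weights keyword, which this cuML version apparently no longer accepts (our reading of the diff). A hedged sketch against plain cuML:

import numpy as np
from cuml.cluster import KMeans

X = np.random.rand(200, 8).astype(np.float32)

# Default random_state is now None; pass a seed for reproducible clustering.
km = KMeans(n_clusters=5, random_state=42)
labels = km.fit(X).predict(X)  # no normalize_weights keyword, per this diff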

python/tests/test_approximate_nearest_neighbors.py

Lines changed: 6 additions & 1 deletion

@@ -85,6 +85,7 @@ def test_params(default_params: bool) -> None:
             "metric_expanded",
             "metric_params",
             "output_type",
+            "n_jobs",
         ],
     )

@@ -556,7 +557,11 @@ def assert_row_equal(r1: Row, r2: Row) -> None:
     )
 
     assert len(reconstructed_collect) == len(knn_df_collect)
-    if algorithm != "ivfpq" and not (algorithm == "ivfflat" and algoParams == None):
+    if (
+        algorithm != "ivfpq"
+        and not (algorithm == "ivfflat" and algoParams == None)
+        and (not algoParams or algoParams.get("build_algo") != "ivf_pq")
+    ):
         # it is fine to skip ivfpq as long as other algorithms assert the same results of approxSimilarityJoin and kneighbors.
         # Also skip ivfflat when algoParams == None. Ivfflat probes only 1/50 of the clusters, leading to unstable results.
         # ivfpq shows non-deterministic distances due to kmeans initialization uses GPU memory runtime values.
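The widened condition now also skips the approxSimilarityJoin-vs-kneighbors comparison whenever algoParams request an ivf_pq build (for example, a CAGRA index built via IVF-PQ, we assume), since such builds inherit ivfpq's non-determinism. Restated as a standalone predicate, with illustrative names:

from typing import Optional

def should_compare_join_to_kneighbors(algorithm: str, algoParams: Optional[dict]) -> bool:
    # Mirrors the test's skip logic for non-deterministic ANN configurations.
    if algorithm == "ivfpq":
        return False  # non-deterministic distances from GPU kmeans initialization
    if algorithm == "ivfflat" and algoParams is None:
        return False  # probes only 1/50 of clusters by default; unstable results
    if algoParams and algoParams.get("build_algo") == "ivf_pq":
        return False  # ivf_pq-based builds inherit the same non-determinism
    return True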

python/tests/test_logistic_regression.py

Lines changed: 2 additions & 1 deletion

@@ -441,6 +441,7 @@ def _func_test_classifier(
     cu_lr = cuLR(fit_intercept=fit_intercept, penalty=penalty, C=C, l1_ratio=l1_ratio)
     cu_lr.solver_model.penalty_normalized = False
     cu_lr.solver_model.lbfgs_memory = 10
+    cu_lr.solver_model.linesearch_max_iter = 20
     cu_lr.fit(X_train, y_train)
 
     spark_conf.update(

@@ -490,7 +491,7 @@ def to_sparse_func(v: Union[SparseVector, DenseVector]) -> SparseVector:
     spark_lr_model: LogisticRegressionModel = spark_lr.fit(train_df)
 
     # test coefficients and intercepts
-    assert spark_lr_model.n_cols == cu_lr.n_cols
+    assert spark_lr_model.n_cols == cu_lr.n_features_in_
 
     # test float32_inputs
     assert spark_lr_model._float32_inputs == float32_inputs
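The last hunk swaps cu_lr.n_cols for the scikit-learn-style n_features_in_, suggesting cuML now reports the fitted feature count through that attribute. A minimal sketch (GPU with cuML 25.08 assumed; synthetic data):

import numpy as np
from cuml.linear_model import LogisticRegression

X = np.random.rand(32, 5).astype(np.float32)
y = (X[:, 0] > 0.5).astype(np.float32)

clf = LogisticRegression().fit(X, y)
assert clf.n_features_in_ == 5  # feature count via the sklearn-style attribute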

python/tests/test_nearest_neighbors.py

Lines changed: 1 addition & 0 deletions

@@ -68,6 +68,7 @@ def test_params(default_params: bool, caplog: LogCaptureFixture) -> None:
             "metric_expanded",
             "metric_params",
             "output_type",
+            "n_jobs",
         ],
     )
     assert cuml_params == NearestNeighbors()._get_cuml_params_default()
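As in the approximate-nearest-neighbors test, "n_jobs" joins the list of cuML constructor params the Spark API deliberately does not expose (it appears to be new in cuML 25.08's NearestNeighbors signature, presumably for scikit-learn parity). The exclusion pattern these tests rely on, sketched with an illustrative subset of names:

# Params present in cuML's signature but with no Spark-side counterpart
# (illustrative subset; the tests list more).
UNSUPPORTED_CUML_PARAMS = {"output_type", "metric_expanded", "metric_params", "n_jobs"}

def filter_cuml_params(cuml_defaults: dict) -> dict:
    # Drop params that spark-rapids-ml does not map to Spark ML params.
    return {k: v for k, v in cuml_defaults.items() if k not in UNSUPPORTED_CUML_PARAMS}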
