Skip to content
This repository was archived by the owner on Jan 12, 2026. It is now read-only.

Commit 2501b4c

Browse files
amogkam and Kai Fricke authored
Support modin 0.10.0 (#121)
Co-authored-by: Kai Fricke <kai@anyscale.com>
1 parent e2ad926 commit 2501b4c

File tree

3 files changed

+15
-4
lines changed

3 files changed

+15
-4
lines changed

xgboost_ray/data_sources/modin.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414

1515
try:
1616
import modin # noqa: F401
17-
MODIN_INSTALLED = modin.__version__ >= "0.9.0"
17+
from distutils.version import LooseVersion
18+
MODIN_INSTALLED = LooseVersion(modin.__version__) >= LooseVersion("0.9.0")
1819
except (ImportError, AttributeError):
1920
MODIN_INSTALLED = False
2021

xgboost_ray/matrix.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,9 @@ def get_data_source(self) -> Type[DataSource]:
282282
"np.ndarray, and CSV/Parquet file paths. If you specify a "
283283
"file, path, consider passing the `filetype` argument to "
284284
"specify the type of the source. Use the `RayFileType` "
285-
"enum for that.".format(type(self.data), self.filetype))
285+
"enum for that. If using Modin, Dask, or Petastorm, "
286+
"make sure the library is installed.".format(
287+
type(self.data), self.filetype))
286288

287289
if self.label is not None and not isinstance(self.label, str) and \
288290
not type(self.data) != type(self.label): # noqa: E721:
@@ -419,7 +421,9 @@ def get_data_source(self) -> Type[DataSource]:
419421
f"with FileType: {self.filetype} for a distributed dataset."
420422
"\nFIX THIS by passing a supported data type. Supported "
421423
"data types for distributed datasets are a list of "
422-
"CSV or Parquet sources as well as Ray MLDatasets.")
424+
"CSV or Parquet sources as well as Ray MLDatasets. If using "
425+
"Modin, Dask, or Petastorm, make sure the library is "
426+
"installed.")
423427

424428
self.data_source = data_source
425429
self._cached_n = data_source.get_n(self.data)

xgboost_ray/tests/test_client.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def start_client_server_4_cpus():
1515

1616
@pytest.fixture
1717
def start_client_server_5_cpus():
18-
ray.init(num_cpus=4)
18+
ray.init(num_cpus=5)
1919
with ray_start_client_server() as client:
2020
yield client
2121

@@ -40,6 +40,12 @@ def test_simple_dask(start_client_server_5_cpus):
4040
main(cpus_per_actor=1, num_actors=4)
4141

4242

43+
def test_simple_modin(start_client_server_5_cpus):
44+
assert ray.util.client.ray.is_connected()
45+
from xgboost_ray.examples.simple_modin import main
46+
main(cpus_per_actor=1, num_actors=4)
47+
48+
4349
if __name__ == "__main__":
4450
import pytest # noqa: F811
4551
import sys

0 commit comments

Comments
 (0)