
Commit 4f52138

[BACKPORT] Enable running on GPU for oscar (#2284) (#2306)
1 parent 4808c24 commit 4f52138

51 files changed: +1018 -468 lines


.github/workflows/checks.yml  (+1 -1)

@@ -1,6 +1,6 @@
 name: Pre-commit Checks
 
-on: [push, pull_request_target]
+on: [push, pull_request]
 
 jobs:
   checks:

.github/workflows/upload-packages.sh  (+1 -1)

@@ -40,7 +40,7 @@ else
   cp *.whl dist/
 
   if [[ "$UNAME" == "darwin" ]]; then
-    pip install delocate
+    pip install delocate==0.8.2
     delocate-wheel dist/*.whl
     delocate-addplat --rm-orig -x 10_9 -x 10_10 dist/*.whl
   fi

docs/source/development/oscar/batch.rst  (+1)

@@ -26,6 +26,7 @@ group requests by certain keys and resent them to different handlers in
 batches. Oscar supports creating a batch version of the method:
 
 .. code-block:: python
+
     class ExampleActor(mo.Actor):
         @mo.extensible
         async def batch_method(self, a, b=None):
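
(The hunk above only inserts a blank line after the .. code-block:: python directive so the snippet renders correctly.) For context, an extensible method defined this way is normally driven through Oscar's delay/batch helpers. A minimal sketch, assuming the ref.method.delay() / ref.method.batch() API described in the Oscar batch docs; it is not part of this commit:

    import mars.oscar as mo


    class ExampleActor(mo.Actor):
        @mo.extensible
        async def batch_method(self, a, b=None):
            return a + (b or 0)


    async def call_in_batches(ref):
        # group several delayed calls and deliver them to the actor in one batch
        return await ref.batch_method.batch(
            ref.batch_method.delay(10, b=20),
            ref.batch_method.delay(20),
        )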

mars/conftest.py  (+23)

@@ -107,6 +107,23 @@ def _new_integrated_test_session(_stop_isolation):
         sess.stop_server(isolation=False)
 
 
+@pytest.fixture(scope='module')
+def _new_gpu_test_session(_stop_isolation):  # pragma: no cover
+    from .deploy.oscar.tests.session import new_test_session
+    from .resource import cuda_count
+
+    cuda_devices = list(range(min(cuda_count(), 2)))
+
+    sess = new_test_session(address='127.0.0.1',
+                            init_local=True, n_worker=1, n_cpu=1, cuda_devices=cuda_devices,
+                            default=True, timeout=300)
+    with option_context({'show_progress': False}):
+        try:
+            yield sess
+        finally:
+            sess.stop_server(isolation=False)
+
+
 @pytest.fixture
 def setup(_new_test_session):
     _new_test_session.as_default()
@@ -117,3 +134,9 @@ def setup(_new_test_session):
 def setup_cluster(_new_integrated_test_session):
     _new_integrated_test_session.as_default()
     yield _new_integrated_test_session
+
+
+@pytest.fixture
+def setup_gpu(_new_gpu_test_session):  # pragma: no cover
+    _new_gpu_test_session.as_default()
+    yield _new_test_session
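
The new setup_gpu fixture mirrors setup and setup_cluster but boots the test session with up to two detected CUDA devices. A rough sketch of how a test module would pick it up (illustrative only; the require_cudf import path and the assertions are assumptions, not part of this commit):

    import numpy as np
    import pandas as pd

    import mars.dataframe as md
    from mars.tests.core import require_cudf


    @require_cudf
    def test_gpu_sum(setup_gpu):
        # pytest injects the GPU-backed session created by _new_gpu_test_session
        raw = pd.DataFrame(np.random.RandomState(0).rand(100, 10))
        df = md.DataFrame(raw, chunk_size=30).to_gpu()
        result = df.sum().execute().fetch()
        np.testing.assert_allclose(result.to_pandas().values, raw.sum().values)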

mars/core/operand/base.py  (+5 -2)

@@ -68,6 +68,8 @@ class SchedulingHint(Serializable):
     # need to be executed not later than the later ones,
     # because the range index of later chunk should be accumulated from
     # indexes of previous ones
+    # `gpu` indicates that if the operand should be executed on the GPU.
+    gpu = BoolField('gpu', default=None)
     priority = Int32Field('priority', default=0)
 
     @classproperty
@@ -108,7 +110,6 @@ class Operand(Base, metaclass=OperandMetaclass):
     """
     Operand base class. All operands should have a type, which can be Add, Subtract etc.
     `sparse` indicates that if the operand is applied on a sparse tensor/chunk.
-    `gpu` indicates that if the operand should be executed on the GPU.
     `device`, 0 means the CPU, otherwise means the GPU device.
     Operand can have inputs and outputs
     which should be the :class:`mars.tensor.core.TensorData`, :class:`mars.tensor.core.ChunkData` etc.
@@ -119,7 +120,6 @@ class Operand(Base, metaclass=OperandMetaclass):
     _output_type_ = None
 
     sparse = BoolField('sparse', default=False)
-    gpu = BoolField('gpu', default=None)
     device = Int32Field('device', default=None)
     # will this operand create a view of input data or not
     create_view = BoolField('create_view', default=False)
@@ -250,6 +250,9 @@ def _get_output_type(self, output_idx):
     def copy(self: OperandType) -> OperandType:
         new_op = super().copy()
         new_op.outputs = []
+        # copy scheduling_hint
+        new_op.scheduling_hint = SchedulingHint(**{field: getattr(self.scheduling_hint, field)
+                                                   for field in SchedulingHint.all_hint_names})
         new_op.extra_params = deepcopy(self.extra_params)
         return new_op
 
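
Since the gpu flag now lives on SchedulingHint rather than on Operand itself, Operand.copy() has to clone the hint explicitly or copied operands would silently lose it. A self-contained toy (not the Mars classes) illustrating the pattern the last hunk applies:

    class SchedulingHint:
        all_hint_names = ('gpu', 'priority')

        def __init__(self, gpu=None, priority=0):
            self.gpu = gpu
            self.priority = priority


    class Operand:
        def __init__(self, scheduling_hint=None):
            self.scheduling_hint = scheduling_hint or SchedulingHint()

        def copy(self):
            # rebuild the hint from every known hint field, as the hunk above does
            hint = SchedulingHint(**{name: getattr(self.scheduling_hint, name)
                                     for name in SchedulingHint.all_hint_names})
            return Operand(scheduling_hint=hint)


    op = Operand(SchedulingHint(gpu=True))
    assert op.copy().scheduling_hint.gpu is True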

mars/dataframe/base/tests/test_base_execution.py  (+2 -2)

@@ -38,7 +38,7 @@
 
 
 @require_cudf
-def test_to_gpu_execution(setup):
+def test_to_gpu_execution(setup_gpu):
     pdf = pd.DataFrame(np.random.rand(20, 30), index=np.arange(20, 0, -1))
     df = from_pandas_df(pdf, chunk_size=(13, 21))
     cdf = to_gpu(df)
@@ -57,7 +57,7 @@ def test_to_gpu_execution(setup):
 
 
 @require_cudf
-def test_to_cpu_execution(setup):
+def test_to_cpu_execution(setup_gpu):
     pdf = pd.DataFrame(np.random.rand(20, 30), index=np.arange(20, 0, -1))
     df = from_pandas_df(pdf, chunk_size=(13, 21))
     cdf = to_gpu(df)

mars/dataframe/datasource/read_csv.py  (-1)

@@ -332,7 +332,6 @@ def execute(cls, ctx, op):
                 df = df[op.usecols]
             else:
                 df = cls._cudf_read_csv(op) if op.gpu else cls._pandas_read_csv(f, op)
-
             ctx[out_df.key] = df
 
     def estimate_size(cls, ctx, op):

mars/dataframe/datasource/tests/test_datasource_execution.py  (+1 -1)

@@ -570,7 +570,7 @@ def test_read_csv_use_arrow_dtype(setup):
 
 
 @require_cudf
-def test_read_csvgpu_execution(setup):
+def test_read_csv_gpu_execution(setup_gpu):
     with tempfile.TemporaryDirectory() as tempdir:
         file_path = os.path.join(tempdir, 'test.csv')

mars/dataframe/groupby/aggregation.py  (+7 -2)

@@ -36,7 +36,7 @@
 from ..reduction.core import ReductionCompiler, ReductionSteps, ReductionPreStep, \
     ReductionAggStep, ReductionPostStep
 from ..reduction.aggregation import is_funcs_aggregate, normalize_reduction_funcs
-from ..utils import parse_index, build_concatenated_rows_frame
+from ..utils import parse_index, build_concatenated_rows_frame, is_cudf
 from .core import DataFrameGroupByOperand
 
 cp = lazy_import('cupy', globals=globals(), rename='cp')
@@ -84,7 +84,10 @@ def _patch_groupby_kurt():
     from pandas.core.groupby import DataFrameGroupBy, SeriesGroupBy
     if not hasattr(DataFrameGroupBy, 'kurt'):  # pragma: no branch
         def _kurt_by_frame(a, *args, **kwargs):
-            return a.to_frame().kurt(*args, **kwargs).iloc[0]
+            data = a.to_frame().kurt(*args, **kwargs).iloc[0]
+            if is_cudf(data):  # pragma: no cover
+                data = data.copy()
+            return data
 
         def _group_kurt(x, *args, **kwargs):
             if kwargs.get('numeric_only') is not None:
@@ -802,6 +805,8 @@ def _execute_agg(cls, ctx, op: "DataFrameGroupByAgg"):
             result = xdf.concat(aggs)
             if result.ndim == 2:
                 result = result.iloc[:, 0]
+            if is_cudf(result):  # pragma: no cover
+                result = result.copy()
             result.name = out_chunk.name
 
         ctx[out_chunk.key] = result

mars/dataframe/groupby/core.py  (+9 -2)

@@ -23,14 +23,18 @@
 from ...core.operand import OperandStage, MapReduceOperand
 from ...lib.groupby_wrapper import wrapped_groupby
 from ...serialization.serializables import BoolField, Int32Field, AnyField
+from ...utils import lazy_import
 from ..align import align_dataframe_series, align_series_series
 from ..initializer import Series as asseries
 from ..core import SERIES_TYPE, SERIES_CHUNK_TYPE
 from ..utils import build_concatenated_rows_frame, hash_dataframe_on, \
-    build_df, build_series, parse_index
+    build_df, build_series, parse_index, is_cudf
 from ..operands import DataFrameOperandMixin, DataFrameShuffleProxy
 
 
+cudf = lazy_import('cudf', globals=globals())
+
+
 class DataFrameGroupByOperand(MapReduceOperand, DataFrameOperandMixin):
     _op_type_ = OperandDef.GROUPBY
 
@@ -302,6 +306,8 @@ def _take_index(src, f):
             result = src.iloc[f]
             if src.index.names:
                 result.index.names = src.index.names
+            if is_cudf(result):  # pragma: no cover
+                result = result.copy()
             return result
 
         for index_idx, index_filter in enumerate(filters):
@@ -330,6 +336,7 @@ def _take_index(src, f):
 
     @classmethod
     def execute_reduce(cls, ctx, op: "DataFrameGroupByOperand"):
+        xdf = cudf if op.gpu else pd
         chunk = op.outputs[0]
         input_idx_to_df = dict(op.iter_mapper_data_with_index(ctx))
         row_idxes = sorted(input_idx_to_df.keys())
@@ -347,7 +354,7 @@ def execute_reduce(cls, ctx, op: "DataFrameGroupByOperand"):
             part_len = len(res[0])
             part_len -= 1 if not deliver_by else 2
             for n in range(part_len):
-                r.append(pd.concat([it[n] for it in res], axis=0))
+                r.append(xdf.concat([it[n] for it in res], axis=0))
             r = tuple(r)
 
             if deliver_by:

mars/dataframe/groupby/tests/test_groupby_execution.py  (+1 -1)

@@ -418,7 +418,7 @@ def test_groupby_agg_str_cat(setup):
 
 
 @require_cudf
-def test_gpu_groupby_agg(setup):
+def test_gpu_groupby_agg(setup_gpu):
     rs = np.random.RandomState(0)
     df1 = pd.DataFrame({'a': rs.choice([2, 3, 4], size=(100,)),
                         'b': rs.choice([2, 3, 4], size=(100,))})

mars/dataframe/indexing/iloc.py  (+3 -1)

@@ -29,7 +29,7 @@
 from ...tensor.indexing.core import calc_shape
 from ...utils import ceildiv
 from ..operands import DataFrameOperand, DataFrameOperandMixin, DATAFRAME_TYPE
-from ..utils import indexing_index_value
+from ..utils import indexing_index_value, is_cudf
 from .index_lib import DataFrameIlocIndexesHandler
 
 
@@ -358,6 +358,8 @@ def execute(cls, ctx, op):
         r = df.iloc[indexes]
         if isinstance(r, pd.Series) and r.dtype != chunk.dtype:
            r = r.astype(chunk.dtype)
+        if is_cudf(r):  # pragma: no cover
+            r = r.copy()
         ctx[chunk.key] = r

mars/dataframe/reduction/tests/test_reduction_execute.py  (+1 -1)

@@ -280,7 +280,7 @@ def compute(data, **kwargs):
 
 @require_cudf
 @require_cupy
-def test_gpu_execution(setup, check_ref_counts):
+def test_gpu_execution(setup_gpu, check_ref_counts):
     df_raw = pd.DataFrame(np.random.rand(30, 3), columns=list('abc'))
     df = to_gpu(md.DataFrame(df_raw, chunk_size=6))

mars/dataframe/sort/psrs.py  (+3 -1)

@@ -23,7 +23,7 @@
 from ...serialization.serializables import Int32Field, ListField, StringField, BoolField
 from ...tensor.base.psrs import PSRSOperandMixin
 from ..core import IndexValue, OutputType
-from ..utils import standardize_range_index, parse_index
+from ..utils import standardize_range_index, parse_index, is_cudf
 from ..operands import DataFrameOperandMixin, DataFrameOperand, \
     DataFrameShuffleProxy
 
@@ -544,6 +544,8 @@ def _execute_dataframe_map(cls, ctx, op):
         poses = (None,) + tuple(poses) + (None,)
         for i in range(op.n_partition):
             values = a.iloc[poses[i]: poses[i + 1]]
+            if is_cudf(values):  # pragma: no cover
+                values = values.copy()
             ctx[out.key, (i,)] = values
 
     @classmethod

mars/dataframe/sort/tests/test_sort_execute.py  (+4 -3)

@@ -329,22 +329,23 @@ def test_arrow_string_sort_values(setup):
 
 
 @require_cudf
-def test_gpu_execution(setup):
+def test_gpu_execution(setup_gpu):
     # test sort_values
+    rs = np.random.RandomState(0)
     distinct_opts = ['0'] if sys.platform.lower().startswith('win') else ['0', '1']
     for add_distinct in distinct_opts:
         os.environ['PSRS_DISTINCT_COL'] = add_distinct
 
         # test dataframe
-        raw = pd.DataFrame(np.random.rand(100, 10), columns=['a' + str(i) for i in range(10)])
+        raw = pd.DataFrame(rs.rand(100, 10), columns=['a' + str(i) for i in range(10)])
         mdf = DataFrame(raw, chunk_size=30).to_gpu()
 
         result = mdf.sort_values(by='a0').execute().fetch()
         expected = raw.sort_values(by='a0')
         pd.testing.assert_frame_equal(result.to_pandas(), expected)
 
         # test series
-        raw = pd.Series(np.random.rand(10))
+        raw = pd.Series(rs.rand(10))
         series = Series(raw).to_gpu()
 
         result = series.sort_values().execute().fetch()

mars/dataframe/utils.py  (+44 -1)

@@ -31,7 +31,9 @@
 from ..core import Entity, ExecutableTuple
 from ..lib.mmh3 import hash as mmh_hash
 from ..tensor.utils import dictify_chunk_size, normalize_chunk_sizes
-from ..utils import tokenize, sbytes
+from ..utils import tokenize, sbytes, lazy_import
+
+cudf = lazy_import('cudf', globals=globals(), rename='cudf')
 
 
 def hash_index(index, size):
@@ -48,6 +50,8 @@ def hash_dataframe_on(df, on, size, level=None):
         idx = df.index
         if level is not None:
             idx = idx.to_frame(False)[level]
+        if cudf and isinstance(idx, cudf.Index):  # pragma: no cover
+            idx = idx.to_pandas()
         hashed_label = pd.util.hash_pandas_object(idx, categorize=False)
     elif callable(on):
         # todo optimization can be added, if ``on`` is a numpy ufunc or sth can be vectorized
@@ -292,6 +296,10 @@ def _serialize_multi_index(index):
             _max_val_close=True,
             _key=key or tokenize(*args),
         ))
+    if hasattr(index_value, 'to_pandas'):  # pragma: no cover
+        # convert cudf.Index to pandas
+        index_value = index_value.to_pandas()
+
     if isinstance(index_value, pd.RangeIndex):
         return IndexValue(_index_value=_serialize_range_index(index_value))
     elif isinstance(index_value, pd.MultiIndex):
@@ -1116,3 +1124,38 @@ def make_dtypes(dtypes):
     else:
         dtypes = pd.Series(dtypes)
     return dtypes.apply(make_dtype)
+
+
+def is_dataframe(x):
+    if cudf is not None:  # pragma: no cover
+        if isinstance(x, cudf.DataFrame):
+            return True
+    return isinstance(x, pd.DataFrame)
+
+
+def is_series(x):
+    if cudf is not None:  # pragma: no cover
+        if isinstance(x, cudf.Series):
+            return True
+    return isinstance(x, pd.Series)
+
+
+def is_index(x):
+    if cudf is not None:  # pragma: no cover
+        if isinstance(x, cudf.Index):
+            return True
+    return isinstance(x, pd.Index)
+
+
+def get_xdf(x):
+    if cudf is not None:  # pragma: no cover
+        if isinstance(x, (cudf.DataFrame, cudf.Series, cudf.Index)):
+            return cudf
+    return pd
+
+
+def is_cudf(x):
+    if cudf is not None:  # pragma: no cover
+        if isinstance(x, (cudf.DataFrame, cudf.Series, cudf.Index)):
+            return True
+    return False
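
These helpers give execution code one way to ask whether a payload is cudf-backed and which dataframe module (cudf or pandas) should handle it. A short usage sketch; combine_parts is a hypothetical function, only the imported helpers come from this commit:

    from mars.dataframe.utils import get_xdf, is_cudf


    def combine_parts(parts):
        # pick cudf or pandas depending on where the first part lives
        xdf = get_xdf(parts[0])
        result = xdf.concat(parts, axis=0)
        if is_cudf(result):
            # mirror the commit: copy cudf results before they are stored
            result = result.copy()
        return result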
