31
31
from ..core import Entity , ExecutableTuple
32
32
from ..lib .mmh3 import hash as mmh_hash
33
33
from ..tensor .utils import dictify_chunk_size , normalize_chunk_sizes
34
- from ..utils import tokenize , sbytes
34
+ from ..utils import tokenize , sbytes , lazy_import
35
+
36
+ cudf = lazy_import ('cudf' , globals = globals (), rename = 'cudf' )
35
37
36
38
37
39
def hash_index (index , size ):
@@ -48,6 +50,8 @@ def hash_dataframe_on(df, on, size, level=None):
48
50
idx = df .index
49
51
if level is not None :
50
52
idx = idx .to_frame (False )[level ]
53
+ if cudf and isinstance (idx , cudf .Index ): # pragma: no cover
54
+ idx = idx .to_pandas ()
51
55
hashed_label = pd .util .hash_pandas_object (idx , categorize = False )
52
56
elif callable (on ):
53
57
# todo optimization can be added, if ``on`` is a numpy ufunc or sth can be vectorized
@@ -292,6 +296,10 @@ def _serialize_multi_index(index):
292
296
_max_val_close = True ,
293
297
_key = key or tokenize (* args ),
294
298
))
299
+ if hasattr (index_value , 'to_pandas' ): # pragma: no cover
300
+ # convert cudf.Index to pandas
301
+ index_value = index_value .to_pandas ()
302
+
295
303
if isinstance (index_value , pd .RangeIndex ):
296
304
return IndexValue (_index_value = _serialize_range_index (index_value ))
297
305
elif isinstance (index_value , pd .MultiIndex ):
@@ -1116,3 +1124,38 @@ def make_dtypes(dtypes):
1116
1124
else :
1117
1125
dtypes = pd .Series (dtypes )
1118
1126
return dtypes .apply (make_dtype )
1127
+
1128
+
1129
def is_dataframe(x):
    """Return True if ``x`` is a pandas DataFrame or a cudf DataFrame.

    The cudf class is only consulted when the module-level ``cudf``
    is not None.
    """
    frame_types = (pd.DataFrame,)
    if cudf is not None:  # pragma: no cover
        # check the cudf class first, then fall back to pandas
        frame_types = (cudf.DataFrame,) + frame_types
    return isinstance(x, frame_types)
1134
+
1135
+
1136
def is_series(x):
    """Return True if ``x`` is a pandas Series or a cudf Series.

    The cudf class is only consulted when the module-level ``cudf``
    is not None.
    """
    is_gpu_series = cudf is not None and isinstance(x, cudf.Series)  # pragma: no cover
    return is_gpu_series or isinstance(x, pd.Series)
1141
+
1142
+
1143
def is_index(x):
    """Return True if ``x`` is a pandas Index or a cudf Index.

    The cudf class is only consulted when the module-level ``cudf``
    is not None.
    """
    accepted = [pd.Index]
    if cudf is not None:  # pragma: no cover
        # cudf class goes first so it is tested before the pandas one
        accepted.insert(0, cudf.Index)
    return isinstance(x, tuple(accepted))
1148
+
1149
+
1150
def get_xdf(x):
    """Return the dataframe module matching ``x``.

    Returns ``cudf`` when the module-level ``cudf`` is not None and ``x``
    is a cudf DataFrame, Series or Index; otherwise returns ``pd``.
    """
    if cudf is None:
        return pd
    gpu_types = (cudf.DataFrame, cudf.Series, cudf.Index)  # pragma: no cover
    return cudf if isinstance(x, gpu_types) else pd  # pragma: no cover
1155
+
1156
+
1157
def is_cudf(x):
    """Return True if ``x`` is a cudf DataFrame, Series or Index.

    Always returns False when the module-level ``cudf`` is None.
    """
    if cudf is None:
        return False
    gpu_types = (cudf.DataFrame, cudf.Series, cudf.Index)  # pragma: no cover
    return isinstance(x, gpu_types)  # pragma: no cover
0 commit comments