2020from ... import opcodes as OperandDef
2121from ...config import options
2222from ...core import OutputType
23- from ...serialize import IndexField , DataTypeField , KeyField
23+ from ...serialize import IndexField , DataTypeField , KeyField , BoolField
2424from ...tensor .utils import get_chunk_slices
2525from ..operands import DataFrameOperand , DataFrameOperandMixin
2626from ..utils import parse_index , decide_series_chunk_size
@@ -36,11 +36,12 @@ class IndexDataSource(DataFrameOperand, DataFrameOperandMixin):
3636 _input = KeyField ('input' )
3737 _data = IndexField ('data' )
3838 _dtype = DataTypeField ('dtype' )
39+ _store_data = BoolField ('store_data' )
3940
def __init__(self, input=None, data=None, dtype=None, gpu=None,  # pylint: disable=redefined-builtin
             store_data=None, sparse=None, **kw):
    """Initialise the ``IndexDataSource`` operand.

    Each argument is forwarded to the parent constructor under its
    underscore-prefixed field name, and the output type is fixed to
    ``OutputType.index``.

    :param input: value stored in the ``input`` key field.
    :param data: value stored in the ``data`` index field.
    :param dtype: value stored in the ``dtype`` field.
    :param gpu: forwarded to the parent as ``_gpu``.
    :param store_data: value stored in the ``store_data`` bool field; other
        methods of this operand hand it to ``parse_index``.
    :param sparse: forwarded to the parent as ``_sparse``.
    :param kw: extra keyword arguments passed through to the parent unchanged.
    """
    super().__init__(_input=input, _data=data, _dtype=dtype, _gpu=gpu, _sparse=sparse,
                     _store_data=store_data, _output_types=[OutputType.index], **kw)
4445
4546 @property
4647 def input (self ):
@@ -54,6 +55,10 @@ def data(self):
5455 def dtype (self ):
5556 return self ._dtype
5657
@property
def store_data(self):
    """Return the value held in the ``_store_data`` field.

    The value is whatever was passed as ``store_data`` at construction time
    (``None`` when it was not supplied).
    """
    return self._store_data
61+
5762 def _set_inputs (self , inputs ):
5863 super ()._set_inputs (inputs )
5964 if inputs is not None and len (inputs ) > 0 :
@@ -66,7 +71,7 @@ def __call__(self, shape=None, chunk_size=None, inp=None, name=None,
6671 name = name if name is not None else self ._data .name
6772 names = names if names is not None else self ._data .names
6873 return self .new_index (None , shape = shape , dtype = self ._dtype ,
69- index_value = parse_index (self ._data ),
74+ index_value = parse_index (self ._data , store_data = self . store_data ),
7075 name = name , names = names , raw_chunk_size = chunk_size )
7176 elif hasattr (inp , 'index_value' ):
7277 # get index from Mars DataFrame, Series or Index
@@ -75,8 +80,8 @@ def __call__(self, shape=None, chunk_size=None, inp=None, name=None,
7580 if inp .index_value .has_value ():
7681 self ._data = data = inp .index_value .to_pandas ()
7782 return self .new_index (None , shape = (inp .shape [0 ],), dtype = data .dtype ,
78- index_value = parse_index (data ), name = name ,
79- names = names , raw_chunk_size = chunk_size )
83+ index_value = parse_index (data , store_data = self . store_data ) ,
84+ name = name , names = names , raw_chunk_size = chunk_size )
8085 else :
8186 if self ._dtype is None :
8287 self ._dtype = inp .index_value .to_pandas ().dtype
@@ -92,7 +97,7 @@ def __call__(self, shape=None, chunk_size=None, inp=None, name=None,
9297 if self ._dtype is None :
9398 self ._dtype = pd_index .dtype
9499 return self .new_index ([inp ], shape = inp .shape , dtype = self ._dtype ,
95- index_value = parse_index (pd_index , inp ),
100+ index_value = parse_index (pd_index , inp , store_data = self . store_data ),
96101 name = name , names = names )
97102
98103 @classmethod
@@ -111,9 +116,9 @@ def _tile_from_pandas(cls, op):
111116 chunk_op = op .copy ().reset_key ()
112117 slc = get_chunk_slices (chunk_size , chunk_index )
113118 chunk_op ._data = chunk_data = raw_index [slc ]
114- out_chunk = chunk_op .new_chunk (None , shape = chunk_shape , dtype = index . dtype ,
115- index = chunk_index , name = index .name ,
116- index_value = parse_index (chunk_data ))
119+ out_chunk = chunk_op .new_chunk (
120+ None , shape = chunk_shape , dtype = index .dtype , index = chunk_index ,
121+ name = index . name , index_value = parse_index (chunk_data , store_data = op . store_data ))
117122 out_chunks .append (out_chunk )
118123
119124 new_op = op .copy ()
@@ -165,7 +170,7 @@ def _tile_from_tensor(cls, op):
165170 out_chunks = []
166171 for c in inp .chunks :
167172 chunk_op = op .copy ().reset_key ()
168- index_value = parse_index (out .index_value .to_pandas (), c )
173+ index_value = parse_index (out .index_value .to_pandas (), c , store_data = op . store_data )
169174 out_chunk = chunk_op .new_chunk ([c ], shape = c .shape ,
170175 dtype = out .dtype , index = c .index ,
171176 index_value = index_value ,
@@ -206,8 +211,9 @@ def execute(cls, ctx, op):
206211 ctx [out .key ] = pd .Index (inp , dtype = dtype , name = out .name )
207212
208213
def from_pandas(data, chunk_size=None, gpu=False, sparse=False, store_data=False):
    """Build an index from ``data`` via an ``IndexDataSource`` operand.

    :param data: source object; its ``dtype`` and ``shape`` attributes are read.
        Presumably a pandas ``Index`` -- confirm against callers.
    :param chunk_size: forwarded to the operand call as ``chunk_size``.
    :param gpu: forwarded to the operand constructor.
    :param sparse: forwarded to the operand constructor.
    :param store_data: forwarded to the operand constructor, which passes it
        on to ``parse_index`` when the index value is built.
    :return: the result of calling the operand with ``shape=data.shape``.
    """
    op = IndexDataSource(data=data, gpu=gpu, sparse=sparse, dtype=data.dtype,
                         store_data=store_data)
    return op(shape=data.shape, chunk_size=chunk_size)
212218
213219
0 commit comments