Skip to content

Commit 1edf05d

Browse files
wjsi, 继盛, and hekaisheng
authored
Fix consistency between tensor metadata and real outputs (#1085)
Co-authored-by: 继盛 <[email protected]>
Co-authored-by: hekaisheng <[email protected]>
1 parent e5c11e4 commit 1edf05d

File tree

21 files changed

+162
-79
lines changed

21 files changed

+162
-79
lines changed

mars/dataframe/align.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,7 @@ def _gen_dataframe_chunks(splits, out_shape, left_or_right, df):
579579
chunk_kw = {
580580
'index_value': chunk.index_value if splits[0].isdummy() else None,
581581
'columns_value': chunk.columns_value if splits[1].isdummy() else None,
582+
'dtypes': chunk.dtypes if splits[1].isdummy() else None
582583
}
583584
align_op = DataFrameIndexAlign(
584585
stage=OperandStage.map, index_min_max=index_min_max,

mars/dataframe/arithmetic/core.py

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -195,11 +195,12 @@ def _tile_scalar(cls, op):
195195
out_chunks.append(out_chunk)
196196

197197
new_op = op.copy()
198+
out = op.outputs[0]
198199
if isinstance(df, SERIES_TYPE):
199-
return new_op.new_seriess(op.inputs, df.shape, nsplits=tileable.nsplits, dtype=df.dtype,
200+
return new_op.new_seriess(op.inputs, df.shape, nsplits=tileable.nsplits, dtype=out.dtype,
200201
index_value=df.index_value, name=df.name, chunks=out_chunks)
201202
else:
202-
return new_op.new_dataframes(op.inputs, df.shape, nsplits=tileable.nsplits, dtypes=df.dtypes,
203+
return new_op.new_dataframes(op.inputs, df.shape, nsplits=tileable.nsplits, dtypes=out.dtypes,
203204
index_value=df.index_value, columns_value=df.columns_value,
204205
chunks=out_chunks)
205206

@@ -233,11 +234,12 @@ def _tile_with_tensor(cls, op):
233234
out_chunks.append(out_chunk)
234235

235236
new_op = op.copy()
237+
out = op.outputs[0]
236238
if isinstance(other, SERIES_TYPE):
237-
return new_op.new_seriess(op.inputs, other.shape, nsplits=other.nsplits, dtype=other.dtype,
238-
index_value=other.index_value, name=other.name, chunks=out_chunks)
239+
return new_op.new_seriess(op.inputs, other.shape, nsplits=other.nsplits, dtype=out.dtype,
240+
index_value=other.index_value, chunks=out_chunks)
239241
else:
240-
return new_op.new_dataframes(op.inputs, other.shape, nsplits=other.nsplits, dtypes=other.dtypes,
242+
return new_op.new_dataframes(op.inputs, other.shape, nsplits=other.nsplits, dtypes=out.dtypes,
241243
index_value=other.index_value, columns_value=other.columns_value,
242244
chunks=out_chunks)
243245

@@ -294,8 +296,17 @@ def _operator(self):
294296
def _calc_properties(cls, x1, x2=None, axis='columns'):
295297
if isinstance(x1, (DATAFRAME_TYPE, DATAFRAME_CHUNK_TYPE)) \
296298
and (x2 is None or np.isscalar(x2) or isinstance(x2, TENSOR_TYPE)):
297-
# FIXME infer the dtypes of result df properly
298-
return {'shape': x1.shape, 'dtypes': x1.dtypes,
299+
if x2 is None:
300+
dtypes = x1.dtypes
301+
elif np.isscalar(x2):
302+
dtypes = infer_dtypes(x1.dtypes, pd.Series(np.array(x2).dtype), cls._operator)
303+
elif x1.dtypes is not None and isinstance(x2, TENSOR_TYPE):
304+
dtypes = pd.Series(
305+
[infer_dtype(dt, x2.dtype, cls._operator) for dt in x1.dtypes],
306+
index=x1.dtypes.index)
307+
else:
308+
dtypes = x1.dtypes
309+
return {'shape': x1.shape, 'dtypes': dtypes,
299310
'columns_value': x1.columns_value, 'index_value': x1.index_value}
300311

301312
if isinstance(x1, (SERIES_TYPE, SERIES_CHUNK_TYPE)) \
@@ -310,7 +321,9 @@ def _calc_properties(cls, x1, x2=None, axis='columns'):
310321

311322
if x1.columns_value is not None and x2.columns_value is not None and \
312323
x1.columns_value.key == x2.columns_value.key:
313-
dtypes = x1.dtypes
324+
dtypes = pd.Series([infer_dtype(dt1, dt2, cls._operator) for dt1, dt2
325+
in zip(x1.dtypes, x2.dtypes)],
326+
index=x1.dtypes.index)
314327
columns = copy.copy(x1.columns_value)
315328
columns.value.should_be_monotonic = False
316329
column_shape = len(dtypes)
@@ -342,11 +355,12 @@ def _calc_properties(cls, x1, x2=None, axis='columns'):
342355
column_shape, dtypes, columns = np.nan, None, None
343356
if x1.columns_value is not None and x1.index_value is not None:
344357
if x1.columns_value.key == x2.index_value.key:
345-
dtypes = x1.dtypes
358+
dtypes = pd.Series([infer_dtype(dt, x2.dtype, cls._operator) for dt in x1.dtypes],
359+
index=x1.dtypes.index)
346360
columns = copy.copy(x1.columns_value)
347361
columns.value.should_be_monotonic = False
348362
column_shape = len(dtypes)
349-
else:
363+
else: # pragma: no cover
350364
dtypes = x1.dtypes # FIXME
351365
columns = infer_index_value(x1.columns_value, x2.index_value)
352366
columns.value.should_be_monotonic = True
@@ -359,10 +373,16 @@ def _calc_properties(cls, x1, x2=None, axis='columns'):
359373
index_shape, index = np.nan, None
360374
if x1.index_value is not None and x1.index_value is not None:
361375
if x1.index_value.key == x2.index_value.key:
362-
index = copy.copy(x1.columns_value)
376+
dtypes = pd.Series([infer_dtype(dt, x2.dtype, cls._operator) for dt in x1.dtypes],
377+
index=x1.dtypes.index)
378+
index = copy.copy(x1.index_value)
363379
index.value.should_be_monotonic = False
364380
index_shape = x1.shape[0]
365381
else:
382+
if x1.dtypes is not None:
383+
dtypes = pd.Series(
384+
[infer_dtype(dt, x2.dtype, cls._operator) for dt in x1.dtypes],
385+
index=x1.dtypes.index)
366386
index = infer_index_value(x1.index_value, x2.index_value)
367387
index.value.should_be_monotonic = True
368388
index_shape = np.nan

mars/dataframe/groupby/apply.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def tile(cls, op):
107107
columns_value=out_df.columns_value, index_value=out_df.index_value))
108108
else:
109109
chunks.append(new_op.new_chunk(
110-
inp_chunks, index=c.index, shape=(np.nan,), dtype=out_df.dtype,
110+
inp_chunks, name=out_df.name, index=c.index, shape=(np.nan,), dtype=out_df.dtype,
111111
index_value=out_df.index_value))
112112

113113
new_op = op.copy().reset_key()
@@ -129,7 +129,7 @@ def _infer_df_func_returns(self, in_object_type, in_dtypes, dtypes, index):
129129
if in_object_type == ObjectType.dataframe:
130130
empty_df = build_empty_df(in_dtypes, index=pd.RangeIndex(2))
131131
else:
132-
empty_df = build_empty_series(in_dtypes, index=pd.RangeIndex(2))
132+
empty_df = build_empty_series(in_dtypes[1], index=pd.RangeIndex(2), name=in_dtypes[0])
133133

134134
with np.errstate(all='ignore'):
135135
if self.is_transform:
@@ -148,10 +148,10 @@ def _infer_df_func_returns(self, in_object_type, in_dtypes, dtypes, index):
148148
new_dtypes = new_dtypes or infer_df.dtypes
149149
elif isinstance(infer_df, pd.Series):
150150
object_type = object_type or ObjectType.series
151-
new_dtypes = new_dtypes or infer_df.dtype
151+
new_dtypes = new_dtypes or (infer_df.name, infer_df.dtype)
152152
else:
153153
object_type = ObjectType.series
154-
new_dtypes = pd.Series(infer_df).dtype
154+
new_dtypes = (None, pd.Series(infer_df).dtype)
155155
except: # noqa: E722 # nosec
156156
pass
157157

@@ -164,7 +164,8 @@ def __call__(self, groupby, dtypes=None, index=None):
164164
in_df = groupby.inputs[0]
165165
in_dtypes = getattr(in_df, 'dtypes', None)
166166
if in_dtypes is None:
167-
in_dtypes = in_df.dtype
167+
in_dtypes = (in_df.name, in_df.dtype)
168+
168169
dtypes, index_value = self._infer_df_func_returns(
169170
in_df.op.object_type, in_dtypes, dtypes, index)
170171
for arg, desc in zip((self._object_type, dtypes, index_value),
@@ -178,8 +179,10 @@ def __call__(self, groupby, dtypes=None, index=None):
178179
return self.new_dataframe([groupby], shape=new_shape, dtypes=dtypes,
179180
index_value=index_value, columns_value=in_df.columns_value)
180181
else:
182+
name, dtype = dtypes
181183
new_shape = in_df.shape if self.is_transform else (np.nan,)
182-
return self.new_series([groupby], shape=new_shape, dtype=dtypes, index_value=index_value)
184+
return self.new_series([groupby], name=name, shape=new_shape, dtype=dtype,
185+
index_value=index_value)
183186

184187

185188
class GroupByApply(GroupByApplyTransform):

mars/dataframe/indexing/getitem.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,9 +275,10 @@ def tile_with_mask(cls, op):
275275
out_chunks = []
276276
for idx, df_chunk in zip(out_chunk_indexes, df_chunks):
277277
mask_chunk = mask_chunks[df_chunk.index[0]]
278+
index_value = parse_index(out_df.index_value.to_pandas(), df_chunk)
278279
out_chunk = op.copy().reset_key().new_chunk([df_chunk, mask_chunk],
279280
shape=(np.nan, df_chunk.shape[1]), index=idx,
280-
index_value=df_chunk.index_value,
281+
index_value=index_value,
281282
columns_value=df_chunk.columns_value,
282283
dtypes=df_chunk.dtypes)
283284
out_chunks.append(out_chunk)

mars/dataframe/indexing/index_lib.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,7 @@ def set_chunk_index_info(cls,
475475
assert index_info.input_axis == 0, \
476476
'bool indexing on axis columns cannot be tensor'
477477

478-
index_value = parse_index(chunk_input.index_value.to_pandas(),
478+
index_value = parse_index(pd.Index([], chunk_input.index_value.to_pandas().dtype),
479479
chunk_input, index, store_data=False)
480480

481481
info = ChunkIndexAxisInfo(output_axis_index=output_axis_index,

mars/dataframe/merge/concat.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ def _call_series(self, objs):
269269
else:
270270
index_value = parse_index(index)
271271
return self.new_series(objs, shape=(row_length,), dtype=objs[0].dtype,
272-
index_value=index_value)
272+
index_value=index_value, name=objs[0].name)
273273
else:
274274
self._object_type = ObjectType.dataframe
275275
col_length = 0

mars/dataframe/reduction/aggregation.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ def _gen_map_chunks(cls, op, in_df, out_df, stage_infos: List[_stage_info],
285285
columns_value=chunk.columns_value)
286286
elif op.object_type == ObjectType.series:
287287
agg_chunk = map_op.new_chunk([chunk], shape=(out_df.shape[0], 1), index=new_index,
288-
index_value=out_df.index_value)
288+
index_value=out_df.index_value, name=out_df.name)
289289
else: # scalar target
290290
agg_chunk = map_op.new_chunk([chunk], shape=(1, 1), index=new_index)
291291
agg_chunks[agg_chunk.index] = agg_chunk
@@ -299,10 +299,11 @@ def _tile_single_chunk(cls, op: "DataFrameAggregate"):
299299
chunk_op = op.copy().reset_key()
300300
if op.object_type == ObjectType.dataframe:
301301
chunk = chunk_op.new_chunk(in_df.chunks, index=(0, 0), shape=out_df.shape,
302-
index_value=out_df.index_value, columns_value=out_df.columns_value)
302+
index_value=out_df.index_value, columns_value=out_df.columns_value,
303+
dtypes=out_df.dtypes)
303304
else:
304305
chunk = chunk_op.new_chunk(in_df.chunks, index=(0,), shape=out_df.shape,
305-
index_value=out_df.index_value)
306+
index_value=out_df.index_value, name=out_df.name)
306307

307308
tileable_op = op.copy().reset_key()
308309
kw = out_df.params.copy()

mars/dataframe/reduction/core.py

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from ...operands import OperandStage
2222
from ...utils import lazy_import
2323
from ...serialize import BoolField, AnyField, DataTypeField, Int32Field
24-
from ..utils import parse_index, build_empty_df, validate_axis
24+
from ..utils import parse_index, build_empty_df, build_empty_series, validate_axis
2525
from ..operands import DataFrameOperandMixin, DataFrameOperand, ObjectType, DATAFRAME_TYPE
2626
from ..merge import DataFrameConcat
2727

@@ -113,20 +113,26 @@ class DataFrameReductionMixin(DataFrameOperandMixin):
113113
@classmethod
114114
def _tile_one_chunk(cls, op):
115115
df = op.outputs[0]
116-
params = df.params
117116

118117
chk = op.inputs[0].chunks[0]
119118
chunk_params = {k: v for k, v in chk.params.items()
120119
if k in df.params}
121120
chunk_params['shape'] = df.shape
122121
chunk_params['index'] = chk.index
122+
if op.object_type == ObjectType.series:
123+
chunk_params.update(dict(dtype=df.dtype, index_value=df.index_value))
124+
elif op.object_type == ObjectType.dataframe:
125+
chunk_params.update(dict(dtypes=df.dtypes, index_value=df.index_value,
126+
columns_value=df.columns_value))
127+
else:
128+
chunk_params.update(dict(dtype=df.dtype))
123129
new_chunk_op = op.copy().reset_key()
124130
chunk = new_chunk_op.new_chunk(op.inputs[0].chunks, kws=[chunk_params])
125131

126132
new_op = op.copy()
127133
nsplits = tuple((s,) for s in chunk.shape)
128-
params['chunks'] = [chunk]
129-
params['nsplits'] = nsplits
134+
params = df.params.copy()
135+
params.update(dict(chunks=[chunk], nsplits=nsplits))
130136
return new_op.new_tileables(op.inputs, kws=[params])
131137

132138
@classmethod
@@ -402,30 +408,45 @@ def execute(cls, ctx, op):
402408
def _call_dataframe(self, df):
403409
axis = getattr(self, 'axis', None) or 0
404410
level = getattr(self, 'level', None)
411+
skipna = getattr(self, 'skipna', None)
405412
numeric_only = getattr(self, 'numeric_only', None)
406413
self._axis = axis = validate_axis(axis, df)
407414
# TODO: enable specify level if we support groupby
408415
if level is not None:
409416
raise NotImplementedError('Not support specify level now')
410417

411418
empty_df = build_empty_df(df.dtypes)
412-
reduced_df = getattr(empty_df, getattr(self, '_func_name'))(axis=axis, level=level,
413-
numeric_only=numeric_only)
419+
func_name = getattr(self, '_func_name')
420+
if func_name == 'count':
421+
reduced_df = getattr(empty_df, func_name)(axis=axis, level=level, numeric_only=numeric_only)
422+
else:
423+
reduced_df = getattr(empty_df, func_name)(axis=axis, level=level, skipna=skipna,
424+
numeric_only=numeric_only)
414425
reduced_shape = (df.shape[0],) if axis == 1 else reduced_df.shape
415426
return self.new_series([df], shape=reduced_shape, dtype=reduced_df.dtype,
416-
index_value=parse_index(reduced_df.index))
427+
index_value=parse_index(reduced_df.index, store_data=axis == 0))
417428

418429
def _call_series(self, series):
419430
level = getattr(self, 'level', None)
420431
axis = getattr(self, 'axis', None)
432+
skipna = getattr(self, 'skipna', None)
433+
numeric_only = getattr(self, 'numeric_only', None)
421434
if axis == 'index':
422435
axis = 0
423436
self._axis = axis
424437
# TODO: enable specify level if we support groupby
425438
if level is not None:
426439
raise NotImplementedError('Not support specified level now')
427440

428-
return self.new_scalar([series], dtype=series.dtype)
441+
empty_series = build_empty_series(series.dtype)
442+
func_name = getattr(self, '_func_name')
443+
if func_name == 'count':
444+
reduced_series = empty_series.count(level=level)
445+
else:
446+
reduced_series = getattr(empty_series, func_name)(axis=axis, level=level, skipna=skipna,
447+
numeric_only=numeric_only)
448+
449+
return self.new_scalar([series], dtype=np.array(reduced_series).dtype)
429450

430451
def __call__(self, a):
431452
if isinstance(a, DATAFRAME_TYPE):
@@ -438,7 +459,7 @@ class DataFrameCumReductionMixin(DataFrameOperandMixin):
438459
@classmethod
439460
def _tile_one_chunk(cls, op):
440461
df = op.outputs[0]
441-
params = df.params
462+
params = df.params.copy()
442463

443464
chk = op.inputs[0].chunks[0]
444465
chunk_params = {k: v for k, v in chk.params.items()
@@ -525,7 +546,7 @@ def _tile_series(cls, op):
525546
new_op = op.copy().reset_key()
526547
return new_op.new_tileables(op.inputs, shape=in_series.shape, nsplits=in_series.nsplits,
527548
chunks=output_chunks, dtype=series.dtype,
528-
index_value=series.index_value)
549+
index_value=series.index_value, name=series.name)
529550

530551
@classmethod
531552
def tile(cls, op):

0 commit comments

Comments (0)