Skip to content

Commit 6cf5cb6

Browse files
committed
punt on type preservation
1 parent 91d1135 commit 6cf5cb6

File tree

1 file changed

+11
-17
lines changed

1 file changed

+11
-17
lines changed

merlin/dag/executors.py

+11-17
Original file line numberDiff line numberDiff line change
@@ -389,32 +389,26 @@ def transform(
389389
if col_dtype:
390390
output_dtypes[col_name] = md.dtype(col_dtype).to_numpy
391391

392-
def empty_like(df, cols):
393-
# Construct an empty DataFrame with the same dtypes as df
394-
395-
# TODO: constructing meta like this can lose dtype information for
396-
# columns that are arbitrarily set to 'float64'. We should propagate
397-
# dtype information along with column names in the columngroup graph.
398-
# This currently only happens during intermediate 'fit' transforms,
399-
# so as long as statoperators don't require dtype information on the
400-
# DDF this doesn't matter all that much
392+
def make_empty(df, cols):
393+
# Construct an empty DataFrame
394+
395+
# TODO: constructing meta like this loses dtype information on the ddf
396+
# and sets it all to 'float64'. We should propagate dtype information along
397+
# with column names in the columngroup graph. This currently only
398+
# happens during intermediate 'fit' transforms, so as long as statoperators
399+
# don't require dtype information on the DDF this doesn't matter all that much
401400
return df._constructor(
402-
{
403-
col: df._constructor_sliced(
404-
[], dtype=df[col].dtype if col in df.columns else "float64"
405-
)
406-
for col in cols
407-
}
401+
{col: df._constructor_sliced([], dtype="float64") for col in cols}
408402
)
409403

410404
if isinstance(output_dtypes, dict) and isinstance(ddf._meta, pd.DataFrame):
411405
dtypes = output_dtypes
412-
output_dtypes = empty_like(ddf._meta, columns)
406+
output_dtypes = make_empty(ddf._meta, columns)
413407
for col_name, col_dtype in dtypes.items():
414408
output_dtypes[col_name] = output_dtypes[col_name].astype(col_dtype)
415409

416410
elif not output_dtypes:
417-
output_dtypes = empty_like(ddf._meta, columns)
411+
output_dtypes = make_empty(ddf._meta, columns)
418412

419413
return ensure_optimize_dataframe_graph(
420414
ddf=ddf.map_partitions(

0 commit comments

Comments
 (0)