Skip to content

Commit acc7531

Browse files
committed
Avoid unnecessary copies.
1. Don't call astype unless the result's dtype actually differs from the requested one. 2. Don't reorder the groups if they are already sorted. This uncovered a bug where nan_sentinel was one larger than needed; fixed that too.
1 parent 6e1e93a commit acc7531

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

flox/core.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,7 @@ def factorize_(by: Tuple, axis, expected_groups: Tuple = None, isbin: Tuple = No
356356
# so we'll add use (ngroups+1) as the sentinel
357357
# note we cannot simply remove the NaN locations;
358358
# that would mess up argmax, argmin
359-
nan_sentinel = size + 1 if offset_group else ngroups + 1
359+
nan_sentinel = size if offset_group else ngroups
360360
group_idx[group_idx == -1] = nan_sentinel
361361

362362
props = FactorProps(offset_group, nan_sentinel)
@@ -512,6 +512,9 @@ def chunk_reduce(
512512
results["groups"] = np.array([np.nan])
513513
else:
514514
sortidx = np.argsort(groups)
515+
if np.all(sortidx == np.arange(len(sortidx))):
516+
# already sorted, avoid the copy.
517+
sortidx = slice(None)
515518
results["groups"] = groups[sortidx]
516519

517520
final_array_shape += results["groups"].shape
@@ -546,7 +549,7 @@ def chunk_reduce(
546549
dtype=final_dtype,
547550
**kw,
548551
)
549-
if final_dtype is not None:
552+
if final_dtype is not None and result.dtype != final_dtype:
550553
result = result.astype(final_dtype)
551554
if np.any(~mask):
552555
# remove NaN group label which should be last

0 commit comments

Comments
 (0)