Skip to content

Commit ff4480a

Browse files
dcherian and sebastic authored
32bit support: int64 to intp for count (#201)
Use intp as expected dtype for count tests. Co-authored-by: Bas Couwenberg <[email protected]>
1 parent b148724 commit ff4480a

File tree

1 file changed

+24
-20
lines changed

1 file changed

+24
-20
lines changed

tests/test_core.py

+24 -20
Original file line number | Diff line number | Diff line change
@@ -140,7 +140,7 @@ def test_groupby_reduce(
140140
elif func == "sum":
141141
expected_result = np.array(expected, dtype=dtype)
142142
elif func == "count":
143-
expected_result = np.array(expected, dtype=np.int64)
143+
expected_result = np.array(expected, dtype=np.intp)
144144

145145
result, groups, = groupby_reduce(
146146
array,
@@ -284,7 +284,7 @@ def test_groupby_reduce_count():
284284
array = np.array([0, 0, np.nan, np.nan, np.nan, 1, 1])
285285
labels = np.array(["a", "b", "b", "b", "c", "c", "c"])
286286
result, _ = groupby_reduce(array, labels, func="count")
287-
assert_equal(result, np.array([1, 1, 2], dtype=np.int64))
287+
assert_equal(result, np.array([1, 1, 2], dtype=np.intp))
288288

289289

290290
def test_func_is_aggregation():
@@ -408,29 +408,29 @@ def test_numpy_reduce_axis_subset(engine):
408408
array = np.ones_like(by, dtype=np.int64)
409409
kwargs = dict(func="count", engine=engine, fill_value=0)
410410
result, _ = groupby_reduce(array, by, **kwargs, axis=1)
411-
assert_equal(result, np.array([[2, 3], [2, 3]], dtype=np.int64))
411+
assert_equal(result, np.array([[2, 3], [2, 3]], dtype=np.intp))
412412

413413
by = np.broadcast_to(labels2d, (3, *labels2d.shape))
414414
array = np.ones_like(by)
415415
result, _ = groupby_reduce(array, by, **kwargs, axis=1)
416-
subarr = np.array([[1, 1], [1, 1], [0, 2], [1, 1], [1, 1]], dtype=np.int64)
416+
subarr = np.array([[1, 1], [1, 1], [0, 2], [1, 1], [1, 1]], dtype=np.intp)
417417
expected = np.tile(subarr, (3, 1, 1))
418418
assert_equal(result, expected)
419419

420420
result, _ = groupby_reduce(array, by, **kwargs, axis=2)
421-
subarr = np.array([[2, 3], [2, 3]], dtype=np.int64)
421+
subarr = np.array([[2, 3], [2, 3]], dtype=np.intp)
422422
expected = np.tile(subarr, (3, 1, 1))
423423
assert_equal(result, expected)
424424

425425
result, _ = groupby_reduce(array, by, **kwargs, axis=(1, 2))
426-
expected = np.array([[4, 6], [4, 6], [4, 6]], dtype=np.int64)
426+
expected = np.array([[4, 6], [4, 6], [4, 6]], dtype=np.intp)
427427
assert_equal(result, expected)
428428

429429
result, _ = groupby_reduce(array, by, **kwargs, axis=(2, 1))
430430
assert_equal(result, expected)
431431

432432
result, _ = groupby_reduce(array, by[0, ...], **kwargs, axis=(1, 2))
433-
expected = np.array([[4, 6], [4, 6], [4, 6]], dtype=np.int64)
433+
expected = np.array([[4, 6], [4, 6], [4, 6]], dtype=np.intp)
434434
assert_equal(result, expected)
435435

436436

@@ -672,7 +672,7 @@ def test_groupby_bins(chunk_labels, chunks, engine, method) -> None:
672672
engine=engine,
673673
method=method,
674674
)
675-
expected = np.array([3, 1, 0], dtype=np.int64)
675+
expected = np.array([3, 1, 0], dtype=np.intp)
676676
for left, right in zip(groups, pd.IntervalIndex.from_arrays([1, 2, 4], [2, 4, 5]).to_numpy()):
677677
assert left == right
678678
assert_equal(actual, expected)
@@ -955,7 +955,7 @@ def test_group_by_datetime(engine, method):
955955

956956

957957
def test_factorize_values_outside_bins():
958-
958+
# pd.factorize returns intp
959959
vals = factorize_(
960960
(np.arange(10).reshape(5, 2), np.arange(10).reshape(5, 2)),
961961
axis=(0, 1),
@@ -967,7 +967,7 @@ def test_factorize_values_outside_bins():
967967
fastpath=True,
968968
)
969969
actual = vals[0]
970-
expected = np.array([[-1, -1], [-1, 0], [6, 12], [18, 24], [-1, -1]], np.int64)
970+
expected = np.array([[-1, -1], [-1, 0], [6, 12], [18, 24], [-1, -1]], np.intp)
971971
assert_equal(expected, actual)
972972

973973

@@ -991,7 +991,8 @@ def test_multiple_groupers_bins(chunk) -> None:
991991
),
992992
func="count",
993993
)
994-
expected = np.eye(5, 5, dtype=np.int64)
994+
# output from `count` is intp
995+
expected = np.eye(5, 5, dtype=np.intp)
995996
assert_equal(expected, actual)
996997

997998

@@ -1020,7 +1021,8 @@ def test_multiple_groupers(chunk, by1, by2, expected_groups) -> None:
10201021
if chunk:
10211022
by2 = dask.array.from_array(by2)
10221023

1023-
expected = np.ones((5, 2), dtype=np.int64)
1024+
# output from `count` is intp
1025+
expected = np.ones((5, 2), dtype=np.intp)
10241026
actual, *_ = groupby_reduce(
10251027
array, by1, by2, axis=(0, 1), func="count", expected_groups=expected_groups
10261028
)
@@ -1059,45 +1061,47 @@ def test_validate_expected_groups_not_none_dask() -> None:
10591061

10601062

10611063
def test_factorize_reindex_sorting_strings():
1064+
# pd.factorize seems to return intp so int32 on 32bit arch
10621065
kwargs = dict(
10631066
by=(np.array(["El-Nino", "La-Nina", "boo", "Neutral"]),),
10641067
axis=-1,
10651068
expected_groups=(np.array(["El-Nino", "Neutral", "foo", "La-Nina"]),),
10661069
)
10671070

10681071
expected = factorize_(**kwargs, reindex=True, sort=True)[0]
1069-
assert_equal(expected, np.array([0, 1, 4, 2], dtype=np.int64))
1072+
assert_equal(expected, np.array([0, 1, 4, 2], dtype=np.intp))
10701073

10711074
expected = factorize_(**kwargs, reindex=True, sort=False)[0]
1072-
assert_equal(expected, np.array([0, 3, 4, 1], dtype=np.int64))
1075+
assert_equal(expected, np.array([0, 3, 4, 1], dtype=np.intp))
10731076

10741077
expected = factorize_(**kwargs, reindex=False, sort=False)[0]
1075-
assert_equal(expected, np.array([0, 1, 2, 3], dtype=np.int64))
1078+
assert_equal(expected, np.array([0, 1, 2, 3], dtype=np.intp))
10761079

10771080
expected = factorize_(**kwargs, reindex=False, sort=True)[0]
1078-
assert_equal(expected, np.array([0, 1, 3, 2], dtype=np.int64))
1081+
assert_equal(expected, np.array([0, 1, 3, 2], dtype=np.intp))
10791082

10801083

10811084
def test_factorize_reindex_sorting_ints():
1085+
# pd.factorize seems to return intp so int32 on 32bit arch
10821086
kwargs = dict(
10831087
by=(np.array([-10, 1, 10, 2, 3, 5]),),
10841088
axis=-1,
10851089
expected_groups=(np.array([0, 1, 2, 3, 4, 5], np.int64),),
10861090
)
10871091

10881092
expected = factorize_(**kwargs, reindex=True, sort=True)[0]
1089-
assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.int64))
1093+
assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.intp))
10901094

10911095
expected = factorize_(**kwargs, reindex=True, sort=False)[0]
1092-
assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.int64))
1096+
assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.intp))
10931097

10941098
kwargs["expected_groups"] = (np.arange(5, -1, -1),)
10951099

10961100
expected = factorize_(**kwargs, reindex=True, sort=True)[0]
1097-
assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.int64))
1101+
assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.intp))
10981102

10991103
expected = factorize_(**kwargs, reindex=True, sort=False)[0]
1100-
assert_equal(expected, np.array([6, 4, 6, 3, 2, 0], dtype=np.int64))
1104+
assert_equal(expected, np.array([6, 4, 6, 3, 2, 0], dtype=np.intp))
11011105

11021106

11031107
@requires_dask

0 commit comments

Comments
 (0)