@@ -140,7 +140,7 @@ def test_groupby_reduce(
140
140
elif func == "sum" :
141
141
expected_result = np .array (expected , dtype = dtype )
142
142
elif func == "count" :
143
- expected_result = np .array (expected , dtype = np .int64 )
143
+ expected_result = np .array (expected , dtype = np .intp )
144
144
145
145
result , groups , = groupby_reduce (
146
146
array ,
@@ -284,7 +284,7 @@ def test_groupby_reduce_count():
284
284
array = np .array ([0 , 0 , np .nan , np .nan , np .nan , 1 , 1 ])
285
285
labels = np .array (["a" , "b" , "b" , "b" , "c" , "c" , "c" ])
286
286
result , _ = groupby_reduce (array , labels , func = "count" )
287
- assert_equal (result , np .array ([1 , 1 , 2 ], dtype = np .int64 ))
287
+ assert_equal (result , np .array ([1 , 1 , 2 ], dtype = np .intp ))
288
288
289
289
290
290
def test_func_is_aggregation ():
@@ -408,29 +408,29 @@ def test_numpy_reduce_axis_subset(engine):
408
408
array = np .ones_like (by , dtype = np .int64 )
409
409
kwargs = dict (func = "count" , engine = engine , fill_value = 0 )
410
410
result , _ = groupby_reduce (array , by , ** kwargs , axis = 1 )
411
- assert_equal (result , np .array ([[2 , 3 ], [2 , 3 ]], dtype = np .int64 ))
411
+ assert_equal (result , np .array ([[2 , 3 ], [2 , 3 ]], dtype = np .intp ))
412
412
413
413
by = np .broadcast_to (labels2d , (3 , * labels2d .shape ))
414
414
array = np .ones_like (by )
415
415
result , _ = groupby_reduce (array , by , ** kwargs , axis = 1 )
416
- subarr = np .array ([[1 , 1 ], [1 , 1 ], [0 , 2 ], [1 , 1 ], [1 , 1 ]], dtype = np .int64 )
416
+ subarr = np .array ([[1 , 1 ], [1 , 1 ], [0 , 2 ], [1 , 1 ], [1 , 1 ]], dtype = np .intp )
417
417
expected = np .tile (subarr , (3 , 1 , 1 ))
418
418
assert_equal (result , expected )
419
419
420
420
result , _ = groupby_reduce (array , by , ** kwargs , axis = 2 )
421
- subarr = np .array ([[2 , 3 ], [2 , 3 ]], dtype = np .int64 )
421
+ subarr = np .array ([[2 , 3 ], [2 , 3 ]], dtype = np .intp )
422
422
expected = np .tile (subarr , (3 , 1 , 1 ))
423
423
assert_equal (result , expected )
424
424
425
425
result , _ = groupby_reduce (array , by , ** kwargs , axis = (1 , 2 ))
426
- expected = np .array ([[4 , 6 ], [4 , 6 ], [4 , 6 ]], dtype = np .int64 )
426
+ expected = np .array ([[4 , 6 ], [4 , 6 ], [4 , 6 ]], dtype = np .intp )
427
427
assert_equal (result , expected )
428
428
429
429
result , _ = groupby_reduce (array , by , ** kwargs , axis = (2 , 1 ))
430
430
assert_equal (result , expected )
431
431
432
432
result , _ = groupby_reduce (array , by [0 , ...], ** kwargs , axis = (1 , 2 ))
433
- expected = np .array ([[4 , 6 ], [4 , 6 ], [4 , 6 ]], dtype = np .int64 )
433
+ expected = np .array ([[4 , 6 ], [4 , 6 ], [4 , 6 ]], dtype = np .intp )
434
434
assert_equal (result , expected )
435
435
436
436
@@ -672,7 +672,7 @@ def test_groupby_bins(chunk_labels, chunks, engine, method) -> None:
672
672
engine = engine ,
673
673
method = method ,
674
674
)
675
- expected = np .array ([3 , 1 , 0 ], dtype = np .int64 )
675
+ expected = np .array ([3 , 1 , 0 ], dtype = np .intp )
676
676
for left , right in zip (groups , pd .IntervalIndex .from_arrays ([1 , 2 , 4 ], [2 , 4 , 5 ]).to_numpy ()):
677
677
assert left == right
678
678
assert_equal (actual , expected )
@@ -955,7 +955,7 @@ def test_group_by_datetime(engine, method):
955
955
956
956
957
957
def test_factorize_values_outside_bins ():
958
-
958
+ # pd.factorize returns intp
959
959
vals = factorize_ (
960
960
(np .arange (10 ).reshape (5 , 2 ), np .arange (10 ).reshape (5 , 2 )),
961
961
axis = (0 , 1 ),
@@ -967,7 +967,7 @@ def test_factorize_values_outside_bins():
967
967
fastpath = True ,
968
968
)
969
969
actual = vals [0 ]
970
- expected = np .array ([[- 1 , - 1 ], [- 1 , 0 ], [6 , 12 ], [18 , 24 ], [- 1 , - 1 ]], np .int64 )
970
+ expected = np .array ([[- 1 , - 1 ], [- 1 , 0 ], [6 , 12 ], [18 , 24 ], [- 1 , - 1 ]], np .intp )
971
971
assert_equal (expected , actual )
972
972
973
973
@@ -991,7 +991,8 @@ def test_multiple_groupers_bins(chunk) -> None:
991
991
),
992
992
func = "count" ,
993
993
)
994
- expected = np .eye (5 , 5 , dtype = np .int64 )
994
+ # output from `count` is intp
995
+ expected = np .eye (5 , 5 , dtype = np .intp )
995
996
assert_equal (expected , actual )
996
997
997
998
@@ -1020,7 +1021,8 @@ def test_multiple_groupers(chunk, by1, by2, expected_groups) -> None:
1020
1021
if chunk :
1021
1022
by2 = dask .array .from_array (by2 )
1022
1023
1023
- expected = np .ones ((5 , 2 ), dtype = np .int64 )
1024
+ # output from `count` is intp
1025
+ expected = np .ones ((5 , 2 ), dtype = np .intp )
1024
1026
actual , * _ = groupby_reduce (
1025
1027
array , by1 , by2 , axis = (0 , 1 ), func = "count" , expected_groups = expected_groups
1026
1028
)
@@ -1059,45 +1061,47 @@ def test_validate_expected_groups_not_none_dask() -> None:
1059
1061
1060
1062
1061
1063
def test_factorize_reindex_sorting_strings ():
1064
+ # pd.factorize returns codes as intp, which is int32 on 32-bit architectures
1062
1065
kwargs = dict (
1063
1066
by = (np .array (["El-Nino" , "La-Nina" , "boo" , "Neutral" ]),),
1064
1067
axis = - 1 ,
1065
1068
expected_groups = (np .array (["El-Nino" , "Neutral" , "foo" , "La-Nina" ]),),
1066
1069
)
1067
1070
1068
1071
expected = factorize_ (** kwargs , reindex = True , sort = True )[0 ]
1069
- assert_equal (expected , np .array ([0 , 1 , 4 , 2 ], dtype = np .int64 ))
1072
+ assert_equal (expected , np .array ([0 , 1 , 4 , 2 ], dtype = np .intp ))
1070
1073
1071
1074
expected = factorize_ (** kwargs , reindex = True , sort = False )[0 ]
1072
- assert_equal (expected , np .array ([0 , 3 , 4 , 1 ], dtype = np .int64 ))
1075
+ assert_equal (expected , np .array ([0 , 3 , 4 , 1 ], dtype = np .intp ))
1073
1076
1074
1077
expected = factorize_ (** kwargs , reindex = False , sort = False )[0 ]
1075
- assert_equal (expected , np .array ([0 , 1 , 2 , 3 ], dtype = np .int64 ))
1078
+ assert_equal (expected , np .array ([0 , 1 , 2 , 3 ], dtype = np .intp ))
1076
1079
1077
1080
expected = factorize_ (** kwargs , reindex = False , sort = True )[0 ]
1078
- assert_equal (expected , np .array ([0 , 1 , 3 , 2 ], dtype = np .int64 ))
1081
+ assert_equal (expected , np .array ([0 , 1 , 3 , 2 ], dtype = np .intp ))
1079
1082
1080
1083
1081
1084
def test_factorize_reindex_sorting_ints ():
1085
+ # pd.factorize returns codes as intp, which is int32 on 32-bit architectures
1082
1086
kwargs = dict (
1083
1087
by = (np .array ([- 10 , 1 , 10 , 2 , 3 , 5 ]),),
1084
1088
axis = - 1 ,
1085
1089
expected_groups = (np .array ([0 , 1 , 2 , 3 , 4 , 5 ], np .int64 ),),
1086
1090
)
1087
1091
1088
1092
expected = factorize_ (** kwargs , reindex = True , sort = True )[0 ]
1089
- assert_equal (expected , np .array ([6 , 1 , 6 , 2 , 3 , 5 ], dtype = np .int64 ))
1093
+ assert_equal (expected , np .array ([6 , 1 , 6 , 2 , 3 , 5 ], dtype = np .intp ))
1090
1094
1091
1095
expected = factorize_ (** kwargs , reindex = True , sort = False )[0 ]
1092
- assert_equal (expected , np .array ([6 , 1 , 6 , 2 , 3 , 5 ], dtype = np .int64 ))
1096
+ assert_equal (expected , np .array ([6 , 1 , 6 , 2 , 3 , 5 ], dtype = np .intp ))
1093
1097
1094
1098
kwargs ["expected_groups" ] = (np .arange (5 , - 1 , - 1 ),)
1095
1099
1096
1100
expected = factorize_ (** kwargs , reindex = True , sort = True )[0 ]
1097
- assert_equal (expected , np .array ([6 , 1 , 6 , 2 , 3 , 5 ], dtype = np .int64 ))
1101
+ assert_equal (expected , np .array ([6 , 1 , 6 , 2 , 3 , 5 ], dtype = np .intp ))
1098
1102
1099
1103
expected = factorize_ (** kwargs , reindex = True , sort = False )[0 ]
1100
- assert_equal (expected , np .array ([6 , 4 , 6 , 3 , 2 , 0 ], dtype = np .int64 ))
1104
+ assert_equal (expected , np .array ([6 , 4 , 6 , 3 , 2 , 0 ], dtype = np .intp ))
1101
1105
1102
1106
1103
1107
@requires_dask
0 commit comments