Skip to content

Commit ffa9e24

Browse files
Merge pull request #10281 from RaimoNiskanen/raimo/stdlib/rand-shuffle OTP-19826
Random shuffle of lists
2 parents bda5e7d + a5dbc7b commit ffa9e24

File tree

2 files changed

+531
-68
lines changed

2 files changed

+531
-68
lines changed

lib/stdlib/src/rand.erl

Lines changed: 207 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,8 @@ the generator's range:
404404
uniform_real/0, uniform_real_s/1,
405405
bytes/1, bytes_s/2,
406406
jump/0, jump/1,
407-
normal/0, normal/2, normal_s/1, normal_s/3
407+
normal/0, normal/2, normal_s/1, normal_s/3,
408+
shuffle/1, shuffle_s/2
408409
]).
409410

410411
%% Utilities
@@ -1315,6 +1316,211 @@ normal_s(Mean, Variance, State0) when 0 =< Variance ->
13151316
{X, State} = normal_s(State0),
13161317
{Mean + (math:sqrt(Variance) * X), State}.
13171318

1319+
1320+
-doc """
1321+
Shuffle a list.
1322+
1323+
Like `shuffle_s/2` but operates on the state stored in
1324+
the process dictionary. Returns the shuffled list.
1325+
""".
1326+
-doc(#{group => <<"Plug-in framework API">>,since => <<"OTP 29.0">>}).
1327+
-spec shuffle(List :: list()) -> ShuffledList :: list().
1328+
shuffle(List) ->
1329+
{ShuffledList, State} = shuffle_s(List, seed_get()),
1330+
_ = seed_put(State),
1331+
ShuffledList.
1332+
1333+
-doc """
1334+
Shuffle a list.
1335+
1336+
From the specified `State` shuffles the elements in argument `List` so that,
1337+
given that the [PRNG algorithm](#algorithms) in `State` is perfect,
1338+
every possible permutation of the elements in the returned `ShuffledList`
1339+
has the same probability.
1340+
1341+
In other words, the quality of the shuffling depends only on
1342+
the quality of the backend [random number generator](#algorithms)
1343+
and [seed](`seed_s/1`). If a cryptographically unpredictable
1344+
shuffling is needed, use for example `crypto:rand_seed_alg_s/1`
1345+
to initialize the random number generator.
1346+
1347+
Returns the shuffled list [`ShuffledList`](`t:list/0`)
1348+
and the [`NewState`](`t:state/0`).
1349+
""".
1350+
-doc(#{group => <<"Plug-in framework API">>,since => <<"OTP 29.0">>}).
1351+
-spec shuffle_s(List, State) ->
1352+
{ShuffledList :: list(), NewState :: state()}
1353+
when
1354+
List :: list(),
1355+
State :: state().
1356+
shuffle_s(List, {#{bits:=_, next:=Next} = AlgHandler, R0})
1357+
when is_list(List) ->
1358+
WeakLowBits = maps:get(weak_low_bits, AlgHandler, 0),
1359+
[P0|S0] = shuffle_init_bitstream(R0, Next, WeakLowBits),
1360+
{ShuffledList, _P1, [R1|_]=_S1} = shuffle_r(List, [], P0, S0),
1361+
{ShuffledList, {AlgHandler, R1}};
1362+
shuffle_s(List, {#{max:=_, next:=Next} = AlgHandler, R0})
1363+
when is_list(List) ->
1364+
%% Old spec - assume 2 weak low bits
1365+
WeakLowBits = 2,
1366+
[P0|S0] = shuffle_init_bitstream(R0, Next, WeakLowBits),
1367+
{ShuffledList, _P1, [R1|_]=_S1} = shuffle_r(List, [], P0, S0),
1368+
{ShuffledList, {AlgHandler, R1}}.
1369+
1370+
%% Random-split-and-shuffle algorithm suggested by Richard A. O'Keefe
1371+
%% on ErlangForums, as I interpreted it... "basically a randomized
1372+
%% quicksort", shall we name it Quickshuffle?
1373+
%%
1374+
%% Randomly split the list in two lists, and recursively shuffle
1375+
%% the two smaller lists.
1376+
%%
1377+
%% How the algorithm works and why it is correct can be explained like this:
1378+
%%
1379+
%% The objective is, given a list of elements, to return a random
1380+
%% permutation of those elements so that every possible permutation
1381+
%% has the same probability to be returned.
1382+
%%
1383+
%% One of the two correct and bias free algorithms described on the Wikipedia
1384+
%% page for Fisher-Yates shuffling is to assign a random number to each
1385+
%% element in the list and order the elements according to the numbers.
1386+
%% For this to be correct the generated numbers must not have duplicates.
1387+
%%
1388+
%% This algorithm does that, but assigning a number and ordering
1389+
%% the elements is more or less the same step, which is taken
1390+
%% one binary bit at the time.
1391+
%%
1392+
%% It can be seen as, to each element, assign a fixpoint number
1393+
%% of infinite length starting with bit weight 1/2, continuing with 1/4,
1394+
%% and so on..., but reveal it incrementally.
1395+
%%
1396+
%% The list to shuffle is traversed, and a random bit is generated
1397+
%% for each element. If it is a 0, the element is assigned the zero bit
1398+
%% by moving it to the head of the list Zero, and if it is a 1,
1399+
%% to the head of the list One.
1400+
%%
1401+
%% Then the list Zero is recursively shuffled onto the accumulator list Acc,
1402+
%% after that the list One. By that all elements in Zero are ordered
1403+
%% before the ones in One, according to the generated numbers.
1404+
%% The order is actually not important as long as it is consistent.
1405+
%%
1406+
%% The algorithm recurses until the Zero or One list has length
1407+
%% 0 or 1, which is when the generated fixpoint number has no duplicate.
1408+
%% The fixpoint number in itself only exists in the guise of the
1409+
%% recursion call stack, that is whether an element belongs to list
1410+
%% Zero or One on each recursion level.
1411+
%% Here is the bare algorithm:
1412+
%%
1413+
%% quickshuffle([], Acc) -> Acc;
1414+
%% quickshuffle([X], Acc) -> [X | Acc];
1415+
%% quickshuffle([_|_] = L, Acc) ->
1416+
%% {Zero, One} = quickshuffle_split(L, [], []),
1417+
%% quickshuffle(One, quickshuffle(Zero, Acc)).
1418+
%%
1419+
%% quickshuffle_split([], Zero, One) ->
1420+
%% {Zero, One};
1421+
%% quickshuffle_split([X | L], Zero, One) ->
1422+
%% case random_bit() of
1423+
%% 0 -> quickshuffle_split(L, [X | Zero], One);
1424+
%% 1 -> quickshuffle_split(L, Zero, [X | One])
1425+
%% end.
1426+
%%
1427+
%% As an optimization, since the algorithm is equivalent to its objective
1428+
%% to randomly permute a list, we can when reaching a small enough list
1429+
%% as in 3 or 2 instead do an explicit random permutation of the list.
1430+
%%
1431+
%% The `random_bit()` can be generated with small overhead by generating
1432+
%% a random word and cache it, then shift out one bit at the time.
1433+
%%
1434+
%% Also, it is faster to do a 4-way split by 2 bits instead of,
1435+
%% as described above, a 2-way split by 1 bit.
1436+
1437+
%% Leaf cases - random permutations for 0..3 elements
1438+
shuffle_r([], Acc, P, S) ->
1439+
{Acc, P, S};
1440+
shuffle_r([X], Acc, P, S) ->
1441+
{[X | Acc], P, S};
1442+
shuffle_r([X, Y], Acc, P, S) ->
1443+
shuffle_r_2(X, Acc, P, S, Y);
1444+
shuffle_r([X, Y, Z], Acc, P, S) ->
1445+
shuffle_r_3(X, Acc, P, S, Y, Z);
1446+
%% General case - split and recursive shuffle
1447+
shuffle_r([_, _, _ | _] = List, Acc, P, S) ->
1448+
%% P and S is bitstream cache and state
1449+
shuffle_r(List, Acc, P, S, [], [], [], []).
1450+
%%
1451+
%% Split L into 4 random subsets
1452+
%%
1453+
shuffle_r([], Acc0, P0, S0, Zero, One, Two, Three) ->
1454+
%% Split done, recursively shuffle the splitted lists onto Acc
1455+
{Acc1, P1, S1} = shuffle_r(Zero, Acc0, P0, S0),
1456+
{Acc2, P2, S2} = shuffle_r(One, Acc1, P1, S1),
1457+
{Acc3, P3, S3} = shuffle_r(Two, Acc2, P2, S2),
1458+
shuffle_r(Three, Acc3, P3, S3);
1459+
shuffle_r([X | L], Acc, P0, S, Zero, One, Two, Three)
1460+
when is_integer(P0), 3 < P0, P0 < 1 bsl 57 ->
1461+
P1 = P0 bsr 2,
1462+
case P0 band 3 of
1463+
0 -> shuffle_r(L, Acc, P1, S, [X | Zero], One, Two, Three);
1464+
1 -> shuffle_r(L, Acc, P1, S, Zero, [X | One], Two, Three);
1465+
2 -> shuffle_r(L, Acc, P1, S, Zero, One, [X | Two], Three);
1466+
3 -> shuffle_r(L, Acc, P1, S, Zero, One, Two, [X | Three])
1467+
end;
1468+
shuffle_r([_ | _] = L, Acc, _P, S0, Zero, One, Two, Three) ->
1469+
[P|S1] = shuffle_new_bits(S0),
1470+
shuffle_r(L, Acc, P, S1, Zero, One, Two, Three).
1471+
1472+
%% Permute 2 elements
1473+
shuffle_r_2(X, Acc, P, S, Y)
1474+
when is_integer(P), 1 < P, P < 1 bsl 57 ->
1475+
{case P band 1 of
1476+
0 -> [Y, X | Acc];
1477+
1 -> [X, Y | Acc]
1478+
end, P bsr 1, S};
1479+
shuffle_r_2(X, Acc, _P, S0, Y) ->
1480+
[P|S1] = shuffle_new_bits(S0),
1481+
shuffle_r_2(X, Acc, P, S1, Y).
1482+
1483+
%% Permute 3 elements
1484+
%%
1485+
%% Uses 3 random bits per iteration with a probability of 1/4
1486+
%% to reject and retry, which on average is 3 * 4/3
1487+
%% (infinite sum of (1/4)^k) = 4 bits per permutation
1488+
shuffle_r_3(X, Acc, P0, S, Y, Z)
1489+
when is_integer(P0), 7 < P0, P0 < 1 bsl 57 ->
1490+
P1 = P0 bsr 3,
1491+
case P0 band 7 of
1492+
0 -> {[Z, Y, X | Acc], P1, S};
1493+
1 -> {[Y, Z, X | Acc], P1, S};
1494+
2 -> {[Z, X, Y | Acc], P1, S};
1495+
3 -> {[X, Z, Y | Acc], P1, S};
1496+
4 -> {[Y, X, Z | Acc], P1, S};
1497+
5 -> {[X, Y, Z | Acc], P1, S};
1498+
_ -> % Reject and retry
1499+
shuffle_r_3(X, Acc, P1, S, Y, Z)
1500+
end;
1501+
shuffle_r_3(X, Acc, _P, S0, Y, Z) ->
1502+
[P|S1] = shuffle_new_bits(S0),
1503+
shuffle_r_3(X, Acc, P, S1, Y, Z).
1504+
1505+
-dialyzer({no_improper_lists, shuffle_init_bitstream/3}).
1506+
%%
1507+
shuffle_init_bitstream(R, Next, WeakLowBits) ->
1508+
P = 1, % Marker for out of random bits
1509+
W = {Next,WeakLowBits}, % Generator
1510+
S = [R|W], % Generator state
1511+
[P|S]. % Bit cash and state
1512+
1513+
-dialyzer({no_improper_lists, shuffle_new_bits/1}).
1514+
%%
1515+
shuffle_new_bits([R0|{Next,WeakLowBits}=W]) ->
1516+
{V, R1} = Next(R0),
1517+
%% Setting the top bit M here provides the marker
1518+
%% for when we are out of random bits: P =:= 1
1519+
M = 1 bsl 56,
1520+
P = ((V bsr WeakLowBits) band (M-1)) bor M,
1521+
S = [R1|W],
1522+
[P|S].
1523+
13181524
%% =====================================================================
13191525
%% Internal functions
13201526

0 commit comments

Comments
 (0)