Skip to content

Commit 356906c

Browse files
authored
Add --shuffle option to PonyTest (#5076)
Randomizes test dispatch order to catch hidden dependencies between tests. Without --shuffle, tests dispatch in registration order as before (buffered dispatch is always used, but the order is preserved). Uses a _TestOrdering union type (_InOrder | _Shuffled) to represent the ordering mode, with the seed resolved at parse time. The shuffle itself uses Rand.from_u64(seed) with Fisher-Yates via the stdlib Random.shuffle method. --list --shuffle=SEED shows the shuffled order so users can preview what a given seed produces without running anything. Closes #5075
1 parent b32c839 commit 356906c

File tree

4 files changed

+367
-5
lines changed

4 files changed

+367
-5
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
## Add --shuffle option to PonyTest
2+
3+
PonyTest now has a `--shuffle` option that randomizes the order tests are dispatched. This catches a class of bug that's invisible under fixed ordering: test B passes, but only because test A ran first and left behind some state. You won't find out until someone removes test A and something breaks in a way that's hard to trace.
4+
5+
Use `--shuffle` for a random seed or `--shuffle=SEED` with a specific U64 seed for reproducibility. When shuffle is active, the seed is printed before any test output:
6+
7+
```
8+
Test seed: 8675309
9+
```
10+
11+
Grab that seed from your CI log and pass it back to reproduce the exact ordering:
12+
13+
```
14+
./my-tests --shuffle=8675309
15+
```
16+
17+
Shuffle applies to all scheduling modes. For CI environments that run tests sequentially to avoid resource contention, `--sequential --shuffle` is the recommended combination: stable runs without flakiness, and each run uses a different seed so test coupling surfaces over time instead of hiding forever.
18+
19+
`--list --shuffle=SEED` shows the test names in the order that seed would produce, so you can preview orderings without running anything.

packages/pony_test/_test.pony

Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
actor \nodoc\ Main is TestList
  """
  Test list for this file: hands itself to PonyTest and registers the
  --list / --shuffle unit tests defined below.
  """
  new create(env: Env) =>
    """
    Start a PonyTest run over this test list using the given environment.
    """
    PonyTest(env, this)

  new make() =>
    """
    Construct the list without starting a run.
    """
    None

  fun tag tests(test: PonyTest) =>
    """
    Register each unit test with the runner, in this order.
    """
    test(_TestListPreservesOrder)
    test(_TestShuffleVariesAcrossSeeds)
    test(_TestListShuffleSeedZero)
9+
10+
class \nodoc\ iso _TestListPreservesOrder is UnitTest
  """
  Running with --list and no --shuffle must print the test names in the
  same order in which they were registered.
  """
  fun name(): String =>
    "pony_test/list/preserves_order"

  fun apply(h: TestHelper) =>
    // Output arrives asynchronously; allow up to 2 seconds (in nanoseconds).
    h.long_test(2_000_000_000)

    let registry = object tag is TestList
      fun tag tests(test: PonyTest) =>
        // Registration order is the property under test.
        let names = ["A"; "B"; "C"; "D"; "E"]
        for test_name in names.values() do
          test(_NamedTest(test_name))
        end
    end

    let in_order = recover val ["A"; "B"; "C"; "D"; "E"] end
    let cli_args = recover val ["test"; "--list"] end
    _RunList(h, cli_args, registry, in_order)
28+
29+
class \nodoc\ iso _TestShuffleVariesAcrossSeeds is UnitTest
  """
  Runs `--list --shuffle=SEED` for seeds 1 through 10, each through a full
  PonyTest instance (argument parsing, buffered dispatch, shuffle, output),
  and collects the resulting orderings. The test passes when at least two of
  the collected orderings differ.
  """
  fun name(): String =>
    "pony_test/shuffle/varies_across_seeds"

  fun apply(h: TestHelper) =>
    h.long_test(5_000_000_000)

    let test_count: USize = 10
    let seed_count: USize = 10
    // Each run prints one line per test plus the leading "Test seed: N" line.
    let lines_per_run = test_count + 1
    let orderings = _MultiSeedCollector(h, seed_count)

    let last_seed = seed_count.u64()
    var current: U64 = 1
    while current <= last_seed do
      let registry = object tag is TestList
        fun tag tests(test: PonyTest) =>
          let names = ["A"; "B"; "C"; "D"; "E"; "F"; "G"; "H"; "I"; "J"]
          for test_name in names.values() do
            test(_NamedTest(test_name))
          end
      end

      let shuffle_flag: String = "--shuffle=" + current.string()
      let cli_args = recover val ["test"; "--list"; shuffle_flag] end
      let per_seed = _PerSeedCollector(orderings, lines_per_run)
      _RunListWith(h, cli_args, registry, per_seed)

      current = current + 1
    end
65+
66+
class \nodoc\ iso _TestListShuffleSeedZero is UnitTest
  """
  An explicit seed of 0 must be honoured as a real seed rather than being
  treated as "no seed provided".
  """
  fun name(): String =>
    "pony_test/list/shuffle_seed_zero"

  fun apply(h: TestHelper) =>
    h.long_test(2_000_000_000)

    let registry = object tag is TestList
      fun tag tests(test: PonyTest) =>
        let names = ["A"; "B"; "C"; "D"; "E"]
        for test_name in names.values() do
          test(_NamedTest(test_name))
        end
    end

    // The seed line comes first, followed by the order expected for seed 0.
    let want = recover val
      ["Test seed: 0"; "E"; "A"; "C"; "D"; "B"]
    end
    let cli_args = recover val ["test"; "--list"; "--shuffle=0"] end
    _RunList(h, cli_args, registry, want)
86+
87+
// ---------------------------------------------------------------------------
88+
// Test infrastructure
89+
// ---------------------------------------------------------------------------
90+
91+
primitive \nodoc\ _RunList
  """
  Runs a PonyTest instance with the supplied command-line args and test list,
  and checks the lines it prints against `expected`.
  """
  fun apply(
    h: TestHelper,
    args: Array[String] val,
    list: TestList tag,
    expected: Array[String] val)
  =>
    // A single run: the collector completes the test after one full match.
    _RunListWith(h, args, list, _OutputCollector(h, expected, 1))
103+
104+
primitive \nodoc\ _RunListWith
  """
  Starts a PonyTest instance whose standard output is redirected to
  `collector`. Everything else in the environment (root, input, err, vars) is
  taken from the running test's own environment.
  """
  fun apply(h: TestHelper, args: Array[String] val, list: TestList tag,
    collector: OutStream)
  =>
    let outer = h.env
    // The exit-code callback is a no-op.
    let sandbox = Env.create(
      outer.root,
      outer.input,
      collector,
      outer.err,
      args,
      outer.vars,
      {(code: I32) => None})
    PonyTest(sandbox, list)
120+
121+
class \nodoc\ iso _NamedTest is UnitTest
  """
  A unit test that does nothing and reports whatever name it was built with.
  """
  let _label: String

  new iso create(name': String) =>
    _label = name'

  fun name(): String =>
    _label

  fun apply(h: TestHelper) =>
    None
129+
130+
actor \nodoc\ _OutputCollector is OutStream
  """
  An OutStream that records printed lines and compares them with an expected
  list. Once as many lines as `_expected` holds have arrived, they are
  asserted equal and the buffer is reset. After `runs` such batches the test
  is completed. Only `print` is recorded; the other OutStream behaviours
  discard their input.
  """
  let _h: TestHelper
  let _expected: Array[String] val
  var _runs_remaining: USize
  embed _received: Array[String] = Array[String]

  new create(h: TestHelper, expected: Array[String] val,
    runs: USize = 1)
  =>
    _runs_remaining = runs
    _expected = expected
    _h = h

  be print(data: ByteSeq) =>
    // All batches already verified: ignore anything printed afterwards.
    if _runs_remaining == 0 then return end

    let line: String =
      match data
      | let text: String => text
      | let bytes: Array[U8] val => String.from_array(bytes)
      end
    _received.push(line)

    // Keep buffering until a full batch has arrived.
    if _received.size() != _expected.size() then return end

    _h.assert_array_eq[String](_expected, _received)
    _received.clear()
    _runs_remaining = _runs_remaining - 1
    if _runs_remaining == 0 then _h.complete(true) end

  be write(data: ByteSeq) => None
  be printv(data: ByteSeqIter) => None
  be writev(data: ByteSeqIter) => None
  be flush() => None
168+
169+
actor \nodoc\ _MultiSeedCollector
  """
  Gathers one test ordering per seed and, once `total` orderings have been
  received, asserts that they are not all identical and completes the test.
  """
  let _h: TestHelper
  let _total: USize
  embed _orders: Array[Array[String] val] = Array[Array[String] val]

  new create(h: TestHelper, total: USize) =>
    """
    `total` is the number of orderings to wait for before verifying.
    """
    _h = h
    _total = total

  be receive(order: Array[String] val) =>
    """
    Record one ordering; verify as soon as the last expected one arrives.
    """
    _orders.push(order)
    if _orders.size() == _total then
      _verify()
    end

  fun ref _verify() =>
    """
    Assert that at least one collected ordering differs from the first, then
    complete the test. Comparing every ordering against the first is enough:
    if none differs from it, they are all identical.
    """
    var found_different = false
    try
      let first = _orders(0)?
      var i: USize = 1
      while i < _orders.size() do
        let other = _orders(i)?
        if not _arrays_equal(first, other) then
          found_different = true
          break
        end
        i = i + 1
      end
    end
    // Report the number of orderings actually compared instead of a
    // hard-coded count, so the message stays correct for any `total`.
    _h.assert_true(found_different,
      "All " + _orders.size().string()
        + " seeds produced the same test order")
    _h.complete(true)

  fun _arrays_equal(a: Array[String] val, b: Array[String] val): Bool =>
    """
    True when both arrays have the same length and equal elements at every
    index.
    """
    if a.size() != b.size() then return false end
    try
      var i: USize = 0
      while i < a.size() do
        if a(i)? != b(i)? then return false end
        i = i + 1
      end
    else
      // An index lookup failed; treat the arrays as unequal.
      return false
    end
    true
218+
219+
actor \nodoc\ _PerSeedCollector is OutStream
  """
  An OutStream for a single `--list --shuffle=SEED` run. It buffers printed
  lines until `expected_lines` have arrived, drops the first one (the seed
  line) and forwards the remaining test names to the parent
  _MultiSeedCollector. Only `print` is recorded; anything printed after the
  hand-off is ignored.
  """
  let _parent: _MultiSeedCollector
  let _expected_lines: USize
  var _done: Bool = false
  embed _received: Array[String] = Array[String]

  new create(parent: _MultiSeedCollector, expected_lines: USize) =>
    _expected_lines = expected_lines
    _parent = parent

  be print(data: ByteSeq) =>
    if _done then return end

    match data
    | let bytes: Array[U8] val => _received.push(String.from_array(bytes))
    | let text: String => _received.push(text)
    end

    // Not a complete run yet: keep buffering.
    if _received.size() != _expected_lines then return end

    _done = true
    let names = recover iso Array[String] end
    // Start at index 1 to skip the "Test seed: N" line.
    var idx: USize = 1
    while idx < _received.size() do
      try names.push(_received(idx)?) end
      idx = idx + 1
    end
    _parent.receive(consume names)

  be write(data: ByteSeq) => None
  be printv(data: ByteSeqIter) => None
  be writev(data: ByteSeqIter) => None
  be flush() => None

0 commit comments

Comments
 (0)