diff --git a/.release-notes/add-shuffle-option-to-ponytest.md b/.release-notes/add-shuffle-option-to-ponytest.md new file mode 100644 index 0000000000..29c7560107 --- /dev/null +++ b/.release-notes/add-shuffle-option-to-ponytest.md @@ -0,0 +1,19 @@ +## Add --shuffle option to PonyTest + +PonyTest now has a `--shuffle` option that randomizes the order tests are dispatched. This catches a class of bug that's invisible under fixed ordering: test B passes, but only because test A ran first and left behind some state. You won't find out until someone removes test A and something breaks in a way that's hard to trace. + +Use `--shuffle` for a random seed or `--shuffle=SEED` with a specific U64 seed for reproducibility. When shuffle is active, the seed is printed before any test output: + +``` +Test seed: 8675309 +``` + +Grab that seed from your CI log and pass it back to reproduce the exact ordering: + +``` +./my-tests --shuffle=8675309 +``` + +Shuffle applies to all scheduling modes. For CI environments that run tests sequentially to avoid resource contention, `--sequential --shuffle` is the recommended combination: stable runs without flakiness, and each run uses a different seed so test coupling surfaces over time instead of hiding forever. + +`--list --shuffle=SEED` shows the test names in the order that seed would produce, so you can preview orderings without running anything. diff --git a/packages/pony_test/_test.pony b/packages/pony_test/_test.pony new file mode 100644 index 0000000000..e23d45f465 --- /dev/null +++ b/packages/pony_test/_test.pony @@ -0,0 +1,255 @@ +actor \nodoc\ Main is TestList + new create(env: Env) => PonyTest(env, this) + new make() => None + + fun tag tests(test: PonyTest) => + test(_TestListPreservesOrder) + test(_TestShuffleVariesAcrossSeeds) + test(_TestListShuffleSeedZero) + +class \nodoc\ iso _TestListPreservesOrder is UnitTest + """ + --list without --shuffle prints test names in registration order. + """ + fun name(): String => "pony_test/list/preserves_order" + + fun apply(h: TestHelper) => + h.long_test(2_000_000_000) + let list = object tag is TestList + fun tag tests(test: PonyTest) => + test(_NamedTest("A")) + test(_NamedTest("B")) + test(_NamedTest("C")) + test(_NamedTest("D")) + test(_NamedTest("E")) + end + let expected = recover val ["A"; "B"; "C"; "D"; "E"] end + _RunList(h, ["test"; "--list"], list, expected) + +class \nodoc\ iso _TestShuffleVariesAcrossSeeds is UnitTest + """ + Across 10 different seeds, the shuffled test order varies. Each seed is run + through the full PonyTest code path (argument parsing, buffered dispatch, + shuffle, output) and the resulting orderings are collected. The test passes + when at least two orderings differ. + """ + fun name(): String => "pony_test/shuffle/varies_across_seeds" + + fun apply(h: TestHelper) => + h.long_test(5_000_000_000) + let num_tests: USize = 10 + let num_seeds: USize = 10 + let collector = _MultiSeedCollector(h, num_seeds, num_tests) + var seed: U64 = 1 + while seed <= num_seeds.u64() do + let list = object tag is TestList + fun tag tests(test: PonyTest) => + test(_NamedTest("A")) + test(_NamedTest("B")) + test(_NamedTest("C")) + test(_NamedTest("D")) + test(_NamedTest("E")) + test(_NamedTest("F")) + test(_NamedTest("G")) + test(_NamedTest("H")) + test(_NamedTest("I")) + test(_NamedTest("J")) + end + let args = recover val + ["test"; "--list"; "--shuffle=" + seed.string()] + end + let out = _PerSeedCollector(collector, num_tests + 1) + _RunListWith(h, args, list, out) + seed = seed + 1 + end + +class \nodoc\ iso _TestListShuffleSeedZero is UnitTest + """ + Seed 0 is valid and not confused with "no seed provided". + """ + fun name(): String => "pony_test/list/shuffle_seed_zero" + + fun apply(h: TestHelper) => + h.long_test(2_000_000_000) + let list = object tag is TestList + fun tag tests(test: PonyTest) => + test(_NamedTest("A")) + test(_NamedTest("B")) + test(_NamedTest("C")) + test(_NamedTest("D")) + test(_NamedTest("E")) + end + let expected = recover val + ["Test seed: 0"; "E"; "A"; "C"; "D"; "B"] + end + _RunList(h, ["test"; "--list"; "--shuffle=0"], list, expected) + +// --------------------------------------------------------------------------- +// Test infrastructure +// --------------------------------------------------------------------------- + +primitive \nodoc\ _RunList + """ + Create a PonyTest in --list mode with controlled args and verify its output. + """ + fun apply( + h: TestHelper, + args: Array[String] val, + list: TestList tag, + expected: Array[String] val) + => + let collector = _OutputCollector(h, expected, 1) + _RunListWith(h, args, list, collector) + +primitive \nodoc\ _RunListWith + """ + Create a PonyTest in --list mode, sending output to the given collector. + """ + fun apply(h: TestHelper, args: Array[String] val, list: TestList tag, + collector: OutStream) + => + let env = Env.create( + h.env.root, + h.env.input, + collector, + h.env.err, + args, + h.env.vars, + {(code: I32) => None}) + PonyTest(env, list) + +class \nodoc\ iso _NamedTest is UnitTest + """ + A trivially-passing test with a configurable name. + """ + let _name: String + new iso create(name': String) => _name = name' + fun name(): String => _name + fun apply(h: TestHelper) => None + +actor \nodoc\ _OutputCollector is OutStream + """ + Captures print output from a PonyTest instance and verifies it against + expected lines. Supports multiple runs: _runs_remaining counts how many + complete sets of expected output must be received before signaling + completion. Each run must produce output identical to _expected. + """ + let _h: TestHelper + let _expected: Array[String] val + var _runs_remaining: USize + embed _received: Array[String] = Array[String] + + new create(h: TestHelper, expected: Array[String] val, + runs: USize = 1) + => + _h = h + _expected = expected + _runs_remaining = runs + + be print(data: ByteSeq) => + match data + | let s: String => _received.push(s) + | let a: Array[U8] val => _received.push(String.from_array(a)) + end + if _received.size() == _expected.size() then + _check_and_maybe_complete() + end + + fun ref _check_and_maybe_complete() => + _h.assert_array_eq[String](_expected, _received) + _received.clear() + _runs_remaining = _runs_remaining - 1 + if _runs_remaining == 0 then + _h.complete(true) + end + + be write(data: ByteSeq) => None + be printv(data: ByteSeqIter) => None + be writev(data: ByteSeqIter) => None + be flush() => None + +actor \nodoc\ _MultiSeedCollector + """ + Collects shuffled test orders from multiple PonyTest runs (one per seed) + and verifies that at least two different orderings were produced. + """ + let _h: TestHelper + let _total: USize + let _num_tests: USize + embed _orders: Array[Array[String] val] = Array[Array[String] val] + + new create(h: TestHelper, total: USize, num_tests: USize) => + _h = h + _total = total + _num_tests = num_tests + + be receive(order: Array[String] val) => + _orders.push(order) + if _orders.size() == _total then + _verify() + end + + fun ref _verify() => + var found_different = false + try + let first = _orders(0)? + var i: USize = 1 + while i < _orders.size() do + let other = _orders(i)? + if not _arrays_equal(first, other) then + found_different = true + break + end + i = i + 1 + end + end + _h.assert_true(found_different, + "All 10 seeds produced the same test order") + _h.complete(true) + + fun _arrays_equal(a: Array[String] val, b: Array[String] val): Bool => + if a.size() != b.size() then return false end + try + var i: USize = 0 + while i < a.size() do + if a(i)? != b(i)? then return false end + i = i + 1 + end + else + return false + end + true + +actor \nodoc\ _PerSeedCollector is OutStream + """ + Captures output from a single --list --shuffle=SEED run. After receiving + all expected lines, strips the seed line and sends just the test name + ordering to the parent _MultiSeedCollector. + """ + let _parent: _MultiSeedCollector + let _expected_lines: USize + embed _received: Array[String] = Array[String] + + new create(parent: _MultiSeedCollector, expected_lines: USize) => + _parent = parent + _expected_lines = expected_lines + + be print(data: ByteSeq) => + match data + | let s: String => _received.push(s) + | let a: Array[U8] val => _received.push(String.from_array(a)) + end + if _received.size() == _expected_lines then + let order: Array[String] iso = recover iso Array[String] end + var i: USize = 1 // skip "Test seed: N" line + while i < _received.size() do + try order.push(_received(i)?) end + i = i + 1 + end + _parent.receive(consume order) + end + + be write(data: ByteSeq) => None + be printv(data: ByteSeqIter) => None + be writev(data: ByteSeqIter) => None + be flush() => None diff --git a/packages/pony_test/pony_test.pony b/packages/pony_test/pony_test.pony index 217927a860..d61f2aedc9 100644 --- a/packages/pony_test/pony_test.pony +++ b/packages/pony_test/pony_test.pony @@ -11,6 +11,7 @@ other packages as possible. Currently the required packages are: * builtin * time * collections +* random Each unit test is a class, with a single test function. By default all tests run concurrently. @@ -181,6 +182,31 @@ class iso _I8AddTest is UnitTest ``` +## Shuffle + +By default tests are dispatched in registration order. The `--shuffle` option +randomizes the order, which helps detect hidden dependencies between tests. A +test that only passes because another test ran first will eventually fail under +shuffled ordering. + +Use `--shuffle` to generate a random seed, or `--shuffle=SEED` to use a +specific U64 seed for reproducibility. When shuffle is active, the seed is +printed before any test output: + +``` +Test seed: 8675309 +``` + +To reproduce a failure, pass the seed back: `--shuffle=8675309`. The same seed +always produces the same test ordering. + +For CI environments that need sequential execution, `--sequential --shuffle` is +the recommended combination: you get stable runs without resource contention, +and each run uses a different seed so test coupling surfaces over time. + +`--list --shuffle=SEED` prints the test names in the shuffled order that the +given seed would produce. + ## Setting up and tearing down a test environment ### Set Up @@ -240,9 +266,34 @@ class iso TempDirTest """ +use "random" use "time" use @ponyint_assert_disable_popups[None]() +primitive _InOrder + """ + Tests are dispatched in registration order (the default). + """ + +class _Shuffled + """ + Tests are dispatched in a randomized order derived from the given seed. + """ + let seed: U64 + new create(seed': U64) => seed = seed' + + fun apply[A](array: Array[A]) => + """ + Shuffle the given array using this instance's seed. The same seed always + produces the same permutation for the same input. + """ + Rand.from_u64(seed).shuffle[A](array) + +type _TestOrdering is (_InOrder | _Shuffled) + """ + Controls the order in which tests are dispatched to their groups. + """ + actor PonyTest """ Main test framework actor that organises tests, collates information and @@ -251,8 +302,11 @@ actor PonyTest embed _groups: Array[(String, _Group)] = Array[(String, _Group)] embed _records: Array[_TestRecord] = Array[_TestRecord] + embed _pending: Array[(_TestRunner, _Group)] = Array[(_TestRunner, _Group)] + embed _list_names: Array[String] = Array[String] let _env: Env let _timers: Timers = Timers + var _ordering: _TestOrdering = _InOrder var _do_nothing: Bool = false var _verbose: Bool = false var _sequential: Bool = false @@ -308,8 +362,7 @@ actor PonyTest _any_found = true if _list_only then - // Don't actually run tests, just list them - _env.out.print(name) + _list_names.push(name) return end @@ -317,8 +370,9 @@ actor PonyTest _records.push(_TestRecord(_env, name)) var group = _find_group(test.exclusion_group()) - group(_TestRunner(this, index, consume test, group, _verbose, _env, - _timers)) + let runner = _TestRunner(this, index, consume test, group, _verbose, _env, + _timers) + _pending.push((runner, group)) fun ref _find_group(group_name: String): _Group => """ @@ -401,10 +455,31 @@ actor PonyTest end if _list_only then - // No tests to run + match _ordering + | let s: _Shuffled => + _env.out.print("Test seed: " + s.seed.string()) + s.apply[String](_list_names) + end + for name in _list_names.values() do + _env.out.print(name) + end return end + // Shuffle pending tests if requested, then dispatch all to their groups. + // Buffered dispatch is always used: without shuffle, dispatch order matches + // registration order. This works because Pony's FIFO message ordering + // guarantees all apply messages arrive before _all_tests_applied. + match _ordering + | let s: _Shuffled => + _env.out.print("Test seed: " + s.seed.string()) + s.apply[(_TestRunner, _Group)](_pending) + end + + for (runner, group) in _pending.values() do + group(runner) + end + _all_started = true if _finished == _records.size() then // All tests have completed @@ -440,6 +515,16 @@ actor PonyTest _label = arg.substring(8) elseif arg.compare_sub("--only=", 7) is Equal then _only = arg.substring(7) + elseif arg == "--shuffle" then + _ordering = _Shuffled(Time.cycles()) + elseif arg.compare_sub("--shuffle=", 10) is Equal then + try + _ordering = _Shuffled(arg.substring(10).u64()?) + else + _env.out.print("Invalid shuffle seed: " + arg.substring(10)) + _do_nothing = true + return + end else _env.out.print("Unrecognised argument \"" + arg + "\"") _env.out.print("") @@ -456,6 +541,8 @@ actor PonyTest _env.out.print(" --noprog - Do not print progress messages.") _env.out.print(" --list - List but do not run tests.") _env.out.print(" --label=label - Only run tests with given label") + _env.out.print(" --shuffle[=seed] - Run tests in a random order, " + + "optionally specifying a U64 seed.") _do_nothing = true return end diff --git a/packages/stdlib/_test.pony b/packages/stdlib/_test.pony index 8999de8c6d..c53dca1efd 100644 --- a/packages/stdlib/_test.pony +++ b/packages/stdlib/_test.pony @@ -37,6 +37,7 @@ use math = "math" use net = "net" use pony_bench = "pony_bench" use pony_check = "pony_check" +use pony_test = "pony_test" use process = "process" use promises = "promises" use random = "random" @@ -72,6 +73,7 @@ actor \nodoc\ Main is TestList math.Main.make().tests(test) net.Main.make().tests(test) pony_check.Main.make().tests(test) + pony_test.Main.make().tests(test) process.Main.make().tests(test) promises.Main.make().tests(test) random.Main.make().tests(test)