Skip to content

Annotate counterexample output of potentially flaky tests #1511

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -862,6 +862,7 @@ prop_simple_real_pbft_convergence TestSetup
tabulate "Ref.PBFT result" [Ref.resultConstrName refResult] $
tabulate "proposed protocol version was adopted" [show aPvuRequired] $
tabulate "proposed software version was adopted" [show aSvuRequired] $
counterexample flakyTestCopy $
counterexample ("params: " <> show params) $
counterexample ("Ref.PBFT result: " <> show refResult) $
counterexample
Expand Down Expand Up @@ -1055,6 +1056,9 @@ prop_simple_real_pbft_convergence TestSetup
genesisSecrets :: Genesis.GeneratedSecrets
(genesisConfig, genesisSecrets) = generateGenesisConfig slotLength params

-- Annotation attached to counterexample output so that a failure of this
-- test is read with its known flakiness in mind; the linked issues track it.
flakyTestCopy :: String
flakyTestCopy =
  concat
    [ "This test may be flaky, and its failure may not be indicative of an actual problem: see "
    , "https://github.com/IntersectMBO/ouroboros-consensus/issues/1294"
    , " and / or "
    , "https://github.com/IntersectMBO/ouroboros-consensus/issues/582"
    ]

byronForgeEbbEnv :: ForgeEbbEnv ByronBlock
byronForgeEbbEnv = ForgeEbbEnv
{ forgeEBB = Byron.forgeEBB . configBlock
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ prop_simple_cardano_convergence TestSetup
, setupTestConfig
, setupVersion
} =
counterexample flakyTestCopy $
prop_general_semisync pga testOutput .&&.
prop_inSync testOutput .&&.
prop_ReachesEra2 reachesEra2 .&&.
Expand Down Expand Up @@ -436,6 +437,9 @@ prop_simple_cardano_convergence TestSetup
counterexample "CP violation in final chains!" $
property $ unNonZero (maxRollbacks setupK) >= finalIntersectionDepth

-- Annotation attached to counterexample output so that a failure of this
-- test is read with its known flakiness in mind; the linked issue tracks it.
flakyTestCopy :: String
flakyTestCopy =
  "This test may be flaky, and its failure may not be indicative of an actual problem: see "
    <> "https://github.com/IntersectMBO/ouroboros-consensus/issues/545"

mkProtocolCardanoAndHardForkTxs ::
forall c m. (IOLike m, c ~ StandardCrypto)
-- Byron
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ prop_simple_praos_convergence TestSetup
, setupTestConfig = testConfig
, setupEvolvingStake = evolvingStake
} =
counterexample flakyTestCopy $
counterexample (tracesToDot testOutputNodes) $
prop_general PropGeneralArgs
{ pgaBlockProperty = prop_validSimpleBlock
Expand Down Expand Up @@ -155,3 +156,5 @@ prop_simple_praos_convergence TestSetup
(blockForgingPraos numCoreNodes nid)
, mkRekeyM = Nothing
}

-- Annotation attached to counterexample output so that a failure of this
-- test is read with its known flakiness in mind; the linked issue tracks it.
--
-- The explicit signature matches the sibling @flakyTestCopy@ definitions in
-- the other test modules and avoids a missing-signature warning.
flakyTestCopy :: String
flakyTestCopy =
  "This test may be flaky, and its failure may not be indicative of an actual problem: see "
    <> "https://github.com/IntersectMBO/ouroboros-consensus/issues/1105"
5 changes: 4 additions & 1 deletion ouroboros-consensus/bench/mempool-bench/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import Main.Utf8 (withStdTerminalHandles)
import Ouroboros.Consensus.Ledger.SupportsMempool (ByteSize32)
import qualified Ouroboros.Consensus.Mempool.Capacity as Mempool
import System.Exit (die, exitFailure)
import System.IO (hPutStrLn, stderr)
import qualified Test.Consensus.Mempool.Mocked as Mocked
import Test.Consensus.Mempool.Mocked (MockedMempool)
import Test.Tasty (withResource)
Expand All @@ -46,7 +47,9 @@ main = withStdTerminalHandles $ do
Nothing -> exitFailure
Just runIngredient -> do
success <- runIngredient
unless success exitFailure
unless success $ do
hPutStrLn stderr "This benchmark is flaky in GitHub Actions due to CI runner load, which can cause it to run significantly slower than expected. It may be useful to try to re-run the job if it fails. See https://github.com/IntersectMBO/ouroboros-consensus/issues/313"
exitFailure
where
benchmarkJustAddingTransactions =
bgroup "Just adding" $
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ instance Arbitrary TestDelayIO where
-- test gives us a more useful property. Also see issue #3894.
prop_delayNextSlot :: TestDelayIO -> Property
prop_delayNextSlot TestDelayIO{..} =
ioProperty test
counterexample flakyTestCopy $ ioProperty test
where
test :: IO Property
test = do
Expand Down Expand Up @@ -261,6 +261,7 @@ instance Arbitrary Schedule where

prop_delayClockShift :: Schedule -> Property
prop_delayClockShift schedule =
counterexample flakyTestCopy $
tabulate "schedule length" [show $ range (length (getSchedule schedule))] $
tabulate "schedule goes back" [show $ scheduleGoesBack schedule] $
tabulate "schedule skips" [show $ range (scheduleCountSkips schedule)] $
Expand Down Expand Up @@ -312,15 +313,16 @@ prop_delayClockShift schedule =
-- | Just as a sanity check, verify that this works in IO
prop_delayNoClockShift :: Property
prop_delayNoClockShift =
ioProperty $ do
now <- getCurrentTime
slots <- originalDelay $
testOverrideDelay
(SystemStart now)
(slotLengthFromMillisec 100)
(secondsToNominalDiffTime 20)
5
pure $ slots === [SlotNo n | n <- [0..4]]
counterexample flakyTestCopy $
ioProperty $ do
now <- getCurrentTime
slots <- originalDelay $
testOverrideDelay
(SystemStart now)
(slotLengthFromMillisec 100)
(secondsToNominalDiffTime 20)
5
pure $ slots === [SlotNo n | n <- [0..4]]

-- | Note that under load, the returned list could be missing certain slots
-- or contain more slots than requested. This means that tests using this
Expand Down Expand Up @@ -624,3 +626,6 @@ overrideDelay start schedule ma = runSim $ do

originalDelay :: OverrideDelay IO a -> IO a
originalDelay ma = runReaderT (unOverrideDelay ma) (error "schedule unused")

-- | Annotation attached to counterexample output so that a failure of these
-- timing-sensitive tests is read with their known flakiness in mind; the
-- linked issue tracks it.
flakyTestCopy :: String
flakyTestCopy =
  "This test may be flaky, and its failure may not be indicative of an actual problem: see "
    <> "https://github.com/IntersectMBO/ouroboros-consensus/issues/567"
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,7 @@ instance QC.Arbitrary SomeTestAdversarialMutation where
-- TODO this did fail after >500,000 tests. Is that amount of flakiness acceptable?
prop_adversarialChainMutation :: SomeTestAdversarialMutation -> QCGen -> QC.Property
prop_adversarialChainMutation (SomeTestAdversarialMutation Proxy Proxy testAdversarialMut) testSeedAsSeed0 =
TT.counterexample flakyTestCopy $
QC.ioProperty $ do
A.SomeCheckedAdversarialRecipe Proxy recipeA' <- pure someRecipeA'

Expand Down Expand Up @@ -507,6 +508,8 @@ prop_adversarialChainMutation (SomeTestAdversarialMutation Proxy Proxy testAdver
A.BadDensity{} -> pure $ QC.property ()
A.BadRace{} -> pure $ QC.property ()

-- | Annotation attached to counterexample output so that a failure of this
-- test is read with its known flakiness in mind; the linked issue tracks it.
--
-- The explicit signature matches the sibling @flakyTestCopy@ definitions in
-- the other test modules and avoids a missing-signature warning.
flakyTestCopy :: String
flakyTestCopy =
  "This test may be flaky, and its failure may not be indicative of an actual problem: see "
    <> "https://github.com/IntersectMBO/ouroboros-consensus/issues/1442"

-----

-- | A nice rendering for failures of 'prop_adversarialChainMutation'
Expand Down
Loading