diff --git a/ouroboros-consensus-cardano/app/DBAnalyser/Parsers.hs b/ouroboros-consensus-cardano/app/DBAnalyser/Parsers.hs index bf0f73b3f6..011112596c 100644 --- a/ouroboros-consensus-cardano/app/DBAnalyser/Parsers.hs +++ b/ouroboros-consensus-cardano/app/DBAnalyser/Parsers.hs @@ -120,6 +120,10 @@ parseAnalysis = asum [ ] , benchmarkLedgerOpsParser , getBlockApplicationMetrics + , flag' DumpStakeDistributions $ mconcat [ + long "dump-stake-distributions" + , help "Show the stake distribution for each epoch of some processed block" + ] , pure OnlyValidation ] diff --git a/ouroboros-consensus-cardano/changelog.d/20250312_142505_nick.frisby_stake_drift_tool.md b/ouroboros-consensus-cardano/changelog.d/20250312_142505_nick.frisby_stake_drift_tool.md new file mode 100644 index 0000000000..facce97d44 --- /dev/null +++ b/ouroboros-consensus-cardano/changelog.d/20250312_142505_nick.frisby_stake_drift_tool.md @@ -0,0 +1,24 @@ + + + + +### Non-Breaking + +- Added the --dump-stake-distributions pass to `db-analyser` + + + diff --git a/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/Analysis.hs b/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/Analysis.hs index 6dfd65d6b2..5f987c1c2c 100644 --- a/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/Analysis.hs +++ b/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/Analysis.hs @@ -25,6 +25,8 @@ module Cardano.Tools.DBAnalyser.Analysis ( , runAnalysis ) where +import Cardano.Ledger.Crypto (StandardCrypto) +import qualified Cardano.Ledger.PoolDistr as SL import qualified Cardano.Slotting.Slot as Slotting import qualified Cardano.Tools.DBAnalyser.Analysis.BenchmarkLedgerOps.FileWriting as F import qualified Cardano.Tools.DBAnalyser.Analysis.BenchmarkLedgerOps.SlotDataPoint as DP @@ -115,6 +117,7 @@ runAnalysis analysisName = case go analysisName of go (ReproMempoolAndForge nBks) = mkAnalysis $ reproMempoolForge nBks go (BenchmarkLedgerOps mOutfile lgrAppMode) = mkAnalysis $ benchmarkLedgerOps mOutfile lgrAppMode go (GetBlockApplicationMetrics nrBlocks mOutfile) = mkAnalysis $ getBlockApplicationMetrics nrBlocks mOutfile + go DumpStakeDistributions = mkAnalysis $ dumpStakeDistributions mkAnalysis :: forall startFrom. SingI startFrom @@ -218,6 +221,7 @@ data TraceEvent blk = -- * monotonic time to call 'Mempool.getSnapshotFor' -- * total time spent in the mutator when calling 'Mempool.getSnapshotFor' -- * total time spent in gc when calling 'Mempool.getSnapshotFor' + | DumpStakeDistribution EpochNo (SL.PoolDistr StandardCrypto) instance (HasAnalysis blk, LedgerSupportsProtocol blk) => Show (TraceEvent blk) where show (StartedEvent analysisName) = "Started " <> (show analysisName) @@ -271,7 +275,14 @@ instance (HasAnalysis blk, LedgerSupportsProtocol blk) => Show (TraceEvent blk) , "mutSnap " <> show mutSnap , "gcSnap " <> show gcSnap ] - + show (DumpStakeDistribution eno pd) = + intercalate "\t" + $ (\ss -> show eno : show (SL.pdTotalActiveStake pd) : show (Map.size mp) : ss) + $ [ show (keyhash, SL.individualTotalPoolStake x, SL.individualPoolStake x) + | (keyhash, x) <- Map.assocs mp + ] + where + mp = SL.unPoolDistr pd {------------------------------------------------------------------------------- Analysis: show block and slot number and hash for all blocks @@ -863,6 +874,40 @@ reproMempoolForge numBlks env = do -- this flushes blk from the mempool, since every tx in it is now on the chain void $ Mempool.syncWithLedger mempool +{------------------------------------------------------------------------------- + Analysis: print out the stake distibution for each epoch +-------------------------------------------------------------------------------} + +dumpStakeDistributions :: + forall blk. + ( HasAnalysis blk, + LedgerSupportsProtocol blk + ) => + Analysis blk StartFromLedgerState +dumpStakeDistributions env = do + void $ processAll db registry GetBlock startFrom limit (initLedger, Nothing) process + pure Nothing + where + AnalysisEnv {db, cfg, limit, registry, startFrom, tracer} = env + + FromLedgerState initLedger = startFrom + + process + :: (ExtLedgerState blk, Maybe EpochNo) + -> blk + -> IO (ExtLedgerState blk, Maybe EpochNo) + process (oldLedger, mbEpoch) blk = do + let lcfg = ExtLedgerCfg cfg + newLedger = tickThenReapply lcfg blk oldLedger + lst = ledgerState newLedger + + (,) newLedger <$> case HasAnalysis.epochPoolDistr lst of + Just (epoch, pd) + | mbEpoch /= Just epoch -> + Just epoch <$ traceWith tracer (DumpStakeDistribution epoch pd) + + _ -> pure mbEpoch + {------------------------------------------------------------------------------- Auxiliary: processing all blocks in the DB -------------------------------------------------------------------------------} diff --git a/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/Block/Byron.hs b/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/Block/Byron.hs index f5409511f2..abd961e444 100644 --- a/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/Block/Byron.hs +++ b/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/Block/Byron.hs @@ -41,6 +41,8 @@ instance HasAnalysis ByronBlock where -- metrics for the Byron era only. blockApplicationMetrics = [] + epochPoolDistr _lst = Nothing + instance HasProtocolInfo ByronBlock where data Args ByronBlock = ByronBlockArgs { diff --git a/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/Block/Cardano.hs b/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/Block/Cardano.hs index 341f0bcc08..1ce3846692 100644 --- a/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/Block/Cardano.hs +++ b/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/Block/Cardano.hs @@ -91,6 +91,16 @@ analyseBlock f = p :: Proxy HasAnalysis p = Proxy +analyseLedgerState :: + (forall blk. HasAnalysis blk => LedgerState blk -> a) + -> LedgerState (CardanoBlock StandardCrypto) -> a +analyseLedgerState f = + hcollapse + . hcmap (Proxy @HasAnalysis) (K . f . currentState) + . Telescope.tip + . getHardForkState + . hardForkLedgerStatePerEra + -- | Lift a function polymorphic over all block types supporting `HasAnalysis` -- into a corresponding function over `CardanoBlock.` analyseWithLedgerState :: @@ -299,6 +309,8 @@ instance (HasAnnTip (CardanoBlock StandardCrypto), GetPrevHash (CardanoBlock Sta ) ] + epochPoolDistr = analyseLedgerState epochPoolDistr + dispatch :: LedgerState (CardanoBlock StandardCrypto) -> (LedgerState ByronBlock -> IO Builder) diff --git a/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/Block/Shelley.hs b/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/Block/Shelley.hs index 0d90d64eb2..d5589ee135 100644 --- a/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/Block/Shelley.hs +++ b/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/Block/Shelley.hs @@ -5,6 +5,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE TypeOperators #-} {-# LANGUAGE UndecidableInstances #-} {-# OPTIONS_GHC -Wno-orphans #-} @@ -49,11 +50,13 @@ import qualified Ouroboros.Consensus.Shelley.Ledger.Block as Shelley import Ouroboros.Consensus.Shelley.Node (Nonce (..), ProtocolParamsShelleyBased (..), ShelleyGenesis, protocolInfoShelley) +import Ouroboros.Consensus.Shelley.Protocol.Abstract (ProtoCrypto) import Text.Builder (decimal) -- | Usable for each Shelley-based era instance ( ShelleyCompatible proto era , PerEraAnalysis era + , ProtoCrypto proto ~ StandardCrypto ) => HasAnalysis (ShelleyBlock proto era) where countTxOutputs blk = case Shelley.shelleyBlockRaw blk of @@ -103,6 +106,13 @@ instance ( ShelleyCompatible proto era -- metrics for Shelley-only eras. blockApplicationMetrics = [] + epochPoolDistr lst = + Just (SL.nesEL nes, SL.nesPd nes) + where + nes = shelleyLedgerState lst + +----- + class PerEraAnalysis era where txExUnitsSteps :: Maybe (Core.Tx era -> Word64) diff --git a/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/HasAnalysis.hs b/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/HasAnalysis.hs index d16395d5ac..bdacfa6798 100644 --- a/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/HasAnalysis.hs +++ b/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/HasAnalysis.hs @@ -8,6 +8,8 @@ module Cardano.Tools.DBAnalyser.HasAnalysis ( , WithLedgerState (..) ) where +import Cardano.Ledger.Crypto (StandardCrypto) +import Cardano.Ledger.Shelley.API (PoolDistr) import Data.Map.Strict (Map) import Ouroboros.Consensus.Block import Ouroboros.Consensus.HeaderValidation (HasAnnTip (..)) @@ -58,6 +60,16 @@ class (HasAnnTip blk, GetPrevHash blk, Condense (HeaderHash blk)) => HasAnalysis -- the IO monad. blockApplicationMetrics :: [(Builder, WithLedgerState blk -> IO Builder)] + -- | The epoch number of the block's slot, and the stake distribution used + -- for the leader schedule of that epoch + -- + -- This pool distribution should match 'protocolLedgerView', for example. + -- + -- It should return 'Nothing' if and only if the block is in the Byron era. + epochPoolDistr :: + LedgerState blk + -> Maybe (EpochNo, PoolDistr StandardCrypto) + class HasProtocolInfo blk where data Args blk mkProtocolInfo :: Args blk -> IO (ProtocolInfo blk) diff --git a/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/Types.hs b/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/Types.hs index ddea8b5347..c46be27ad5 100644 --- a/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/Types.hs +++ b/ouroboros-consensus-cardano/src/unstable-cardano-tools/Cardano/Tools/DBAnalyser/Types.hs @@ -38,6 +38,7 @@ data AnalysisName = -- The metrics will be written to the provided file path, or to -- the standard output if no file path is specified. | GetBlockApplicationMetrics NumberOfBlocks (Maybe FilePath) + | DumpStakeDistributions deriving Show data AnalysisResult = diff --git a/scripts/genesis-stake-drift-analysis/README.md b/scripts/genesis-stake-drift-analysis/README.md new file mode 100644 index 0000000000..e0b4b4d142 --- /dev/null +++ b/scripts/genesis-stake-drift-analysis/README.md @@ -0,0 +1,7 @@ +The `scrutinize-stake-drift.sh` bash script postprocesses the output of the `db-analyser --dump-stake-distributions` pass. + +It yields several temporary files in the local directory, so run it in a temporary folder. + +The script prints out a table that indicate how much stake the pools that have been in the top 90% of every epoch in the data had in each epoch. + +The script also prints out a counterfactual table that pretends each of those pools' least-stake epochs were coincident, where that minimum iterates over every suffix of the list of epochs. diff --git a/scripts/genesis-stake-drift-analysis/scrutinize-stake-drift.sh b/scripts/genesis-stake-drift-analysis/scrutinize-stake-drift.sh new file mode 100644 index 0000000000..79ae4741a5 --- /dev/null +++ b/scripts/genesis-stake-drift-analysis/scrutinize-stake-drift.sh @@ -0,0 +1,48 @@ +# For example: +# +# $ db-analyser ... --dump-stake-distributions >foo.txt +# $ bash scrutinize-stake-drift.sh foo.txt + +db_analyser_output_file=$1 + +echo "# the PoolDistrs in tabular form >tidied.txt" +cat "${db_analyser_output_file}" | tr '(,){=}"' ' ' | sed 's/KeyHash/\n/g' | awk -f tidy.awk >tidied.txt + +firstEpoch=$(head -n1 tidied.txt | awk '{print $1}') +lastEpoch=$(tail -n1 tidied.txt | awk '{print $1}') +nepochs=$(expr $lastEpoch - $firstEpoch + 1) + +echo "# discard pools outside of the 90% in each epoch >big.txt" +cat tidied.txt | sort -k1,1n -k5,5gr | awk '(eno != $1) { eno = $1; acc = 0 } (acc < 0.9) { acc = acc + $5; print $0 }' >big.txt +# cp tidied.txt big.txt # uncomment this command to use pools that were in all epochs regardless of their relative stake + +echo "# for how many epochs was each pool in the top 90% >epochs.txt" +cat big.txt | awk '{print $4}' | sort | uniq -c >epochs.txt + +echo "# histogram of epochs.txt" +cat epochs.txt | awk '{print $1}' | sort -n | uniq -c + +echo "# big.txt sorted by pool and then by epoch >sorted.txt" +cat big.txt | sort -k4,4 -k1,1n >sorted.txt + +echo "# restrict sorted.txt to the pools that are in all $nepochs epochs >steady.txt" +join -1 2 -2 4 <(grep -w -e $nepochs epochs.txt) sorted.txt >steady.txt + +echo "# wc -l" +wc -l tidied.txt epochs.txt sorted.txt steady.txt + +echo "# head -n5" +head -n5 tidied.txt epochs.txt sorted.txt steady.txt + +echo "# cumulative stake per epoch within steady.txt" +cat steady.txt | awk '{x[$3] = x[$3] + $6} END { acc = 1/0; for (k in x) { if (acc > x[k]) { kacc = k; acc = x[k] }; print k, x[k] }; print " Min is ", kacc, acc }' | sort -n + +echo "# the statistical distance between each epoch and epoch $lastEpoch" +echo "# " +echo "# see https://en.wikipedia.org/wiki/Statistical_distance#Statistically_close" +cat steady.txt | awk -v eno=$lastEpoch '(eno == $3) { print $0 }' >tmpfile-lastEpoch +for i in $(seq $firstEpoch $lastEpoch); do + cat steady.txt | awk -v eno=$i '(eno == $3) { print $0 }' >tmpfile-$i + + paste tmpfile-lastEpoch tmpfile-$i | awk -v eno=$i '($6 > $12) { x = x + ($6 - $12) } ($6 < $12) { x = x + ($12 - $6) } END { printf("%i %.3f\n", eno, (x / 2)) }' +done diff --git a/scripts/genesis-stake-drift-analysis/tidy.awk b/scripts/genesis-stake-drift-analysis/tidy.awk new file mode 100644 index 0000000000..aa821b31ac --- /dev/null +++ b/scripts/genesis-stake-drift-analysis/tidy.awk @@ -0,0 +1,10 @@ +/EpochNo/ { + eno = $3; + n = $7; + i = 0; +} + +(/%/) { + print eno, i, n, $1, $5 / $7; + i++; +}