Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,7 @@ dev/local-environment-dynamic/configurations/partner-chains-nodes/*
ogmios_client.log

# e2e tests (Python)
venv

# Mempool benchmark results
e2e-tests/utils/mempool_benchmarks/results/
34 changes: 29 additions & 5 deletions e2e-tests/utils/block_size_benchmarks/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def __init__(self, nodes: List[str]):
raise ValueError("At least one node must be specified")
self.all_nodes = [node.lower() for node in nodes]
self.blocks: List[Block] = []
self.active_nodes: List[str] = [] # Will be populated after parsing

def parse_file(self, filename: str) -> None:
try:
Expand Down Expand Up @@ -60,6 +61,9 @@ def _parse_content(self, content: str) -> None:
current_block.add_import(node, delay)
elif 'Creator unknown' in line and current_block:
current_block.creator = 'unknown'

# Detect which nodes are actually active
self._detect_active_nodes()

def _parse_block_header(self, line: str) -> Optional[Block]:
block_match = re.search(r'Block #(\d+)', line)
Expand Down Expand Up @@ -87,10 +91,29 @@ def _parse_import(self, line: str) -> Tuple[Optional[str], float]:
delay = float(delay_str) if delay_str else 0.0
return node, delay
return None, 0.0

def _detect_active_nodes(self) -> None:
"""Detect which nodes are actually active based on parsed data."""
active_set = set()
for block in self.blocks:
if block.creator and block.creator != 'unknown':
active_set.add(block.creator)
active_set.update(block.imports.keys())

# Keep only nodes from all_nodes that are actually active
self.active_nodes = [node for node in self.all_nodes if node in active_set]

if not self.active_nodes:
self.active_nodes = self.all_nodes

inactive_nodes = set(self.all_nodes) - active_set
if inactive_nodes:
print(f"Note: The following nodes appear to be offline: {', '.join(sorted(inactive_nodes))}")

def get_complete_blocks(self) -> List["Block"]:
    """Return only the blocks that have import/creation data from every active node.

    NOTE(review): the captured diff left both the old (``all_nodes``) and new
    (``active_nodes``) return lines stacked here, which is invalid Python. The
    ``active_nodes`` variant is kept — it matches the rest of this change set,
    which consistently switches reporting from ``all_nodes`` to the detected
    ``active_nodes``.
    """
    return [block for block in self.blocks
            if block.is_complete(self.active_nodes)]

def _format_table_row(self, values: List[str], widths: List[int]) -> str:
formatted_values = []
Expand All @@ -107,7 +130,7 @@ def generate_summary_statistics(self, complete_blocks: List[Block]) -> str:
lines.append("")

stats = {}
for node in self.all_nodes:
for node in self.active_nodes:
blocks_created = len([block for block in complete_blocks if block.creator == node])

import_times = [
Expand All @@ -131,7 +154,7 @@ def generate_summary_statistics(self, complete_blocks: List[Block]) -> str:
lines.append(header)
lines.append(separator)

for node in self.all_nodes:
for node in self.active_nodes:
s = stats[node]
row = (f"| {node.capitalize():<7} | {s['blocks_created']:<14} | "
f"{s['blocks_imported']:<15} | {s['min_import']:<15.0f} | "
Expand All @@ -146,16 +169,17 @@ def run(self, input_filename: str, output_filename: str) -> None:
print(f"Parsing file: {input_filename}")
self.parse_file(input_filename)
print(f"Total blocks parsed: {len(self.blocks)}")
print(f"Active nodes detected: {', '.join(self.active_nodes)}")
complete_blocks = self.get_complete_blocks()
print(f"Complete blocks: {len(complete_blocks)}")
print(f"Complete blocks (with all active nodes): {len(complete_blocks)}")
if not complete_blocks:
print("No complete blocks found. Exiting.")
sys.exit(1)
stats_table = self.generate_summary_statistics(complete_blocks)
try:
with open(output_filename, 'w', encoding='utf-8') as file:
file.write("# Block Propagation Analysis\n\n")
nodes = ', '.join(node.capitalize() for node in self.all_nodes)
nodes = ', '.join(node.capitalize() for node in self.active_nodes)
file.write(f"**Nodes analyzed:** {nodes}")
file.write("\n\n")
file.write(stats_table)
Expand Down
247 changes: 247 additions & 0 deletions e2e-tests/utils/mempool_benchmarks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
# Mempool Benchmarking Scripts

Scripts to extract and analyze mempool metrics from midnight-node logs, tracking transaction pool state over time.

## Overview

These scripts parse midnight-node logs to extract mempool metrics from **Ferdie's node** (the only node with detailed txpool logging enabled):

### Core Metrics
- **Ready transactions** - Valid and executable now
- **Future transactions** - Valid but waiting for dependencies (e.g., nonce)
- **Transaction count** - Total in mempool (ready + future)

### Performance Metrics
- **Validations scheduled/finished** - validated_count, revalidated
- **Submitted transactions** - submitted_count
- **Pruned transactions** - Removed from finalized blocks
- **Reverified transactions** - Resubmitted after reorg

## Metrics Explained

### Core Metrics

**Ready Transactions**
- Valid transactions that can be executed immediately
- Have all dependencies satisfied (correct nonce, sufficient balance, etc.)
- Eligible for inclusion in the next block

**Future Transactions**
- Valid transactions waiting for dependencies
- Typically waiting for earlier nonces or other prerequisites
- Will become "ready" once dependencies are satisfied

**Mempool Length (`mempool_len`)**
- Total number of transaction objects currently tracked
- May differ from ready + future due to internal pool management

### Activity Metrics

**Submitted Count (`submitted_count`)**
- Number of new transactions submitted to the pool in this event
- Tracks transaction admission rate

**Validated Count (`validated_count`)**
- Number of transactions validated in this event
- Transactions checked for correctness (signature, nonce, balance, etc.)

**Revalidated Count (`revalidated`)**
- Number of transactions re-validated after chain updates
- Happens when new blocks arrive and pool needs to refresh validity

**Pruned Count (`pruned`)**
- Number of transactions removed because they were included in finalized blocks
- Indicates successful transaction execution on-chain

**Reverified Count (`reverified`)**
- Number of transactions resubmitted after chain reorganization
- Happens when a fork is resolved and some transactions need to be re-added

## Log Events Tracked

The scripts extract these metrics from transaction pool events:

1. **`maintain` event** (INFO level):
```
2026-01-07 12:32:55.905 INFO txpool maintain txs=(5, 2) ...
```
- `txs=(ready, future)` - Current pool state

2. **`update_view_with_mempool` event** (DEBUG level):
```
2026-01-07 12:32:54.150 DEBUG txpool update_view_with_mempool submitted_count=1 mempool_len=7
```
- Tracks submissions and total pool size

3. **`xts_count` event** (DEBUG level):
```
2026-01-07 12:32:54.150 DEBUG txpool update_view_with_mempool xts_count=(5, 2)
```
- Alternative source for ready/future counts

4. **`purge_finalized_transactions` event**:
```
purge_finalized_transactions count=2
```
- Tracks transactions successfully included in blocks

5. **`reverified_transactions` event**:
```
reverified_transactions=1
```
- Tracks transactions resubmitted after reorgs

## How to Use

### Prerequisites

1. Install `python3` and `pip`
2. Install the required Python packages: `pip install pandas matplotlib`

### Quick Start (Recommended)

Use the automated runner script to download logs and generate analysis in one command:

```bash
# With config file containing Grafana credentials
python3 run_mempool_benchmark.py \
--config ../../secrets/substrate/performance/performance.json \
--from-time "2026-01-08T10:00:00Z" \
--to-time "2026-01-08T10:10:00Z" \
--window 1000 \
--output-dir ./results
```

This will:
1. Download Ferdie's logs for the specified time range
2. Extract mempool metrics
3. Generate analysis with 1-second time windows
4. Save all outputs to the `results/` directory

### Manual Steps (Alternative)

1. **Download logs from Grafana/Loki** (optional)

Use the download_logs.py script to fetch Ferdie's logs:
```bash
python3 download_logs.py --config path/to/config.json \
--node ferdie \
--from-time "2026-01-07T10:00:00Z" \
--to-time "2026-01-07T11:00:00Z" \
--output-dir logs
```

Or manually put Ferdie's logs in `ferdie.txt`

2. **Transform raw Grafana logs** (if needed)

If you downloaded raw Grafana logs with host labels, transform them:
```bash
python3 transformer.py
```

3. **Extract mempool data from logs**

By default, processes Ferdie's logs:
```bash
python3 extractor.py
```

Or specify nodes explicitly:
```bash
python3 extractor.py ferdie
```

This generates `mempool_report.txt` with time-series data.

4. **Generate statistics and graphs**

```bash
python3 analyzer.py mempool_report.txt analysis.txt [window_ms]
```

Example with 1-second timeframe:
```bash
python3 analyzer.py mempool_report.txt analysis.txt 1000
```

Optional timeframe parameter (in milliseconds):
- `100` for 100ms windows
- `1000` for 1-second windows (default)
- `5000` for 5-second windows

## Script Reference

### run_mempool_benchmark.py

Automated wrapper that runs all steps. Options:

```bash
python3 run_mempool_benchmark.py \
--config <path> # Config file with Grafana creds (optional)
--url <url> # Loki URL (optional, overrides config)
--header "Key: Value" # Custom header (optional, repeatable)
--from-time <iso8601> # Start time (required)
--to-time <iso8601> # End time (required)
--window <ms> # Analysis window in ms (default: 1000)
--output-dir <path> # Output directory (default: .)
--skip-download # Skip download, use existing logs
--skip-extract # Skip extraction, use existing report
```

### extractor.py

Extracts metrics from log files. By default processes `ferdie.txt`:

```bash
python3 extractor.py [node1 node2 ...]
```

### analyzer.py

Analyzes extracted metrics with configurable time windows:

```bash
python3 analyzer.py <input_report> <output_analysis> [window_ms]
```

## Output

### mempool_report.txt
Time-series data showing mempool state at each logged event with columns:
- Timestamp
- Node name
- Ready transaction count
- Future transaction count
- Mempool length
- Submitted count
- Validated count
- Revalidated count
- Pruned count
- Reverified count

### analysis.txt
Statistical summary including:
- Average ready/future transaction counts
- Peak transaction counts
- Admission rates (TPS) over different time windows
- Total validated/revalidated transactions
- Total pruned transactions
- Total reverified (resubmitted) transactions

## Example Output

```
=== MEMPOOL STATISTICS BY NODE ===

Node: ferdie
Average Ready Txs: 12.5
Average Future Txs: 3.2
Peak Ready Txs: 45
Peak Future Txs: 15
Avg Admission Rate: 8.3 TPS
Total Validated: 1523
Total Revalidated: 342
Total Pruned: 1489
Total Reverified: 28
```
Loading
Loading