diff --git a/.env.example b/.env.example index 9b00c612d..d85beafdd 100644 --- a/.env.example +++ b/.env.example @@ -40,6 +40,16 @@ NETHERMIND_IMAGE=nethermind/nethermind-arbitrum:0.0.1-alpha # Nitro consensus client image NITRO_IMAGE=offchainlabs/nitro-node:v3.10.0-rc.3-26d4dc2 +# ============================================================================= +# NODE-RUNNER (native process tooling) +# ============================================================================= + +# Path to local Nitro repository (used by tools/node-runner) +# NITRO_PATH=/path/to/arbitrum-nitro + +# Path to local Nethermind repository (used by tools/node-runner) +# NETHERMIND_PATH=/path/to/nethermind-arbitrum + # ============================================================================= # OPTIONAL # ============================================================================= diff --git a/tools/node-runner/.gitignore b/tools/node-runner/.gitignore new file mode 100644 index 000000000..220b15e38 --- /dev/null +++ b/tools/node-runner/.gitignore @@ -0,0 +1,3 @@ +# Node-runner local data +.data/ +.omc/ diff --git a/tools/node-runner/README.md b/tools/node-runner/README.md new file mode 100644 index 000000000..1d3824f3e --- /dev/null +++ b/tools/node-runner/README.md @@ -0,0 +1,209 @@ +# node-runner + +TUI tool for running Arbitrum Nitro + Nethermind nodes in multiple modes. Orchestrates process startup, streams logs in real time, and handles graceful shutdown — all from a single terminal. + +``` +┌─ COMPARISON | Sepolia | nitro-el:healthy | nethermind:healthy | nitro-cl:healthy ─┐ +│ │ +│ ┌── nitro-el ── healthy ──────────┐ ┌── nethermind ── healthy ────────────────┐ │ +│ │ INFO Block 12345 validated │ │ INFO Processed block 12345 │ │ +│ │ INFO State root: 0xabc... │ │ INFO State root: 0xabc... │ │ +│ │ │ │ │ │ +│ └─────────────────────────────────┘ └─────────────────────────────────────────┘ │ +│ ┌── nitro-cl ── healthy ──────────────────────────────────────────────────────┐ │ +│ │ INFO Feeding block 12346 to execution engines │ │ +│ └────────────────────────────────────────────────────────────────────────────┘ │ +│ │ +│ q Quit x Stop nodes s Split view c Combined view f Errors only y Copy log │ +└────────────────────────────────────────────────────────────────────────────────────┘ +``` + +## Modes + +| Mode | Processes | Purpose | +|------|-----------|---------| +| `comparison` | Nitro CL + Nitro EL + Nethermind EL | State root comparison between ELs | +| `nitro-nm` | Nitro CL + Nethermind EL | Production-like Nethermind setup | +| `nitro-nitro` | Nitro CL + Nitro EL | Reference baseline | + +All modes use Nitro as the consensus layer (CL). The difference is which execution layer (EL) backends are started. + +## Prerequisites + +- **Python 3.12+** +- **uv** (recommended) or pip +- **Nitro** built locally (`make build` in the Nitro repo) +- **Nethermind** built locally (`make build` in the Nethermind repo) — required for `comparison` and `nitro-nm` modes +- A shared **JWT secret** at `~/.arbitrum/jwt.hex` + +## Installation + +```bash +cd tools/node-runner +uv sync # installs dependencies into .venv +``` + +Or with pip: + +```bash +pip install -e ".[dev]" +``` + +## Quick Start + +Set repository paths (or pass them as flags): + +```bash +export NITRO_PATH=/path/to/arbitrum-nitro +export NETHERMIND_PATH=/path/to/nethermind-arbitrum +``` + +A `.env` file in any parent directory is also picked up automatically. + +Run in comparison mode on Sepolia: + +```bash +uv run node-runner comparison --network sepolia +``` + +Run Nethermind as the sole EL: + +```bash +uv run node-runner nitro-nm --network mainnet +``` + +## Usage + +``` +node-runner [options] +``` + +### Options + +| Flag | Description | Default | +|------|-------------|---------| +| `--network {sepolia,mainnet}` | Arbitrum network | `sepolia` | +| `--log-level {error,warn,info,debug,trace}` | Log verbosity for all processes | `info` | +| `--clean` | Wipe databases before starting | off | +| `--init` | Force Nitro EL re-initialization (genesis download) | auto-detected | +| `--verification [N]` | Enable Nethermind `VerifyBlockHash` (`nitro-nm` only). Optionally verify every N blocks. | off | +| `--nitro-path DIR` | Path to Nitro repo | `$NITRO_PATH` | +| `--nethermind-path DIR` | Path to Nethermind repo | `$NETHERMIND_PATH` | +| `--data-dir DIR` | Base data directory | `.data` | +| `--l1-rpc URL` | Override L1 RPC endpoint | per-network default | +| `--l1-beacon URL` | Override L1 Beacon endpoint | per-network default | + +### Verification Mode + +The `--verification` flag enables Nethermind's `VerifyBlockHash` feature, which cross-checks computed block hashes against a public Arbitrum RPC endpoint. This runs entirely within Nethermind — no additional processes are started. + +```bash +# Enable with Nethermind's default interval (every 10,000 blocks) +uv run node-runner nitro-nm --verification + +# Verify every 100 blocks +uv run node-runner nitro-nm --verification 100 +``` + +### Examples + +```bash +# Comparison mode, Sepolia, debug logging +uv run node-runner comparison --network sepolia --log-level debug + +# Nethermind-only with verification, mainnet +uv run node-runner nitro-nm --network mainnet --verification + +# Reference baseline, clean start +uv run node-runner nitro-nitro --clean --init + +# Custom L1 RPC +uv run node-runner comparison --l1-rpc wss://my-rpc.example.com +``` + +## TUI Keybindings + +| Key | Action | +|-----|--------| +| `q` | Quit (stops all processes, writes reports, exits) | +| `x` | Stop nodes but keep TUI open for log review | +| `s` | Split view (one pane per process) | +| `c` | Combined view (interleaved logs from all processes) | +| `f` | Toggle errors-only filter | +| `y` | Copy log content to system clipboard | +| `1`/`2`/`3` | Focus a specific pane | +| `Shift+click` | Native text selection (bypasses TUI mouse capture) | + +## Architecture + +``` +node_runner/ +├── cli.py # Argument parsing, config construction +├── config.py # Networks, modes, ports, RunnerConfig model +├── orchestrator.py # Process lifecycle: init → start → monitor → stop +├── crash_report.py # Shutdown reports (crash dump + error-only logs) +├── models.py # ProcessState, LogEntry, ring buffers +├── exceptions.py # Typed errors (StartupError, HealthCheckError, etc.) +├── processes/ +│ ├── base.py # BaseProcess: start, stop, health check, log parsing +│ ├── init_el.py # One-shot Nitro EL genesis initialization +│ ├── nethermind.py # Nethermind EL (dotnet nethermind.dll) +│ ├── nitro_cl.py # Nitro CL with per-mode EL connection flags +│ └── nitro_el.py # Nitro EL (geth-based) +└── tui/ + ├── app.py # Textual application, keybindings, log streaming + ├── widgets.py # ProcessLogPane, CombinedLogPane + └── styles.tcss # Terminal CSS for layout and colors +``` + +### Startup Sequence + +1. **Clean** (if `--clean`): wipe network data directory +2. **Init** (if needed): run `nitro --init.then-quit` to bootstrap EL genesis from L1 +3. **Start processes** in dependency order — each must pass a TCP health check before the next starts +4. **Stream logs** from per-process ring buffers to TUI panes at 10 Hz +5. **Monitor** for crashes and update status bar + +### Shutdown + +On `q` or `x`, processes are stopped in reverse startup order (CL first, then ELs). Each process gets a SIGTERM with a 10-second grace period before SIGKILL. Crash reports and error logs are written to `.data/crash_reports/` and `.data/errors/`. + +## Data Directory Layout + +``` +.data/ +└── sepolia/ + ├── nitro-el/ # Nitro EL chain data + ├── nethermind/ # Nethermind chain data + └── nitro-cl/ # Nitro CL data +.data/ +├── crash_reports/ # Timestamped shutdown reports +└── errors/ # Error-only log files per process +``` + +## Development + +```bash +# Run tests +uv run pytest + +# Lint +uv run ruff check node_runner/ tests/ + +# Type check +uv run mypy node_runner/ +``` + +## Port Assignments + +Ports are fixed and chosen to avoid conflicts with the `comparison_runner` tool: + +| Component | Port | Protocol | +|-----------|------|----------| +| Nitro EL WS | 20552 | WebSocket | +| Nitro EL HTTP | 8547 | JSON-RPC | +| Nitro EL Auth | 8551 | Engine API | +| Nitro CL WS | 8559 | WebSocket | +| Nitro CL HTTP | 8558 | JSON-RPC | +| Nethermind HTTP | 20545 | JSON-RPC | +| Nethermind Engine | 20551 | Engine API | diff --git a/tools/node-runner/node_runner/__init__.py b/tools/node-runner/node_runner/__init__.py new file mode 100644 index 000000000..6ca00cb39 --- /dev/null +++ b/tools/node-runner/node_runner/__init__.py @@ -0,0 +1,3 @@ +"""node-runner: TUI tool for running Arbitrum Nitro + Nethermind nodes.""" + +__version__ = "0.1.0" diff --git a/tools/node-runner/node_runner/__main__.py b/tools/node-runner/node_runner/__main__.py new file mode 100644 index 000000000..d073140f5 --- /dev/null +++ b/tools/node-runner/node_runner/__main__.py @@ -0,0 +1,23 @@ +"""Entry point for node-runner. + +Usage: + python -m node_runner comparison --network sepolia + python -m node_runner nitro-nm --network mainnet --log-level debug + python -m node_runner nitro-nitro --clean +""" + +from __future__ import annotations + + +def main() -> None: + """Parse arguments, build configuration, and launch the TUI.""" + from .cli import parse_args + from .tui.app import NodeRunnerApp + + config = parse_args() + app = NodeRunnerApp(config) + app.run() + + +if __name__ == "__main__": + main() diff --git a/tools/node-runner/node_runner/cli.py b/tools/node-runner/node_runner/cli.py new file mode 100644 index 000000000..8629320a2 --- /dev/null +++ b/tools/node-runner/node_runner/cli.py @@ -0,0 +1,244 @@ +"""Command-line interface for node-runner.""" + +from __future__ import annotations + +import argparse +import os +import sys +from pathlib import Path + +from dotenv import find_dotenv, load_dotenv + +from .config import LogLevel, Mode, Network, RunnerConfig +from .exceptions import ConfigError + +# Load .env from the nearest parent directory (walks up from cwd) +load_dotenv(find_dotenv(usecwd=True)) + + +def create_parser() -> argparse.ArgumentParser: + """Create the argument parser.""" + parser = argparse.ArgumentParser( + prog="node-runner", + description="Run Arbitrum Nitro + Nethermind nodes with TUI monitoring", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Modes: + comparison Nitro CL + Nitro EL + Nethermind EL (state root comparison) + nitro-nm Nitro CL + Nethermind EL (production-like) + nitro-nitro Nitro CL + Nitro EL (reference baseline) + +Examples: + node-runner comparison --network sepolia + node-runner nitro-nm --network mainnet --log-level debug + node-runner nitro-nitro --clean --init + node-runner comparison --l1-rpc wss://my-rpc.example.com + +Environment Variables: + NITRO_PATH Path to Nitro repository (required if --nitro-path not set) + NETHERMIND_PATH Path to Nethermind repository (required if --nethermind-path not set) +""", + ) + + parser.add_argument( + "mode", + choices=["comparison", "nitro-nm", "nitro-nitro"], + help="Operating mode", + ) + + parser.add_argument( + "--network", + choices=["sepolia", "mainnet"], + default="sepolia", + help="Network to sync (default: sepolia)", + ) + + parser.add_argument( + "--log-level", + choices=["error", "warn", "info", "debug", "trace"], + default="info", + help="Log level for all processes (default: info)", + ) + + parser.add_argument( + "--clean", + action="store_true", + help="Clean databases before starting", + ) + + parser.add_argument( + "--init", + action="store_true", + dest="force_init", + help="Force re-initialization of Nitro EL (download genesis from L1)", + ) + + parser.add_argument( + "--nitro-path", + type=Path, + default=None, + metavar="DIR", + help="Path to Nitro repository (default: $NITRO_PATH)", + ) + + parser.add_argument( + "--nethermind-path", + type=Path, + default=None, + metavar="DIR", + help="Path to Nethermind repository (default: $NETHERMIND_PATH)", + ) + + parser.add_argument( + "--data-dir", + type=Path, + default=Path(".data"), + metavar="DIR", + help="Base data directory (default: .data)", + ) + + parser.add_argument( + "--verification", + nargs="?", + const=0, + default=None, + type=int, + metavar="N", + help="Enable Nethermind VerifyBlockHash; optionally verify every N blocks (nitro-nm only)", + ) + + parser.add_argument( + "--l1-rpc", + default=None, + metavar="URL", + help="Override L1 RPC URL", + ) + + parser.add_argument( + "--l1-beacon", + default=None, + metavar="URL", + help="Override L1 Beacon URL", + ) + + return parser + + +def _resolve_path( + arg_value: Path | None, + env_var: str, + label: str, +) -> Path: + """Resolve a path from CLI arg or environment variable. + + Args: + arg_value: Value from argparse (may be None) + env_var: Environment variable name to fall back to + label: Human-readable label for error messages + + Returns: + Resolved absolute path + + Raises: + ConfigError: If path not provided or doesn't exist + """ + if arg_value is not None: + path = arg_value.expanduser().resolve() + else: + env_path = os.environ.get(env_var) + if not env_path: + raise ConfigError( + f"{label} not specified. Use --{label.lower().replace(' ', '-')} " + f"or set {env_var} environment variable." + ) + path = Path(env_path).expanduser().resolve() + + if not path.is_dir(): + raise ConfigError(f"{label} does not exist: {path}") + + return path + + +def build_config(args: argparse.Namespace) -> RunnerConfig: + """Build RunnerConfig from parsed CLI arguments. + + Raises: + ConfigError: If configuration is invalid + """ + nitro_path = _resolve_path(args.nitro_path, "NITRO_PATH", "Nitro path") + nethermind_path = _resolve_path(args.nethermind_path, "NETHERMIND_PATH", "Nethermind path") + + # Load Nitro's .env for L1 RPC URLs (won't override already-set vars) + nitro_env = nitro_path / ".env" + if nitro_env.is_file(): + load_dotenv(nitro_env, override=False) + + mode = Mode(args.mode) + + if args.verification is not None and mode != Mode.NITRO_NM: + raise ConfigError("--verification is only supported with nitro-nm mode") + + # All modes run Nitro CL, so the nitro binary is always required + nitro_binary = nitro_path / "target" / "bin" / "nitro" + if not nitro_binary.exists(): + raise ConfigError( + f"Nitro binary not found at {nitro_binary}. " + f"Build it with: cd {nitro_path} && make build" + ) + + nethermind_dll = ( + nethermind_path + / "src" + / "Nethermind" + / "src" + / "Nethermind" + / "artifacts" + / "bin" + / "Nethermind.Runner" + / "debug" + / "nethermind.dll" + ) + if mode in (Mode.COMPARISON, Mode.NITRO_NM) and not nethermind_dll.exists(): + raise ConfigError( + f"Nethermind build not found at {nethermind_dll.parent}. " + f"Build it with: cd {nethermind_path} && make build" + ) + + # Resolve L1 URLs: CLI flag > Nitro .env > hardcoded default + network = Network(args.network) + network_prefix = network.value.upper() + l1_rpc = args.l1_rpc or os.environ.get(f"{network_prefix}_L1_RPC") + l1_beacon = args.l1_beacon or os.environ.get(f"{network_prefix}_L1_BEACON") + + return RunnerConfig( + mode=mode, + network=network, + log_level=LogLevel(args.log_level), + clean=args.clean, + force_init=args.force_init, + nitro_path=nitro_path, + nethermind_path=nethermind_path, + data_dir=args.data_dir.resolve(), + verification=args.verification, + l1_rpc_override=l1_rpc, + l1_beacon_override=l1_beacon, + ) + + +def parse_args(argv: list[str] | None = None) -> RunnerConfig: + """Parse CLI arguments and build configuration. + + Args: + argv: Arguments to parse (defaults to sys.argv[1:]) + + Returns: + Validated RunnerConfig + """ + parser = create_parser() + args = parser.parse_args(argv) + + try: + return build_config(args) + except ConfigError as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) diff --git a/tools/node-runner/node_runner/config.py b/tools/node-runner/node_runner/config.py new file mode 100644 index 000000000..fd1f78514 --- /dev/null +++ b/tools/node-runner/node_runner/config.py @@ -0,0 +1,192 @@ +"""Configuration models for node-runner. + +Defines networks, modes, and runtime configuration using frozen Pydantic models. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum +from pathlib import Path +from typing import Final + +from pydantic import BaseModel, ConfigDict, Field + + +class Network(Enum): + """Supported Arbitrum L2 networks.""" + + SEPOLIA = "sepolia" + MAINNET = "mainnet" + + +class Mode(Enum): + """Node runner operating modes.""" + + COMPARISON = "comparison" + NITRO_NM = "nitro-nm" + NITRO_NITRO = "nitro-nitro" + + +class LogLevel(Enum): + """Log levels supported by both Nitro and Nethermind.""" + + ERROR = "error" + WARN = "warn" + INFO = "info" + DEBUG = "debug" + TRACE = "trace" + + +@dataclass(frozen=True) +class NetworkConfig: + """Network-specific constants.""" + + chain_id: int + parent_chain_id: int + l1_rpc: str + l1_beacon: str + nethermind_config_name: str + + +NETWORKS: Final[dict[Network, NetworkConfig]] = { + Network.SEPOLIA: NetworkConfig( + chain_id=421614, + parent_chain_id=11155111, + l1_rpc="wss://sepolia.drpc.org", + l1_beacon="https://ethereum-sepolia-beacon-api.publicnode.com", + nethermind_config_name="arbitrum-sepolia", + ), + Network.MAINNET: NetworkConfig( + chain_id=42161, + parent_chain_id=1, + l1_rpc="wss://ethereum.drpc.org", + l1_beacon="https://ethereum-beacon-api.publicnode.com", + nethermind_config_name="arbitrum-mainnet", + ), +} + + +class PortConfig(BaseModel): + """Port assignments for each component. + + These are fixed and chosen to avoid conflicts with the comparison_runner tool. + """ + + nitro_el_ws: int = 20552 + nitro_el_http: int = 8547 + nitro_el_auth: int = 8551 + nitro_cl_ws: int = 8559 + nitro_cl_http: int = 8558 + nethermind_http: int = 20545 + nethermind_engine: int = 20551 + + model_config = ConfigDict(frozen=True) + + +DEFAULT_JWT_PATH: Final[Path] = Path.home() / ".arbitrum" / "jwt.hex" + +LOG_BUFFER_SIZE: Final[int] = 10_000 +"""Maximum log lines kept in memory per process.""" + +ERROR_BUFFER_SIZE: Final[int] = 1_000 +"""Maximum error/warning lines kept per process.""" + +CRASH_REPORT_LINES: Final[int] = 1_000 +"""Lines to save per process in crash report.""" + +HEALTH_CHECK_INTERVAL_S: Final[float] = 1.0 +"""Seconds between health check probes.""" + +HEALTH_CHECK_TIMEOUT_S: Final[float] = 60.0 +"""Default timeout waiting for a process to become healthy.""" + +INIT_TIMEOUT_S: Final[float] = 300.0 +"""Timeout for init-el process (genesis download can be slow).""" + +SHUTDOWN_GRACE_S: Final[float] = 10.0 +"""Seconds to wait for graceful shutdown before SIGKILL.""" + + +class RunnerConfig(BaseModel): + """Runtime configuration for a node-runner session. + + Immutable after construction. All paths are resolved at creation time. + """ + + mode: Mode + network: Network + log_level: LogLevel = LogLevel.INFO + clean: bool = False + force_init: bool = False + verification: int | None = None + nitro_path: Path + nethermind_path: Path + data_dir: Path = Field(default_factory=lambda: Path(".data")) + jwt_secret: Path = Field(default_factory=lambda: DEFAULT_JWT_PATH) + l1_rpc_override: str | None = None + l1_beacon_override: str | None = None + ports: PortConfig = Field(default_factory=PortConfig) + + model_config = ConfigDict(frozen=True) + + @property + def nitro_binary(self) -> Path: + """Path to the compiled nitro binary.""" + return self.nitro_path / "target" / "bin" / "nitro" + + @property + def nethermind_build_dir(self) -> Path: + """Path to the Nethermind build output containing nethermind.dll.""" + return ( + self.nethermind_path + / "src" + / "Nethermind" + / "src" + / "Nethermind" + / "artifacts" + / "bin" + / "Nethermind.Runner" + / "debug" + ) + + @property + def network_config(self) -> NetworkConfig: + """Get the network-specific configuration.""" + return NETWORKS[self.network] + + @property + def l1_rpc(self) -> str: + """Effective L1 RPC URL (override or network default).""" + return self.l1_rpc_override or self.network_config.l1_rpc + + @property + def l1_beacon(self) -> str: + """Effective L1 Beacon URL (override or network default).""" + return self.l1_beacon_override or self.network_config.l1_beacon + + def data_path(self, component: str) -> Path: + """Return data directory for a component, namespaced by network. + + Args: + component: Component name (e.g., 'nitro-el', 'nethermind', 'nitro-cl') + """ + return self.data_dir / self.network.value / component + + def crash_report_dir(self) -> Path: + """Directory for crash report output.""" + return self.data_dir / "crash_reports" + + def error_log_dir(self) -> Path: + """Directory for persistent error-only log files.""" + return self.data_dir / "errors" + + @property + def needs_nitro_el(self) -> bool: + """Whether this mode requires a Nitro EL process.""" + return self.mode in (Mode.COMPARISON, Mode.NITRO_NITRO) + + @property + def needs_nethermind(self) -> bool: + """Whether this mode requires a Nethermind process.""" + return self.mode in (Mode.COMPARISON, Mode.NITRO_NM) diff --git a/tools/node-runner/node_runner/crash_report.py b/tools/node-runner/node_runner/crash_report.py new file mode 100644 index 000000000..c0df5c005 --- /dev/null +++ b/tools/node-runner/node_runner/crash_report.py @@ -0,0 +1,141 @@ +"""Crash report generation on shutdown. + +Saves the last N lines from each process's ring buffer plus an error summary. +Reports are timestamped and stored in the data directory. +""" + +from __future__ import annotations + +import logging +from collections.abc import Sequence +from datetime import datetime +from pathlib import Path +from typing import TYPE_CHECKING + +from .config import CRASH_REPORT_LINES + +if TYPE_CHECKING: + from .processes.base import BaseProcess + +logger = logging.getLogger(__name__) + + +def write_crash_report( + processes: Sequence[BaseProcess], + output_dir: Path, +) -> Path | None: + """Write a crash report with recent logs from all processes. + + Creates a timestamped directory containing: + - summary.txt: Process status overview + - {name}.log: Last CRASH_REPORT_LINES lines per process + - {name}.errors.log: All captured error/warning lines per process + + Args: + processes: List of managed processes to report on + output_dir: Base directory for crash reports + + Returns: + Path to the created report directory, or None if no processes to report + """ + if not processes: + return None + + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + report_dir = output_dir / f"report_{timestamp}" + + try: + report_dir.mkdir(parents=True, exist_ok=True) + except OSError as e: + logger.error("Failed to create crash report directory %s: %s", report_dir, e) + return None + + # Write summary + _write_summary(processes, report_dir / "summary.txt") + + # Write per-process logs + for proc in processes: + _write_process_log(proc, report_dir) + _write_error_log(proc, report_dir) + + logger.info("Crash report written to %s", report_dir) + return report_dir + + +def _write_summary(processes: Sequence[BaseProcess], path: Path) -> None: + """Write a summary of all process states.""" + try: + with path.open("w", encoding="utf-8") as f: + f.write("Node Runner Shutdown Report\n") + f.write(f"Generated: {datetime.now().isoformat()}\n") + f.write("=" * 60 + "\n\n") + + for proc in processes: + state = proc.state + f.write(f"Process: {state.name}\n") + f.write(f" Status: {state.status.value}\n") + f.write(f" PID: {state.pid}\n") + f.write(f" Exit Code: {state.exit_code}\n") + f.write(f" Started: {state.start_time}\n") + f.write(f" Errors: {state.error_count}\n") + f.write(f" Warnings: {state.warning_count}\n") + f.write(f" Log Lines: {len(state.log_buffer)}\n") + f.write("\n") + except OSError as e: + logger.error("Failed to write summary: %s", e) + + +def _write_process_log(proc: BaseProcess, report_dir: Path) -> None: + """Write the last N log lines for a process.""" + path = report_dir / f"{proc.name}.log" + entries = list(proc.state.log_buffer)[-CRASH_REPORT_LINES:] + + try: + with path.open("w", encoding="utf-8") as f: + for entry in entries: + f.write(entry.raw) + except OSError as e: + logger.error("Failed to write log for %s: %s", proc.name, e) + + +def _write_error_log(proc: BaseProcess, report_dir: Path) -> None: + """Write only the error/warning lines for a process.""" + if not proc.state.error_log: + return + + path = report_dir / f"{proc.name}.errors.log" + + try: + with path.open("w", encoding="utf-8") as f: + for entry in proc.state.error_log: + f.write(entry.raw) + except OSError as e: + logger.error("Failed to write error log for %s: %s", proc.name, e) + + +def write_error_logs( + processes: Sequence[BaseProcess], + error_dir: Path, +) -> None: + """Write error-only log files at shutdown. + + Dumps ERROR/WARN lines from the in-memory ring buffer to disk, + overwriting any previous file for the same process. + + Args: + processes: List of managed processes + error_dir: Directory for error log files + """ + error_dir.mkdir(parents=True, exist_ok=True) + + for proc in processes: + if not proc.state.error_log: + continue + + path = error_dir / f"{proc.name}.errors.log" + try: + with path.open("w", encoding="utf-8") as f: + for entry in proc.state.error_log: + f.write(entry.raw) + except OSError as e: + logger.error("Failed to write error log for %s: %s", proc.name, e) diff --git a/tools/node-runner/node_runner/exceptions.py b/tools/node-runner/node_runner/exceptions.py new file mode 100644 index 000000000..3b7f301aa --- /dev/null +++ b/tools/node-runner/node_runner/exceptions.py @@ -0,0 +1,46 @@ +"""Custom exceptions for node-runner.""" + +from __future__ import annotations + + +class NodeRunnerError(Exception): + """Base exception for all node-runner errors.""" + + +class ConfigError(NodeRunnerError): + """Invalid configuration.""" + + +class StartupError(NodeRunnerError): + """Process failed to start.""" + + def __init__(self, message: str, process_name: str, port: int | None = None) -> None: + self.process_name = process_name + self.port = port + super().__init__(message) + + +class HealthCheckError(NodeRunnerError): + """Process failed health check within timeout.""" + + def __init__(self, message: str, process_name: str, port: int, timeout: float) -> None: + self.process_name = process_name + self.port = port + self.timeout = timeout + super().__init__(message) + + +class ShutdownError(NodeRunnerError): + """Error during graceful shutdown.""" + + def __init__(self, message: str, pid: int) -> None: + self.pid = pid + super().__init__(message) + + +class InitError(NodeRunnerError): + """EL initialization failed.""" + + def __init__(self, message: str, exit_code: int | None = None) -> None: + self.exit_code = exit_code + super().__init__(message) diff --git a/tools/node-runner/node_runner/models.py b/tools/node-runner/node_runner/models.py new file mode 100644 index 000000000..2f20fe9d4 --- /dev/null +++ b/tools/node-runner/node_runner/models.py @@ -0,0 +1,79 @@ +"""Data models for process state and log entries.""" + +from __future__ import annotations + +from collections import deque +from dataclasses import dataclass, field +from datetime import datetime +from enum import Enum + +from .config import ERROR_BUFFER_SIZE, LOG_BUFFER_SIZE + + +class ProcessStatus(Enum): + """Lifecycle state of a managed process.""" + + PENDING = "pending" + INITIALIZING = "initializing" + STARTING = "starting" + HEALTHY = "healthy" + UNHEALTHY = "unhealthy" + STOPPING = "stopping" + STOPPED = "stopped" + CRASHED = "crashed" + + +@dataclass(slots=True) +class LogEntry: + """A single parsed log line.""" + + timestamp: datetime + process_name: str + level: str + message: str + raw: str + + @property + def is_error(self) -> bool: + return self.level in ("ERROR", "FATAL", "CRIT") + + @property + def is_warning(self) -> bool: + return self.level in ("WARN", "WARNING") + + @property + def is_notable(self) -> bool: + """True for lines worth persisting (errors and warnings).""" + return self.is_error or self.is_warning + + +@dataclass +class ProcessState: + """Runtime state of a managed process.""" + + name: str + status: ProcessStatus = ProcessStatus.PENDING + pid: int | None = None + start_time: datetime | None = None + exit_code: int | None = None + write_count: int = 0 + """Monotonically increasing counter of total log lines appended.""" + log_buffer: deque[LogEntry] = field( + default_factory=lambda: deque(maxlen=LOG_BUFFER_SIZE) + ) + error_log: deque[LogEntry] = field( + default_factory=lambda: deque(maxlen=ERROR_BUFFER_SIZE) + ) + + @property + def is_terminal(self) -> bool: + """True if the process has reached a final state.""" + return self.status in (ProcessStatus.STOPPED, ProcessStatus.CRASHED) + + @property + def error_count(self) -> int: + return sum(1 for e in self.error_log if e.is_error) + + @property + def warning_count(self) -> int: + return sum(1 for e in self.error_log if e.is_warning) diff --git a/tools/node-runner/node_runner/orchestrator.py b/tools/node-runner/node_runner/orchestrator.py new file mode 100644 index 000000000..b2edc519b --- /dev/null +++ b/tools/node-runner/node_runner/orchestrator.py @@ -0,0 +1,224 @@ +"""Process orchestration: startup sequencing, monitoring, and shutdown. + +Manages the dependency graph between processes for each mode and handles +the full lifecycle from initialization through graceful shutdown. +""" + +from __future__ import annotations + +import asyncio +import contextlib +import logging +import shutil +from typing import TYPE_CHECKING + +from .config import HEALTH_CHECK_TIMEOUT_S, Mode +from .exceptions import HealthCheckError, StartupError +from .processes.base import BaseProcess +from .processes.init_el import InitELProcess +from .processes.nethermind import NethermindProcess +from .processes.nitro_cl import NitroCLProcess +from .processes.nitro_el import NitroELProcess + +if TYPE_CHECKING: + from .config import RunnerConfig + +logger = logging.getLogger(__name__) + + +class Orchestrator: + """Manages the lifecycle of all processes for a node-runner session. + + Responsibilities: + - Build the correct process graph per mode + - Start processes in dependency order with health gates + - Monitor processes for crashes + - Stop all processes in reverse order on shutdown + """ + + def __init__(self, config: RunnerConfig) -> None: + self.config = config + self._processes: list[BaseProcess] = [] + self._shutdown_event = asyncio.Event() + self._started = False + + @property + def processes(self) -> list[BaseProcess]: + """Currently managed processes (ordered by startup dependency).""" + return list(self._processes) + + @property + def is_shutdown_requested(self) -> bool: + return self._shutdown_event.is_set() + + def _build_process_list(self) -> list[BaseProcess]: + """Build ordered process list based on configured mode. + + Processes are ordered so that each process at index i depends on + all processes at indices 0..i-1 being healthy first. + """ + match self.config.mode: + case Mode.COMPARISON: + return [ + NitroELProcess(self.config), + NethermindProcess(self.config), + NitroCLProcess(self.config, Mode.COMPARISON), + ] + case Mode.NITRO_NM: + return [ + NethermindProcess(self.config), + NitroCLProcess(self.config, Mode.NITRO_NM), + ] + case Mode.NITRO_NITRO: + return [ + NitroELProcess(self.config), + NitroCLProcess(self.config, Mode.NITRO_NITRO), + ] + + def _needs_init(self) -> bool: + """Check whether Nitro EL initialization is required. + + For mainnet, init downloads a pre-built genesis snapshot via + --init.latest=genesis. For other networks (e.g., Sepolia), init + creates genesis from the real L1 init message via --init.empty=true + with the parent-chain-reader enabled (default). This ensures the + genesis hash matches the CL's genesis derived from L1. + + Init is needed if: + - Mode requires a Nitro EL + - User passed --init (force_init) OR the data directory is empty + """ + if not self.config.needs_nitro_el: + return False + + if self.config.force_init: + return True + + el_data = self.config.data_path("nitro-el") + if not el_data.exists(): + return True + + # Check for any content (even an empty dir means no init was done) + return not any(el_data.iterdir()) + + async def clean_data(self) -> None: + """Remove all data directories for the current network.""" + base = self.config.data_dir / self.config.network.value + if base.exists(): + logger.info("Cleaning data directory: %s", base) + shutil.rmtree(base) + base.mkdir(parents=True, exist_ok=True) + + async def run_init(self) -> None: + """Run Nitro EL genesis initialization. + + Creates genesis state. On mainnet, downloads a pre-built snapshot. + On other networks, reads the real init message from L1. Blocking one-shot. + + Raises: + InitError: If initialization fails + """ + logger.info("Initializing Nitro EL (downloading genesis from L1)...") + init_proc = InitELProcess(self.config) + self._processes = [init_proc] # preserve for TUI + crash report on failure + await init_proc.run_to_completion() + self._processes = [] # clear on success; start_all rebuilds the list + logger.info("Nitro EL initialization complete") + + async def start_all(self) -> None: + """Start all processes in dependency order with health gates. + + Raises: + StartupError: If a process fails to start + HealthCheckError: If a process doesn't become healthy in time + InitError: If EL initialization fails + """ + if self.config.clean: + await self.clean_data() + + if self._needs_init(): + await self.run_init() + + self._processes = self._build_process_list() + self._started = True + + for proc in self._processes: + if self._shutdown_event.is_set(): + break + + logger.info("Starting %s...", proc.name) + + try: + await proc.start() + except Exception as e: + raise StartupError( + f"Failed to start {proc.name}: {e}", + process_name=proc.name, + ) from e + + port = proc.health_check_port + if port is not None: + logger.info( + "Waiting for %s to become healthy (port %d)...", + proc.name, + port, + ) + + healthy = await proc.wait_for_healthy(timeout=HEALTH_CHECK_TIMEOUT_S) + if not healthy: + raise HealthCheckError( + f"{proc.name} did not become healthy within {HEALTH_CHECK_TIMEOUT_S}s", + process_name=proc.name, + port=port or 0, + timeout=HEALTH_CHECK_TIMEOUT_S, + ) + + logger.info("%s is healthy (PID %d)", proc.name, proc.state.pid or 0) + + async def stop_all(self) -> None: + """Stop all processes in reverse startup order. + + CL is stopped first (it depends on ELs), then ELs. + """ + if not self._processes: + return + + logger.info("Stopping all processes...") + + for proc in reversed(self._processes): + if proc.state.is_terminal: + continue + logger.info("Stopping %s (PID %d)...", proc.name, proc.state.pid or 0) + clean = await proc.stop() + if clean: + logger.info("%s stopped cleanly", proc.name) + else: + logger.warning("%s required force-kill", proc.name) + + async def monitor(self) -> None: + """Monitor processes for crashes until shutdown is requested. + + Runs in a loop checking process status. Does not restart crashed processes + (that would require re-init which is too heavy for automatic recovery). + Instead, it updates state so the TUI can display crash information. + """ + while not self._shutdown_event.is_set(): + for proc in self._processes: + proc.check_for_crash() + + with contextlib.suppress(TimeoutError): + await asyncio.wait_for( + self._shutdown_event.wait(), + timeout=2.0, + ) + + def request_shutdown(self) -> None: + """Signal that shutdown has been requested (e.g., Ctrl+C from TUI).""" + self._shutdown_event.set() + + def summary(self) -> dict[str, str]: + """Return a summary of all process states for display.""" + return { + proc.name: proc.state.status.value + for proc in self._processes + } diff --git a/tools/node-runner/node_runner/processes/__init__.py b/tools/node-runner/node_runner/processes/__init__.py new file mode 100644 index 000000000..0bbf3d9f9 --- /dev/null +++ b/tools/node-runner/node_runner/processes/__init__.py @@ -0,0 +1 @@ +"""Process management for Nitro and Nethermind nodes.""" diff --git a/tools/node-runner/node_runner/processes/base.py b/tools/node-runner/node_runner/processes/base.py new file mode 100644 index 000000000..8a9fa7236 --- /dev/null +++ b/tools/node-runner/node_runner/processes/base.py @@ -0,0 +1,223 @@ +"""Base process class with common lifecycle management. + +All managed processes (Nitro EL, Nitro CL, Nethermind) inherit from BaseProcess. +Uses asyncio.subprocess for non-blocking I/O and process groups for clean shutdown. +""" + +from __future__ import annotations + +import asyncio +import os +import signal +from abc import ABC, abstractmethod +from datetime import datetime +from pathlib import Path +from typing import TYPE_CHECKING + +from ..config import HEALTH_CHECK_INTERVAL_S, HEALTH_CHECK_TIMEOUT_S, SHUTDOWN_GRACE_S +from ..models import LogEntry, ProcessState, ProcessStatus + +if TYPE_CHECKING: + from ..config import RunnerConfig + + +class BaseProcess(ABC): + """Abstract base for managed node processes. + + Provides: + - Async subprocess start/stop with process group isolation + - Non-blocking log consumption from stdout + - TCP health check on configurable port + - Graceful shutdown with SIGTERM/SIGKILL escalation + """ + + def __init__(self, config: RunnerConfig) -> None: + self.config = config + self.state = ProcessState(name=self.name) + self._proc: asyncio.subprocess.Process | None = None + self._log_task: asyncio.Task[None] | None = None + + @property + @abstractmethod + def name(self) -> str: + """Human-readable process name used in logs and TUI.""" + ... + + @abstractmethod + def build_command(self) -> list[str]: + """Build the full command-line argument list.""" + ... + + @abstractmethod + def working_directory(self) -> Path: + """Working directory for the subprocess.""" + ... + + @property + def health_check_port(self) -> int | None: + """TCP port to probe for health. None skips health check.""" + return None + + async def start(self) -> None: + """Start the subprocess and begin consuming its output.""" + if self._proc is not None: + return + + self.state.status = ProcessStatus.STARTING + cmd = self.build_command() + cwd = self.working_directory() + + # Ensure data directory exists + data_path = self.config.data_path(self.name) + data_path.mkdir(parents=True, exist_ok=True) + + self._proc = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.STDOUT, + cwd=cwd, + start_new_session=True, + ) + + self.state.pid = self._proc.pid + self.state.start_time = datetime.now() + self._log_task = asyncio.create_task(self._consume_logs()) + + async def stop(self, grace_s: float = SHUTDOWN_GRACE_S) -> bool: + """Stop the process gracefully. + + Sends SIGTERM to the process group, waits up to grace_s seconds, + then escalates to SIGKILL. + + Returns: + True if the process stopped cleanly (SIGTERM), False if force-killed. + """ + if self._proc is None or self._proc.returncode is not None: + self.state.status = ProcessStatus.STOPPED + self._cancel_log_task() + return True + + self.state.status = ProcessStatus.STOPPING + + try: + os.killpg(self._proc.pid, signal.SIGTERM) + except ProcessLookupError: + self.state.status = ProcessStatus.STOPPED + self._cancel_log_task() + return True + + try: + await asyncio.wait_for(self._proc.wait(), timeout=grace_s) + self.state.status = ProcessStatus.STOPPED + self.state.exit_code = self._proc.returncode + self._cancel_log_task() + return True + except TimeoutError: + pass + + # Escalate to SIGKILL + try: + os.killpg(self._proc.pid, signal.SIGKILL) + await asyncio.wait_for(self._proc.wait(), timeout=5.0) + except (TimeoutError, ProcessLookupError): + pass + + self.state.status = ProcessStatus.STOPPED + self.state.exit_code = self._proc.returncode + self._cancel_log_task() + return False + + async def wait_for_healthy(self, timeout: float = HEALTH_CHECK_TIMEOUT_S) -> bool: + """Wait for the process to become healthy via TCP port probe. + + Returns: + True if healthy within timeout, False otherwise. + """ + port = self.health_check_port + if port is None: + self.state.status = ProcessStatus.HEALTHY + return True + + loop = asyncio.get_running_loop() + deadline = loop.time() + timeout + while loop.time() < deadline: + # Check for early process death + if self._proc is not None and self._proc.returncode is not None: + self.state.status = ProcessStatus.CRASHED + self.state.exit_code = self._proc.returncode + return False + + try: + _, writer = await asyncio.wait_for( + asyncio.open_connection("127.0.0.1", port), + timeout=1.0, + ) + writer.close() + await writer.wait_closed() + self.state.status = ProcessStatus.HEALTHY + return True + except (TimeoutError, OSError): + await asyncio.sleep(HEALTH_CHECK_INTERVAL_S) + + self.state.status = ProcessStatus.UNHEALTHY + return False + + @property + def is_running(self) -> bool: + """Check if the underlying process is alive.""" + return self._proc is not None and self._proc.returncode is None + + def check_for_crash(self) -> bool: + """Check if process has died unexpectedly. Returns True if crashed.""" + if self._proc is None: + return False + if self._proc.returncode is not None and self.state.status == ProcessStatus.HEALTHY: + self.state.status = ProcessStatus.CRASHED + self.state.exit_code = self._proc.returncode + return True + return False + + async def _consume_logs(self) -> None: + """Read stdout line by line and populate the log and error buffers.""" + assert self._proc is not None and self._proc.stdout is not None + try: + async for raw_line in self._proc.stdout: + line = raw_line.decode("utf-8", errors="replace") + entry = self._parse_log_line(line) + self.state.log_buffer.append(entry) + self.state.write_count += 1 + if entry.is_notable: + self.state.error_log.append(entry) + except asyncio.CancelledError: + return + + def _parse_log_line(self, line: str) -> LogEntry: + """Parse a raw log line into a structured LogEntry. + + Uses simple keyword detection. Both Nitro and Nethermind include + level keywords (INFO, WARN, ERROR, DEBUG) in their log output. + """ + upper = line.upper() + if "ERROR" in upper or "ERR " in upper or "FATAL" in upper or "CRIT" in upper: + level = "ERROR" + elif "WARN" in upper: + level = "WARN" + elif "DEBUG" in upper or "DBG " in upper: + level = "DEBUG" + elif "TRACE" in upper or "TRC " in upper: + level = "TRACE" + else: + level = "INFO" + + return LogEntry( + timestamp=datetime.now(), + process_name=self.name, + level=level, + message=line.rstrip(), + raw=line, + ) + + def _cancel_log_task(self) -> None: + """Cancel the log consumption task if running.""" + if self._log_task is not None and not self._log_task.done(): + self._log_task.cancel() diff --git a/tools/node-runner/node_runner/processes/init_el.py b/tools/node-runner/node_runner/processes/init_el.py new file mode 100644 index 000000000..097c2df17 --- /dev/null +++ b/tools/node-runner/node_runner/processes/init_el.py @@ -0,0 +1,113 @@ +"""Nitro EL initialization (one-shot genesis creation). + +For mainnet, downloads a pre-built genesis snapshot via --init.latest=genesis. +For other networks (e.g., Sepolia), creates genesis from the real L1 init +message via --init.empty=true with the parent-chain-reader enabled (default). +Must complete before starting the EL. +""" + +from __future__ import annotations + +import asyncio +from pathlib import Path +from typing import TYPE_CHECKING + +from ..config import INIT_TIMEOUT_S, Network +from ..exceptions import InitError +from ..models import ProcessStatus +from .base import BaseProcess + +if TYPE_CHECKING: + from ..config import RunnerConfig + + +class InitELProcess(BaseProcess): + """One-shot Nitro init-el process. + + Creates genesis and exits. Not a long-running process. + Flags derived from arbitrum-nitro/Makefile:217-237 (init-el target). + """ + + def __init__(self, config: RunnerConfig) -> None: + super().__init__(config) + + @property + def name(self) -> str: + return "init-el" + + @property + def health_check_port(self) -> int | None: + return None # One-shot, no health check port + + def working_directory(self) -> Path: + return self.config.nitro_path + + def build_command(self) -> list[str]: + nc = self.config.network_config + data_path = self.config.data_path("nitro-el") + + cmd = [ + str(self.config.nitro_binary), + f"--chain.id={nc.chain_id}", + f"--parent-chain.id={nc.parent_chain_id}", + f"--persistent.global-config={data_path}", + f"--parent-chain.connection.url={self.config.l1_rpc}", + f"--parent-chain.blob-client.beacon-url={self.config.l1_beacon}", + "--init.then-quit=true", + "--init.validate-genesis-assertion=false", + "--node.sequencer=false", + "--node.batch-poster.enable=false", + "--node.staker.enable=false", + "--node.feed.input.url=", + f"--auth.jwtsecret={self.config.jwt_secret}", + ] + + if self.config.network == Network.MAINNET: + # Mainnet has a pre-built genesis snapshot available + cmd.append("--init.latest=genesis") + else: + # No snapshot for testnets — create genesis from L1 init message. + # parent-chain-reader defaults to enabled, so Nitro reads the real + # init message from L1's delayed bridge instead of creating a fake + # one via json.Marshal(chainConfig). + cmd.append("--init.empty=true") + + return cmd + + async def run_to_completion(self, timeout: float = INIT_TIMEOUT_S) -> None: + """Start the init process and wait for it to exit successfully. + + Args: + timeout: Maximum seconds to wait for completion + + Raises: + InitError: If init fails or times out + """ + self.state.status = ProcessStatus.INITIALIZING + await self.start() + + assert self._proc is not None + + try: + exit_code = await asyncio.wait_for(self._proc.wait(), timeout=timeout) + except TimeoutError: + await self.stop() + raise InitError( + f"EL initialization timed out after {timeout}s. " + f"Check L1 RPC connectivity: {self.config.l1_rpc}", + ) from None + + self.state.exit_code = exit_code + self._cancel_log_task() + + if exit_code != 0: + # Grab last few error lines for diagnostics + errors = [e.message for e in self.state.error_log][-10:] + error_context = "\n ".join(errors) if errors else "(no error output captured)" + raise InitError( + f"EL initialization failed with exit code {exit_code}.\n" + f"Recent errors:\n {error_context}", + exit_code=exit_code, + ) + + self.state.status = ProcessStatus.STOPPED diff --git a/tools/node-runner/node_runner/processes/nethermind.py b/tools/node-runner/node_runner/processes/nethermind.py new file mode 100644 index 000000000..18505e5aa --- /dev/null +++ b/tools/node-runner/node_runner/processes/nethermind.py @@ -0,0 +1,64 @@ +"""Nethermind Execution Layer process. + +Runs Nethermind via `dotnet nethermind.dll` with Arbitrum configuration. +""" + +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING + +from .base import BaseProcess + +if TYPE_CHECKING: + from ..config import RunnerConfig + + +class NethermindProcess(BaseProcess): + """Nethermind node as Arbitrum execution layer. + + Command pattern derived from nethermind-arbitrum/Makefile:23-27 (run-nethermind macro). + Uses network-specific config (arbitrum-sepolia, arbitrum-mainnet). + """ + + def __init__(self, config: RunnerConfig) -> None: + super().__init__(config) + + @property + def name(self) -> str: + return "nethermind" + + @property + def health_check_port(self) -> int: + return self.config.ports.nethermind_engine + + def working_directory(self) -> Path: + return self.config.nethermind_build_dir + + def build_command(self) -> list[str]: + nc = self.config.network_config + data_path = self.config.data_path("nethermind") + + cmd = [ + "dotnet", + "nethermind.dll", + "-c", + nc.nethermind_config_name, + "--data-dir", + str(data_path), + f"--JsonRpc.JwtSecretFile={self.config.jwt_secret}", + f"--JsonRpc.Port={self.config.ports.nethermind_http}", + f"--JsonRpc.EnginePort={self.config.ports.nethermind_engine}", + "--JsonRpc.Host=0.0.0.0", + "--JsonRpc.EngineHost=0.0.0.0", + f"--log={self.config.log_level.value}", + ] + + if self.config.verification is not None: + cmd.append("--VerifyBlockHash.Enabled=true") + if self.config.verification > 0: + cmd.append( + f"--VerifyBlockHash.VerifyEveryNBlocks={self.config.verification}" + ) + + return cmd diff --git a/tools/node-runner/node_runner/processes/nitro_cl.py b/tools/node-runner/node_runner/processes/nitro_cl.py new file mode 100644 index 000000000..cb5d50a2d --- /dev/null +++ b/tools/node-runner/node_runner/processes/nitro_cl.py @@ -0,0 +1,104 @@ +"""Nitro Consensus Layer process. + +Runs the Nitro binary as CL, connecting to an external EL. +Mode-aware: builds different flags for comparison, nitro-nm, and nitro-nitro. +""" + +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING + +from ..config import Mode +from .base import BaseProcess + +if TYPE_CHECKING: + from ..config import RunnerConfig + + +class NitroCLProcess(BaseProcess): + """Nitro node as consensus layer connecting to external execution layer(s). + + Flags derived from arbitrum-nitro/Makefile:269-323 (run-cl and run-cl-comparison). + """ + + def __init__(self, config: RunnerConfig, mode: Mode) -> None: + super().__init__(config) + self._mode = mode + + @property + def name(self) -> str: + return "nitro-cl" + + @property + def health_check_port(self) -> int: + return self.config.ports.nitro_cl_http + + def working_directory(self) -> Path: + return self.config.nitro_path + + def build_command(self) -> list[str]: + nc = self.config.network_config + data_path = self.config.data_path("nitro-cl") + ports = self.config.ports + + cmd = [ + str(self.config.nitro_binary), + f"--chain.id={nc.chain_id}", + f"--parent-chain.id={nc.parent_chain_id}", + f"--persistent.global-config={data_path}", + # L1 connection + f"--parent-chain.connection.url={self.config.l1_rpc}", + f"--parent-chain.blob-client.beacon-url={self.config.l1_beacon}", + # Init + "--init.empty=true", + "--init.validate-genesis-assertion=false", + # Disable features not needed for sync-only + "--node.sequencer=false", + "--node.batch-poster.enable=false", + "--node.staker.enable=false", + "--node.feed.input.url=", + "--execution.forwarding-target=null", + # Auth + f"--auth.jwtsecret={self.config.jwt_secret}", + # CL RPC endpoints + "--ws.addr=0.0.0.0", + f"--ws.port={ports.nitro_cl_ws}", + "--http.addr=0.0.0.0", + f"--http.port={ports.nitro_cl_http}", + # Log level + f"--log-level={self.config.log_level.value}", + ] + + # Mode-specific EL connection + match self._mode: + case Mode.COMPARISON: + cmd.extend(self._comparison_flags(ports)) + case Mode.NITRO_NM: + cmd.extend(self._nitro_nm_flags(ports)) + case Mode.NITRO_NITRO: + cmd.extend(self._nitro_nitro_flags(ports)) + + return cmd + + def _comparison_flags(self, ports: object) -> list[str]: + """Flags for comparison mode: Nitro EL as primary, Nethermind as secondary.""" + return [ + f"--node.execution-rpc-client.url=ws://localhost:{self.config.ports.nitro_el_ws}", + "--node.comparison-execution.enable=true", + f"--node.comparison-execution.secondary-rpc-client.url=http://localhost:{self.config.ports.nethermind_engine}", + f"--node.comparison-execution.secondary-rpc-client.jwtsecret={self.config.jwt_secret}", + ] + + def _nitro_nm_flags(self, ports: object) -> list[str]: + """Flags for nitro-nm mode: Nethermind as sole EL.""" + return [ + f"--node.execution-rpc-client.url=http://localhost:{self.config.ports.nethermind_engine}", + f"--node.execution-rpc-client.jwtsecret={self.config.jwt_secret}", + ] + + def _nitro_nitro_flags(self, ports: object) -> list[str]: + """Flags for nitro-nitro mode: Nitro EL as sole EL.""" + return [ + f"--node.execution-rpc-client.url=ws://localhost:{self.config.ports.nitro_el_ws}", + ] diff --git a/tools/node-runner/node_runner/processes/nitro_el.py b/tools/node-runner/node_runner/processes/nitro_el.py new file mode 100644 index 000000000..06127a49a --- /dev/null +++ b/tools/node-runner/node_runner/processes/nitro_el.py @@ -0,0 +1,75 @@ +"""Nitro Execution Layer process (execution-only mode). + +Runs the Nitro binary as a standalone EL without L1 listener. +Exposes RPC on WS and HTTP for CL to connect to. +""" + +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING + +from .base import BaseProcess + +if TYPE_CHECKING: + from ..config import RunnerConfig + + +class NitroELProcess(BaseProcess): + """Nitro node in execution-only mode. + + Flags derived from arbitrum-nitro/Makefile:240-267 (run-el target). + """ + + def __init__(self, config: RunnerConfig) -> None: + super().__init__(config) + + @property + def name(self) -> str: + return "nitro-el" + + @property + def health_check_port(self) -> int: + return self.config.ports.nitro_el_ws + + def working_directory(self) -> Path: + return self.config.nitro_path + + def build_command(self) -> list[str]: + nc = self.config.network_config + data_path = self.config.data_path("nitro-el") + ports = self.config.ports + + return [ + str(self.config.nitro_binary), + f"--chain.id={nc.chain_id}", + f"--parent-chain.id={nc.parent_chain_id}", + f"--persistent.global-config={data_path}", + "--init.empty=true", + "--init.validate-genesis-assertion=false", + # Execution-only: no L1 interaction + "--node.dangerous.no-l1-listener=true", + "--node.parent-chain-reader.enable=false", + "--node.sequencer=false", + "--node.batch-poster.enable=false", + "--node.staker.enable=false", + "--node.feed.input.url=", + # RPC server for CL communication + "--execution.rpc-server.enable=true", + "--execution.rpc-server.authenticated=false", + "--execution.rpc-server.public=true", + # Auth RPC + f"--auth.jwtsecret={self.config.jwt_secret}", + "--auth.addr=0.0.0.0", + f"--auth.port={ports.nitro_el_auth}", + # WebSocket + "--ws.addr=0.0.0.0", + f"--ws.port={ports.nitro_el_ws}", + "--ws.api=net,web3,eth,arb,nitroexecution", + # HTTP + "--http.addr=0.0.0.0", + f"--http.port={ports.nitro_el_http}", + "--http.api=net,web3,eth,arb,nitroexecution", + # Log level + f"--log-level={self.config.log_level.value}", + ] diff --git a/tools/node-runner/node_runner/tui/__init__.py b/tools/node-runner/node_runner/tui/__init__.py new file mode 100644 index 000000000..096fb6ccc --- /dev/null +++ b/tools/node-runner/node_runner/tui/__init__.py @@ -0,0 +1 @@ +"""Textual TUI for node-runner.""" diff --git a/tools/node-runner/node_runner/tui/app.py b/tools/node-runner/node_runner/tui/app.py new file mode 100644 index 000000000..c91f81024 --- /dev/null +++ b/tools/node-runner/node_runner/tui/app.py @@ -0,0 +1,371 @@ +"""Main Textual application for node-runner. + +Composes the TUI layout, starts processes via the orchestrator, +streams logs to panes, and handles graceful shutdown. +""" + +from __future__ import annotations + +import asyncio +import logging +import shutil +import subprocess +from pathlib import Path +from typing import TYPE_CHECKING + +from textual.app import App, ComposeResult +from textual.binding import Binding +from textual.containers import Container +from textual.widgets import Footer, Static + +from ..crash_report import write_crash_report, write_error_logs +from ..orchestrator import Orchestrator +from .widgets import CombinedLogPane, ProcessLogPane + +# Route init process logs to the corresponding runtime pane +_PANE_FALLBACK: dict[str, str] = { + "init-el": "nitro-el", +} + +if TYPE_CHECKING: + from ..config import RunnerConfig + +logger = logging.getLogger(__name__) + +_CSS_PATH = Path(__file__).parent / "styles.tcss" + + +class NodeRunnerApp(App[None]): + """Textual application for running and monitoring Arbitrum node processes. + + Layout: + - Status bar (top): mode, network, process health + - Process panes (center): split or combined view + - Help bar (bottom): key bindings + """ + + CSS_PATH = _CSS_PATH + TITLE = "node-runner" + + BINDINGS = [ + Binding("q", "quit_app", "Quit", priority=True), + Binding("x", "stop_nodes", "Stop nodes"), + Binding("s", "split_view", "Split view"), + Binding("c", "combined_view", "Combined view"), + Binding("f", "toggle_filter", "Errors only"), + Binding("y", "copy_log", "Copy log"), + Binding("1", "focus_pane(1)", "Pane 1", show=False), + Binding("2", "focus_pane(2)", "Pane 2", show=False), + Binding("3", "focus_pane(3)", "Pane 3", show=False), + ] + + def __init__(self, config: RunnerConfig) -> None: + super().__init__() + self.config = config + self._orchestrator = Orchestrator(config) + self._panes: dict[str, ProcessLogPane] = {} + self._combined_pane: CombinedLogPane | None = None + self._log_pointers: dict[str, int] = {} + self._stream_task: asyncio.Task[None] | None = None + self._monitor_task: asyncio.Task[None] | None = None + self._in_split_view = True + self._focused_pane_name: str | None = None + + def compose(self) -> ComposeResult: + # Status bar + mode_text = self.config.mode.value.upper() + network_text = self.config.network.value.capitalize() + yield Static( + f" {mode_text} | {network_text} | Starting...", + id="status-bar", + ) + + # Build process panes based on mode. + # Verification EL is a background process — no dedicated pane, + # its logs only appear in combined view. + proc_list = self._orchestrator._build_process_list() + pane_count = len(proc_list) + + container_class = "three-pane" if pane_count == 3 else "two-pane" + with Container(id="split-container", classes=container_class): + for i, proc in enumerate(proc_list): + # In 3-pane grid, last pane spans full width + classes = "span-full" if (pane_count == 3 and i == 2) else "" + pane = ProcessLogPane( + proc.name, + id=f"pane-{proc.name}", + classes=classes, + ) + self._panes[proc.name] = pane + yield pane + + # Combined view (hidden by default) + with Container(id="combined-container"): + self._combined_pane = CombinedLogPane(id="combined-pane") + yield self._combined_pane + + yield Footer() + + async def on_mount(self) -> None: + """Start processes and background workers after TUI mounts.""" + self.run_worker(self._startup_and_monitor(), exclusive=True) + + async def _startup_and_monitor(self) -> None: + """Orchestrate startup, then stream logs and monitor.""" + status_bar = self.query_one("#status-bar", Static) + + try: + await self._orchestrator.start_all() + except Exception as e: + mode_text = self.config.mode.value.upper() + network_text = self.config.network.value.capitalize() + status_bar.update( + f" {mode_text} | {network_text} | STARTUP FAILED: {e}" + ) + logger.error("Startup failed: %s", e) + # Still stream whatever logs were captured + self._stream_task = asyncio.create_task(self._stream_logs()) + return + + # Update status bar + mode_text = self.config.mode.value.upper() + network_text = self.config.network.value.capitalize() + procs = self._orchestrator.processes + status_parts = [f"{p.name}:{p.state.status.value}" for p in procs] + status_bar.update( + f" {mode_text} | {network_text} | {' | '.join(status_parts)}" + ) + + # Start background tasks + self._stream_task = asyncio.create_task(self._stream_logs()) + self._monitor_task = asyncio.create_task(self._monitor_loop()) + + async def _stream_logs(self) -> None: + """Poll log buffers and push new entries to TUI panes.""" + # Initialize pointers to current buffer positions + for proc in self._orchestrator.processes: + self._log_pointers[proc.name] = 0 + + while True: + for proc in self._orchestrator.processes: + last_seen = self._log_pointers.get(proc.name, 0) + current = proc.state.write_count + new_count = current - last_seen + + if new_count > 0: + buf_list = list(proc.state.log_buffer) + # If more were written than the buffer holds, we missed some + new_count = min(new_count, len(buf_list)) + new_entries = buf_list[-new_count:] + + pane = self._panes.get(proc.name) + if pane is None: + pane = self._panes.get(_PANE_FALLBACK.get(proc.name, "")) + for entry in new_entries: + if pane is not None: + pane.write_entry(entry) + if self._combined_pane is not None: + self._combined_pane.write_entry(entry) + + self._log_pointers[proc.name] = current + + # Update pane header status + pane = self._panes.get(proc.name) + if pane is None: + pane = self._panes.get(_PANE_FALLBACK.get(proc.name, "")) + if pane is not None: + pane.update_status( + proc.state.status, + proc.state.error_count, + proc.state.warning_count, + ) + + await asyncio.sleep(0.1) + + async def _monitor_loop(self) -> None: + """Monitor for process crashes and update status bar.""" + status_bar = self.query_one("#status-bar", Static) + + while not self._orchestrator.is_shutdown_requested: + for proc in self._orchestrator.processes: + proc.check_for_crash() + + # Update status bar + mode_text = self.config.mode.value.upper() + network_text = self.config.network.value.capitalize() + procs = self._orchestrator.processes + status_parts = [] + for p in procs: + status_parts.append(f"{p.name}:{p.state.status.value}") + status_bar.update( + f" {mode_text} | {network_text} | {' | '.join(status_parts)}" + ) + + await asyncio.sleep(2.0) + + async def _shutdown(self) -> None: + """Graceful shutdown: stop processes, write reports.""" + # Cancel background tasks + if self._stream_task and not self._stream_task.done(): + self._stream_task.cancel() + if self._monitor_task and not self._monitor_task.done(): + self._monitor_task.cancel() + + # Signal orchestrator + self._orchestrator.request_shutdown() + + # Update status bar + try: + status_bar = self.query_one("#status-bar", Static) + status_bar.update(" Shutting down...") + except Exception: + pass + + # Stop all processes + await self._orchestrator.stop_all() + + # Write crash report and error logs + processes = self._orchestrator.processes + if processes: + write_crash_report(processes, self.config.crash_report_dir()) + write_error_logs(processes, self.config.error_log_dir()) + + # ── Actions ── + + async def action_quit_app(self) -> None: + """Graceful shutdown then exit.""" + await self._shutdown() + self.exit() + + async def action_stop_nodes(self) -> None: + """Stop all processes but keep the TUI open for log review.""" + if self._orchestrator.is_shutdown_requested: + return + + # Stop the monitor loop (processes are going down) + if self._monitor_task and not self._monitor_task.done(): + self._monitor_task.cancel() + + self._orchestrator.request_shutdown() + + status_bar = self.query_one("#status-bar", Static) + status_bar.update(" Stopping nodes...") + + await self._orchestrator.stop_all() + + # Write reports while logs are still in memory + processes = self._orchestrator.processes + if processes: + write_crash_report(processes, self.config.crash_report_dir()) + write_error_logs(processes, self.config.error_log_dir()) + + # Update status bar and pane headers + mode_text = self.config.mode.value.upper() + network_text = self.config.network.value.capitalize() + status_bar.update(f" {mode_text} | {network_text} | STOPPED (q to quit)") + + for proc in processes: + pane = self._panes.get(proc.name) + if pane is None: + pane = self._panes.get(_PANE_FALLBACK.get(proc.name, "")) + if pane is not None: + pane.update_status( + proc.state.status, proc.state.error_count, proc.state.warning_count + ) + + def action_split_view(self) -> None: + """Switch to split (per-process) view.""" + if self._in_split_view: + return + self._in_split_view = True + split = self.query_one("#split-container") + combined = self.query_one("#combined-container") + split.styles.display = "block" + combined.styles.display = "none" + + def action_combined_view(self) -> None: + """Switch to combined (interleaved) view.""" + if not self._in_split_view: + return + self._in_split_view = False + split = self.query_one("#split-container") + combined = self.query_one("#combined-container") + split.styles.display = "none" + combined.styles.display = "block" + + def action_toggle_filter(self) -> None: + """Toggle errors-only filter on all panes.""" + for pane in self._panes.values(): + pane.toggle_errors_only() + pane.clear_log() + if self._combined_pane is not None: + self._combined_pane.toggle_errors_only() + self._combined_pane.clear_log() + + # Re-stream all buffered entries through the filter + for proc in self._orchestrator.processes: + pane = self._panes.get(proc.name) + for entry in proc.state.log_buffer: + if pane is not None: + pane.write_entry(entry) + if self._combined_pane is not None: + self._combined_pane.write_entry(entry) + self._log_pointers[proc.name] = proc.state.write_count + + def action_focus_pane(self, pane_number: int) -> None: + """Focus a specific process pane by number (1-indexed).""" + pane_names = list(self._panes.keys()) + idx = pane_number - 1 + if 0 <= idx < len(pane_names): + # Remove focus from all panes + for pane in self._panes.values(): + pane.remove_class("focused-pane") + # Focus selected pane + name = pane_names[idx] + self._focused_pane_name = name + target = self._panes[name] + target.add_class("focused-pane") + target.focus() + + def action_copy_log(self) -> None: + """Copy log content from the focused pane (or all) to system clipboard.""" + lines: list[str] = [] + + if self._in_split_view and self._focused_pane_name: + # Copy only the focused pane's process logs + for proc in self._orchestrator.processes: + if proc.name == self._focused_pane_name: + lines = [e.message for e in proc.state.log_buffer] + break + else: + # Combined view or no pane focused: copy all process logs + for proc in self._orchestrator.processes: + for entry in proc.state.log_buffer: + lines.append(f"[{entry.process_name}] {entry.message}") + + if not lines: + self.notify("No log content to copy", severity="warning", timeout=2) + return + + text = "\n".join(lines) + + clip_cmd = shutil.which("pbcopy") or shutil.which("xclip") or shutil.which("xsel") + if clip_cmd is None: + self.notify("No clipboard tool found", severity="error", timeout=3) + return + + args = [clip_cmd] + if "xclip" in clip_cmd: + args += ["-selection", "clipboard"] + elif "xsel" in clip_cmd: + args += ["--clipboard", "--input"] + + try: + subprocess.run(args, input=text.encode(), check=True) # noqa: S603 + except subprocess.SubprocessError: + self.notify("Failed to copy to clipboard", severity="error", timeout=3) + return + + count = len(lines) + label = self._focused_pane_name or "all" + self.notify(f"Copied {count} lines ({label})", timeout=2) diff --git a/tools/node-runner/node_runner/tui/styles.tcss b/tools/node-runner/node_runner/tui/styles.tcss new file mode 100644 index 000000000..5854f4801 --- /dev/null +++ b/tools/node-runner/node_runner/tui/styles.tcss @@ -0,0 +1,133 @@ +/* Node Runner TUI Styles */ + +Screen { + layout: vertical; +} + +/* ── Status bar at top ── */ + +#status-bar { + dock: top; + height: 1; + background: $primary-background-darken-2; + color: $text; + padding: 0 1; +} + +#status-bar .mode-label { + color: $accent; + text-style: bold; +} + +#status-bar .network-label { + color: $secondary; + margin: 0 1; +} + +/* ── Split pane container ── */ + +#split-container { + height: 1fr; +} + +#split-container.two-pane { + layout: horizontal; +} + +#split-container.three-pane { + layout: grid; + grid-size: 2 2; + grid-gutter: 0; +} + +/* ── Combined view container ── */ + +#combined-container { + height: 1fr; + display: none; +} + +/* ── Process log pane ── */ + +ProcessLogPane { + height: 1fr; + width: 1fr; + border: solid $primary-background-lighten-2; + padding: 0; +} + +ProcessLogPane.focused-pane { + border: solid $accent; +} + +/* Third pane spans full width in 3-pane grid */ +ProcessLogPane.span-full { + column-span: 2; +} + +.pane-header { + dock: top; + height: 1; + padding: 0 1; + background: $primary-background-darken-1; + text-style: bold; +} + +.pane-header .process-name { + color: $text; +} + +.pane-header .status-healthy { + color: $success; +} + +.pane-header .status-starting { + color: $warning; +} + +.pane-header .status-error { + color: $error; +} + +.pane-header .error-count { + color: $error; + dock: right; +} + +.log-view { + height: 1fr; + scrollbar-size: 1 1; +} + +/* ── Footer / help bar ── */ + +#help-bar { + dock: bottom; + height: 1; + background: $primary-background-darken-2; + color: $text-muted; + padding: 0 1; +} + +/* ── Log level colors ── */ + +.log-error { + color: $error; + text-style: bold; +} + +.log-warn { + color: $warning; +} + +.log-info { + color: $text; +} + +.log-debug { + color: $text-muted; +} + +.log-trace { + color: $text-disabled; +} diff --git a/tools/node-runner/node_runner/tui/widgets.py b/tools/node-runner/node_runner/tui/widgets.py new file mode 100644 index 000000000..8f3deee71 --- /dev/null +++ b/tools/node-runner/node_runner/tui/widgets.py @@ -0,0 +1,230 @@ +"""TUI widgets for node-runner. + +Provides ProcessLogPane: a self-contained panel showing a process's logs +with a status header and auto-scrolling RichLog. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from rich.text import Text +from textual.reactive import reactive +from textual.widget import Widget +from textual.widgets import RichLog, Static + +from ..models import LogEntry, ProcessStatus + +if TYPE_CHECKING: + from textual.app import ComposeResult + + +_STATUS_STYLE: dict[ProcessStatus, tuple[str, str]] = { + ProcessStatus.PENDING: ("PENDING", "status-starting"), + ProcessStatus.INITIALIZING: ("INIT", "status-starting"), + ProcessStatus.STARTING: ("STARTING", "status-starting"), + ProcessStatus.HEALTHY: ("HEALTHY", "status-healthy"), + ProcessStatus.UNHEALTHY: ("UNHEALTHY", "status-error"), + ProcessStatus.STOPPING: ("STOPPING", "status-starting"), + ProcessStatus.STOPPED: ("STOPPED", "status-error"), + ProcessStatus.CRASHED: ("CRASHED", "status-error"), +} + +_LEVEL_STYLE: dict[str, str] = { + "ERROR": "bold red", + "FATAL": "bold red", + "CRIT": "bold red", + "WARN": "yellow", + "WARNING": "yellow", + "INFO": "", + "DEBUG": "dim", + "TRACE": "dim italic", +} + + +class ProcessHeader(Static): + """Single-line status header for a process pane.""" + + process_name: reactive[str] = reactive("") + status: reactive[ProcessStatus] = reactive(ProcessStatus.PENDING) + error_count: reactive[int] = reactive(0) + warning_count: reactive[int] = reactive(0) + + def __init__( + self, + process_name: str, + **kwargs: object, + ) -> None: + super().__init__(**kwargs) + self.process_name = process_name + + def render(self) -> Text: + label, css_class = _STATUS_STYLE.get( + self.status, ("UNKNOWN", "status-error") + ) + + text = Text() + text.append(f" {self.process_name} ", style="bold") + text.append("[") + + status_style = { + "status-healthy": "green", + "status-starting": "yellow", + "status-error": "red", + }.get(css_class, "") + text.append(label, style=status_style) + text.append("]") + + if self.error_count > 0 or self.warning_count > 0: + text.append(" ") + if self.error_count > 0: + text.append(f"E:{self.error_count}", style="bold red") + text.append(" ") + if self.warning_count > 0: + text.append(f"W:{self.warning_count}", style="yellow") + + return text + + +class ProcessLogPane(Widget): + """A pane displaying a process's log output with a status header. + + Attributes: + process_name: The name of the process this pane tracks. + """ + + DEFAULT_CSS = """ + ProcessLogPane { + layout: vertical; + } + """ + + def __init__( + self, + process_name: str, + *, + id: str | None = None, + classes: str | None = None, + ) -> None: + super().__init__(id=id, classes=classes) + self.process_name = process_name + self._errors_only = False + self._log_widget: RichLog | None = None + self._header_widget: ProcessHeader | None = None + + def compose(self) -> ComposeResult: + self._header_widget = ProcessHeader( + self.process_name, + classes="pane-header", + ) + yield self._header_widget + self._log_widget = RichLog( + highlight=False, + markup=False, + wrap=True, + auto_scroll=True, + classes="log-view", + ) + yield self._log_widget + + def write_entry(self, entry: LogEntry) -> None: + """Append a log entry to this pane.""" + if self._log_widget is None: + return + + if self._errors_only and not entry.is_notable: + return + + text = _styled_log_line(entry) + self._log_widget.write(text) + + def update_status( + self, + status: ProcessStatus, + error_count: int = 0, + warning_count: int = 0, + ) -> None: + """Update the header status indicators.""" + if self._header_widget is not None: + self._header_widget.status = status + self._header_widget.error_count = error_count + self._header_widget.warning_count = warning_count + + def toggle_errors_only(self) -> bool: + """Toggle the errors-only filter. Returns the new state.""" + self._errors_only = not self._errors_only + return self._errors_only + + def clear_log(self) -> None: + """Clear all log lines from the display.""" + if self._log_widget is not None: + self._log_widget.clear() + + @property + def errors_only(self) -> bool: + return self._errors_only + + +def _styled_log_line(entry: LogEntry, *, show_process: bool = False) -> Text: + """Create a Rich Text object with appropriate styling for a log entry.""" + style = _LEVEL_STYLE.get(entry.level, "") + text = Text() + + if show_process: + text.append(f"[{entry.process_name}] ", style="bold cyan") + + text.append(entry.message, style=style) + return text + + +class CombinedLogPane(Widget): + """A pane that shows interleaved logs from all processes.""" + + DEFAULT_CSS = """ + CombinedLogPane { + layout: vertical; + } + """ + + def __init__(self, **kwargs: object) -> None: + super().__init__(**kwargs) + self._errors_only = False + self._log_widget: RichLog | None = None + + def compose(self) -> ComposeResult: + header = Static( + " All Processes (combined)", + classes="pane-header", + ) + yield header + self._log_widget = RichLog( + highlight=False, + markup=False, + wrap=True, + auto_scroll=True, + classes="log-view", + ) + yield self._log_widget + + def write_entry(self, entry: LogEntry) -> None: + """Append a log entry with process prefix.""" + if self._log_widget is None: + return + + if self._errors_only and not entry.is_notable: + return + + text = _styled_log_line(entry, show_process=True) + self._log_widget.write(text) + + def toggle_errors_only(self) -> bool: + self._errors_only = not self._errors_only + return self._errors_only + + def clear_log(self) -> None: + if self._log_widget is not None: + self._log_widget.clear() + + @property + def errors_only(self) -> bool: + return self._errors_only diff --git a/tools/node-runner/pyproject.toml b/tools/node-runner/pyproject.toml new file mode 100644 index 000000000..9e7a6c5e8 --- /dev/null +++ b/tools/node-runner/pyproject.toml @@ -0,0 +1,65 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "node-runner" +version = "0.1.0" +description = "TUI tool for running Arbitrum Nitro + Nethermind nodes in multiple modes" +readme = "README.md" +license = {text = "LGPL-3.0"} +requires-python = ">=3.12" +authors = [ + {name = "Nethermind", email = "hello@nethermind.io"} +] +classifiers = [ + "Development Status :: 3 - Alpha", + "Environment :: Console", + "Intended Audience :: Developers", + "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Topic :: Software Development :: Testing", +] +dependencies = [ + "textual>=1.0", + "pydantic>=2.0", + "rich>=13.0", + "python-dotenv>=1.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8.0", + "pytest-asyncio>=0.23", + "pytest-cov>=4.0", + "mypy>=1.8", + "ruff>=0.2", +] + +[project.scripts] +node-runner = "node_runner.__main__:main" + +[tool.setuptools.packages.find] +where = ["."] +include = ["node_runner*"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +addopts = "-v --tb=short" +python_files = ["test_*.py"] +asyncio_mode = "auto" + +[tool.mypy] +python_version = "3.12" +strict = true +warn_return_any = true +warn_unused_configs = true + +[tool.ruff] +target-version = "py312" +line-length = 100 + +[tool.ruff.lint] +select = ["E", "F", "W", "I", "N", "UP", "B", "C4", "SIM"] diff --git a/tools/node-runner/tests/__init__.py b/tools/node-runner/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tools/node-runner/tests/test_config.py b/tools/node-runner/tests/test_config.py new file mode 100644 index 000000000..4e8f7853a --- /dev/null +++ b/tools/node-runner/tests/test_config.py @@ -0,0 +1,198 @@ +"""Tests for configuration models and CLI argument parsing.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest +from pydantic import ValidationError + +from node_runner.config import ( + DEFAULT_JWT_PATH, + NETWORKS, + LogLevel, + Mode, + Network, + PortConfig, + RunnerConfig, +) + + +class TestEnums: + def test_network_values(self) -> None: + assert Network.SEPOLIA.value == "sepolia" + assert Network.MAINNET.value == "mainnet" + + def test_mode_values(self) -> None: + assert Mode.COMPARISON.value == "comparison" + assert Mode.NITRO_NM.value == "nitro-nm" + assert Mode.NITRO_NITRO.value == "nitro-nitro" + + def test_log_level_values(self) -> None: + assert LogLevel.ERROR.value == "error" + assert LogLevel.WARN.value == "warn" + assert LogLevel.INFO.value == "info" + assert LogLevel.DEBUG.value == "debug" + assert LogLevel.TRACE.value == "trace" + + +class TestNetworkConfig: + def test_sepolia_config(self) -> None: + cfg = NETWORKS[Network.SEPOLIA] + assert cfg.chain_id == 421614 + assert cfg.parent_chain_id == 11155111 + assert cfg.nethermind_config_name == "arbitrum-sepolia" + + def test_mainnet_config(self) -> None: + cfg = NETWORKS[Network.MAINNET] + assert cfg.chain_id == 42161 + assert cfg.parent_chain_id == 1 + assert cfg.nethermind_config_name == "arbitrum-mainnet" + + def test_all_networks_have_configs(self) -> None: + for network in Network: + assert network in NETWORKS + + def test_frozen(self) -> None: + cfg = NETWORKS[Network.SEPOLIA] + with pytest.raises(AttributeError): + cfg.chain_id = 999 # type: ignore[misc] + + +class TestPortConfig: + def test_defaults(self) -> None: + ports = PortConfig() + assert ports.nitro_el_ws == 20552 + assert ports.nitro_el_http == 8547 + assert ports.nitro_el_auth == 8551 + assert ports.nitro_cl_ws == 8559 + assert ports.nitro_cl_http == 8558 + assert ports.nethermind_http == 20545 + assert ports.nethermind_engine == 20551 + + def test_frozen(self) -> None: + ports = PortConfig() + with pytest.raises(ValidationError): + ports.nitro_el_ws = 9999 # type: ignore[misc] + + def test_no_port_conflicts(self) -> None: + """All default ports must be unique to avoid bind conflicts.""" + ports = PortConfig() + values = [ + ports.nitro_el_ws, + ports.nitro_el_http, + ports.nitro_el_auth, + ports.nitro_cl_ws, + ports.nitro_cl_http, + ports.nethermind_http, + ports.nethermind_engine, + ] + assert len(values) == len(set(values)), "Port conflict detected in defaults" + + +class TestRunnerConfig: + @pytest.fixture() + def config(self, tmp_path: Path) -> RunnerConfig: + """Create a minimal valid RunnerConfig for testing.""" + nitro = tmp_path / "nitro" + nitro.mkdir() + nm = tmp_path / "nethermind" + nm.mkdir() + return RunnerConfig( + mode=Mode.COMPARISON, + network=Network.SEPOLIA, + nitro_path=nitro, + nethermind_path=nm, + data_dir=tmp_path / "data", + ) + + def test_nitro_binary_path(self, config: RunnerConfig) -> None: + assert config.nitro_binary == config.nitro_path / "target" / "bin" / "nitro" + + def test_nethermind_build_dir(self, config: RunnerConfig) -> None: + expected = ( + config.nethermind_path + / "src" / "Nethermind" / "src" / "Nethermind" + / "artifacts" / "bin" / "Nethermind.Runner" / "debug" + ) + assert config.nethermind_build_dir == expected + + def test_network_config_property(self, config: RunnerConfig) -> None: + assert config.network_config == NETWORKS[Network.SEPOLIA] + assert config.network_config.chain_id == 421614 + + def test_l1_rpc_default(self, config: RunnerConfig) -> None: + assert config.l1_rpc == NETWORKS[Network.SEPOLIA].l1_rpc + + def test_l1_rpc_override(self, tmp_path: Path) -> None: + nitro = tmp_path / "nitro" + nitro.mkdir() + nm = tmp_path / "nm" + nm.mkdir() + cfg = RunnerConfig( + mode=Mode.COMPARISON, + network=Network.SEPOLIA, + nitro_path=nitro, + nethermind_path=nm, + l1_rpc_override="wss://custom.rpc", + ) + assert cfg.l1_rpc == "wss://custom.rpc" + + def test_l1_beacon_override(self, tmp_path: Path) -> None: + nitro = tmp_path / "nitro" + nitro.mkdir() + nm = tmp_path / "nm" + nm.mkdir() + cfg = RunnerConfig( + mode=Mode.COMPARISON, + network=Network.SEPOLIA, + nitro_path=nitro, + nethermind_path=nm, + l1_beacon_override="https://custom.beacon", + ) + assert cfg.l1_beacon == "https://custom.beacon" + + def test_data_path_namespaced(self, config: RunnerConfig) -> None: + path = config.data_path("nitro-el") + assert path == config.data_dir / "sepolia" / "nitro-el" + + def test_crash_report_dir(self, config: RunnerConfig) -> None: + assert config.crash_report_dir() == config.data_dir / "crash_reports" + + def test_error_log_dir(self, config: RunnerConfig) -> None: + assert config.error_log_dir() == config.data_dir / "errors" + + @pytest.mark.parametrize( + ("mode", "needs_el", "needs_nm"), + [ + (Mode.COMPARISON, True, True), + (Mode.NITRO_NM, False, True), + (Mode.NITRO_NITRO, True, False), + ], + ) + def test_needs_process_flags( + self, + tmp_path: Path, + mode: Mode, + needs_el: bool, + needs_nm: bool, + ) -> None: + nitro = tmp_path / "nitro" + nitro.mkdir() + nm = tmp_path / "nm" + nm.mkdir() + cfg = RunnerConfig( + mode=mode, + network=Network.SEPOLIA, + nitro_path=nitro, + nethermind_path=nm, + ) + assert cfg.needs_nitro_el is needs_el + assert cfg.needs_nethermind is needs_nm + + def test_frozen(self, config: RunnerConfig) -> None: + with pytest.raises(ValidationError): + config.mode = Mode.NITRO_NM # type: ignore[misc] + + def test_default_jwt_path(self, config: RunnerConfig) -> None: + assert config.jwt_secret == DEFAULT_JWT_PATH diff --git a/tools/node-runner/tests/test_models.py b/tools/node-runner/tests/test_models.py new file mode 100644 index 000000000..a81ce35ec --- /dev/null +++ b/tools/node-runner/tests/test_models.py @@ -0,0 +1,178 @@ +"""Tests for data models: ProcessState, LogEntry, ProcessStatus.""" + +from __future__ import annotations + +from datetime import datetime + +import pytest + +from node_runner.config import ERROR_BUFFER_SIZE, LOG_BUFFER_SIZE +from node_runner.models import LogEntry, ProcessState, ProcessStatus + + +class TestLogEntry: + @pytest.fixture() + def error_entry(self) -> LogEntry: + return LogEntry( + timestamp=datetime.now(), + process_name="test", + level="ERROR", + message="something failed", + raw="ERROR something failed\n", + ) + + @pytest.fixture() + def warn_entry(self) -> LogEntry: + return LogEntry( + timestamp=datetime.now(), + process_name="test", + level="WARN", + message="something suspicious", + raw="WARN something suspicious\n", + ) + + @pytest.fixture() + def info_entry(self) -> LogEntry: + return LogEntry( + timestamp=datetime.now(), + process_name="test", + level="INFO", + message="all good", + raw="INFO all good\n", + ) + + def test_is_error(self, error_entry: LogEntry) -> None: + assert error_entry.is_error is True + assert error_entry.is_warning is False + assert error_entry.is_notable is True + + def test_is_warning(self, warn_entry: LogEntry) -> None: + assert warn_entry.is_error is False + assert warn_entry.is_warning is True + assert warn_entry.is_notable is True + + def test_info_not_notable(self, info_entry: LogEntry) -> None: + assert info_entry.is_error is False + assert info_entry.is_warning is False + assert info_entry.is_notable is False + + def test_fatal_is_error(self) -> None: + entry = LogEntry( + timestamp=datetime.now(), + process_name="test", + level="FATAL", + message="crash", + raw="FATAL crash\n", + ) + assert entry.is_error is True + + def test_crit_is_error(self) -> None: + entry = LogEntry( + timestamp=datetime.now(), + process_name="test", + level="CRIT", + message="critical", + raw="CRIT critical\n", + ) + assert entry.is_error is True + + def test_warning_level_is_warning(self) -> None: + entry = LogEntry( + timestamp=datetime.now(), + process_name="test", + level="WARNING", + message="warning", + raw="WARNING warning\n", + ) + assert entry.is_warning is True + + +class TestProcessStatus: + def test_all_values(self) -> None: + expected = { + "pending", "initializing", "starting", "healthy", + "unhealthy", "stopping", "stopped", "crashed", + } + actual = {s.value for s in ProcessStatus} + assert actual == expected + + +class TestProcessState: + def test_default_state(self) -> None: + state = ProcessState(name="test") + assert state.status == ProcessStatus.PENDING + assert state.pid is None + assert state.start_time is None + assert state.exit_code is None + assert len(state.log_buffer) == 0 + assert len(state.error_log) == 0 + + def test_is_terminal(self) -> None: + state = ProcessState(name="test") + assert state.is_terminal is False + + state.status = ProcessStatus.STOPPED + assert state.is_terminal is True + + state.status = ProcessStatus.CRASHED + assert state.is_terminal is True + + state.status = ProcessStatus.HEALTHY + assert state.is_terminal is False + + def test_log_buffer_ring_behavior(self) -> None: + state = ProcessState(name="test") + assert state.log_buffer.maxlen == LOG_BUFFER_SIZE + + # Fill beyond capacity + for i in range(LOG_BUFFER_SIZE + 100): + entry = LogEntry( + timestamp=datetime.now(), + process_name="test", + level="INFO", + message=f"line {i}", + raw=f"INFO line {i}\n", + ) + state.log_buffer.append(entry) + + assert len(state.log_buffer) == LOG_BUFFER_SIZE + # Oldest entries should be evicted + assert state.log_buffer[0].message == "line 100" + + def test_error_buffer_ring_behavior(self) -> None: + state = ProcessState(name="test") + assert state.error_log.maxlen == ERROR_BUFFER_SIZE + + for i in range(ERROR_BUFFER_SIZE + 50): + entry = LogEntry( + timestamp=datetime.now(), + process_name="test", + level="ERROR", + message=f"error {i}", + raw=f"ERROR error {i}\n", + ) + state.error_log.append(entry) + + assert len(state.error_log) == ERROR_BUFFER_SIZE + assert state.error_log[0].message == "error 50" + + def test_error_count(self) -> None: + state = ProcessState(name="test") + now = datetime.now() + + state.error_log.append(LogEntry(now, "test", "ERROR", "e1", "e1\n")) + state.error_log.append(LogEntry(now, "test", "WARN", "w1", "w1\n")) + state.error_log.append(LogEntry(now, "test", "ERROR", "e2", "e2\n")) + + assert state.error_count == 2 + assert state.warning_count == 1 + + def test_warning_count(self) -> None: + state = ProcessState(name="test") + now = datetime.now() + + state.error_log.append(LogEntry(now, "test", "WARN", "w1", "w1\n")) + state.error_log.append(LogEntry(now, "test", "WARNING", "w2", "w2\n")) + + assert state.warning_count == 2 + assert state.error_count == 0 diff --git a/tools/node-runner/tests/test_orchestrator.py b/tools/node-runner/tests/test_orchestrator.py new file mode 100644 index 000000000..f6dcd7455 --- /dev/null +++ b/tools/node-runner/tests/test_orchestrator.py @@ -0,0 +1,200 @@ +"""Tests for the Orchestrator process graph and lifecycle.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from node_runner.config import Mode, Network, RunnerConfig +from node_runner.orchestrator import Orchestrator +from node_runner.processes.nethermind import NethermindProcess +from node_runner.processes.nitro_cl import NitroCLProcess +from node_runner.processes.nitro_el import NitroELProcess + +VERIFY_FLAG = "--VerifyBlockHash.Enabled=true" + + +@pytest.fixture() +def make_config(tmp_path: Path): + """Factory for RunnerConfig with a given mode.""" + + def _make( + mode: Mode, + *, + force_init: bool = False, + network: Network = Network.SEPOLIA, + verification: int | None = None, + ) -> RunnerConfig: + nitro = tmp_path / "nitro" + nitro.mkdir(exist_ok=True) + nm = tmp_path / "nm" + nm.mkdir(exist_ok=True) + return RunnerConfig( + mode=mode, + network=network, + nitro_path=nitro, + nethermind_path=nm, + data_dir=tmp_path / "data", + force_init=force_init, + verification=verification, + ) + + return _make + + +class TestProcessGraph: + """Verify the correct process list is built per mode.""" + + def test_comparison_mode_three_processes(self, make_config) -> None: + config = make_config(Mode.COMPARISON) + orch = Orchestrator(config) + procs = orch._build_process_list() + assert len(procs) == 3 + assert isinstance(procs[0], NitroELProcess) + assert isinstance(procs[1], NethermindProcess) + assert isinstance(procs[2], NitroCLProcess) + + def test_nitro_nm_mode_two_processes(self, make_config) -> None: + config = make_config(Mode.NITRO_NM) + orch = Orchestrator(config) + procs = orch._build_process_list() + assert len(procs) == 2 + assert isinstance(procs[0], NethermindProcess) + assert isinstance(procs[1], NitroCLProcess) + + def test_nitro_nm_verification_same_process_count(self, make_config) -> None: + """Verification doesn't change the process graph (it's a Nethermind-side flag).""" + config = make_config(Mode.NITRO_NM, verification=0) + orch = Orchestrator(config) + procs = orch._build_process_list() + assert len(procs) == 2 + assert isinstance(procs[0], NethermindProcess) + assert isinstance(procs[1], NitroCLProcess) + + def test_nitro_nitro_mode_two_processes(self, make_config) -> None: + config = make_config(Mode.NITRO_NITRO) + orch = Orchestrator(config) + procs = orch._build_process_list() + assert len(procs) == 2 + assert isinstance(procs[0], NitroELProcess) + assert isinstance(procs[1], NitroCLProcess) + + def test_comparison_startup_order(self, make_config) -> None: + """ELs must start before CL (dependency chain).""" + config = make_config(Mode.COMPARISON) + orch = Orchestrator(config) + procs = orch._build_process_list() + names = [p.name for p in procs] + # CL must be last + assert names[-1] == "nitro-cl" + # Both ELs before CL + assert "nitro-el" in names[:2] + assert "nethermind" in names[:2] + + def test_process_names_are_unique(self, make_config) -> None: + for mode in Mode: + config = make_config(mode) + orch = Orchestrator(config) + procs = orch._build_process_list() + names = [p.name for p in procs] + assert len(names) == len(set(names)), f"Duplicate names in {mode}" + + +class TestNeedsInit: + def test_not_needed_for_nitro_nm(self, make_config) -> None: + config = make_config(Mode.NITRO_NM, network=Network.MAINNET) + orch = Orchestrator(config) + assert not orch._needs_init() + + def test_needed_for_sepolia_when_data_missing(self, make_config) -> None: + """Sepolia also needs init when data dir doesn't exist.""" + config = make_config(Mode.COMPARISON, network=Network.SEPOLIA) + orch = Orchestrator(config) + assert orch._needs_init() + + def test_needed_when_force_init(self, make_config) -> None: + config = make_config(Mode.COMPARISON, force_init=True, network=Network.MAINNET) + orch = Orchestrator(config) + assert orch._needs_init() + + def test_needed_when_data_dir_missing(self, make_config) -> None: + config = make_config(Mode.COMPARISON, network=Network.MAINNET) + orch = Orchestrator(config) + # data_path("nitro-el") doesn't exist → needs init + assert orch._needs_init() + + def test_needed_when_data_dir_empty(self, make_config) -> None: + config = make_config(Mode.COMPARISON, network=Network.MAINNET) + # Create empty data dir + data_path = config.data_path("nitro-el") + data_path.mkdir(parents=True) + orch = Orchestrator(config) + assert orch._needs_init() + + def test_not_needed_when_data_exists(self, make_config) -> None: + config = make_config(Mode.COMPARISON, network=Network.MAINNET) + data_path = config.data_path("nitro-el") + data_path.mkdir(parents=True) + # Create a file to indicate init was done + (data_path / "CURRENT").touch() + orch = Orchestrator(config) + assert not orch._needs_init() + + +class TestShutdown: + def test_request_shutdown_sets_event(self, make_config) -> None: + config = make_config(Mode.COMPARISON) + orch = Orchestrator(config) + assert not orch.is_shutdown_requested + orch.request_shutdown() + assert orch.is_shutdown_requested + + def test_processes_empty_before_start(self, make_config) -> None: + config = make_config(Mode.COMPARISON) + orch = Orchestrator(config) + assert orch.processes == [] + + def test_summary_empty_before_start(self, make_config) -> None: + config = make_config(Mode.COMPARISON) + orch = Orchestrator(config) + assert orch.summary() == {} + + +class TestVerifyBlockHash: + """Verification adds --VerifyBlockHash.Enabled=true to Nethermind, nothing else.""" + + def test_nethermind_has_flag_when_enabled(self, make_config) -> None: + config = make_config(Mode.NITRO_NM, verification=0) + proc = NethermindProcess(config) + cmd = proc.build_command() + assert VERIFY_FLAG in cmd + + def test_nethermind_no_flag_by_default(self, make_config) -> None: + config = make_config(Mode.NITRO_NM) + proc = NethermindProcess(config) + cmd = proc.build_command() + assert VERIFY_FLAG not in cmd + + def test_every_n_blocks_passed_when_set(self, make_config) -> None: + config = make_config(Mode.NITRO_NM, verification=500) + proc = NethermindProcess(config) + cmd = proc.build_command() + assert VERIFY_FLAG in cmd + assert "--VerifyBlockHash.VerifyEveryNBlocks=500" in cmd + + def test_every_n_blocks_omitted_when_zero(self, make_config) -> None: + """Zero means use Nethermind's default — don't pass the interval flag.""" + config = make_config(Mode.NITRO_NM, verification=0) + proc = NethermindProcess(config) + cmd = proc.build_command() + assert not any(a.startswith("--VerifyBlockHash.VerifyEveryNBlocks") for a in cmd) + + def test_cl_unchanged_with_verification(self, make_config) -> None: + """CL command must be identical with and without verification.""" + base = make_config(Mode.NITRO_NM) + with_verify = make_config(Mode.NITRO_NM, verification=0) + + base_cmd = NitroCLProcess(base, Mode.NITRO_NM).build_command() + verify_cmd = NitroCLProcess(with_verify, Mode.NITRO_NM).build_command() + assert base_cmd == verify_cmd