diff --git a/op-wheel/cmd/main.go b/op-wheel/cmd/main.go index 9da15270eda..3c96426f83e 100644 --- a/op-wheel/cmd/main.go +++ b/op-wheel/cmd/main.go @@ -38,6 +38,7 @@ func main() { app.ErrWriter = os.Stderr app.Commands = []*cli.Command{ wheel.CheatCmd, + wheel.CheatRethCmd, wheel.EngineCmd, } diff --git a/op-wheel/commands.go b/op-wheel/commands.go index ee6ba274fd6..9b66edabab2 100644 --- a/op-wheel/commands.go +++ b/op-wheel/commands.go @@ -672,6 +672,30 @@ var ( }), } + EngineRewindRethCmd = &cli.Command{ + Name: "rewind-reth", + Description: "Rewind a reth node offline by running 'reth stage unwind'. The reth node must be stopped.", + Flags: rethFlags( + &cli.Uint64Flag{ + Name: "to", + Usage: "Block number to rewind chain to", + Required: true, + EnvVars: prefixEnvVars("REWIND_TO"), + }, + ), + Action: func(ctx *cli.Context) error { + lgr := initLogger(ctx) + return engine.RethRewind( + ctx.Context, + lgr, + ctx.String("reth-binary"), + ctx.String("reth-datadir"), + ctx.String("reth-chain"), + ctx.Uint64("to"), + ) + }, + } + EngineJSONCmd = &cli.Command{ Name: "json", Description: "read json values from remaining args, or STDIN, and use them as RPC params to call the engine RPC method (first arg)", @@ -700,6 +724,95 @@ var ( } ) +func rethFlags(flags ...cli.Flag) []cli.Flag { + return append(append(flags, + &cli.StringFlag{ + Name: "reth-binary", + Usage: "Path to the reth binary", + Required: true, + EnvVars: prefixEnvVars("RETH_BINARY"), + }, + &cli.StringFlag{ + Name: "reth-datadir", + Usage: "Reth data directory path", + Required: true, + EnvVars: prefixEnvVars("RETH_DATADIR"), + }, + &cli.StringFlag{ + Name: "reth-chain", + Usage: "Chain spec name or path (e.g., 'optimism', 'dev', or path to genesis file)", + Required: true, + EnvVars: prefixEnvVars("RETH_CHAIN"), + }, + ), oplog.CLIFlags(envVarPrefix)...) 
+} + +var ( + CheatRethStateCmd = &cli.Command{ + Name: "state", + Description: "Read account state (balance, nonce, code, storage) from a reth database offline.", + Flags: rethFlags( + &cli.StringFlag{ + Name: "address", + Usage: "Account address to inspect", + Required: true, + EnvVars: prefixEnvVars("ADDRESS"), + }, + &cli.StringFlag{ + Name: "block", + Usage: "Block number to query state at (uses latest if not provided)", + EnvVars: prefixEnvVars("BLOCK"), + }, + &cli.Uint64Flag{ + Name: "limit", + Usage: "Maximum number of storage slots to display", + Value: 100, + EnvVars: prefixEnvVars("LIMIT"), + }, + ), + Action: func(ctx *cli.Context) error { + lgr := initLogger(ctx) + return engine.RethState( + ctx.Context, + lgr, + ctx.String("reth-binary"), + ctx.String("reth-datadir"), + ctx.String("reth-chain"), + ctx.String("address"), + ctx.String("block"), + ctx.Uint64("limit"), + ) + }, + } + + CheatRethHeadCmd = &cli.Command{ + Name: "head", + Description: "Show the current head block via stage checkpoints from a reth database offline.", + Flags: rethFlags(), + Action: func(ctx *cli.Context) error { + lgr := initLogger(ctx) + return engine.RethHead( + ctx.Context, + lgr, + ctx.String("reth-binary"), + ctx.String("reth-datadir"), + ctx.String("reth-chain"), + ) + }, + } +) + +var CheatRethCmd = &cli.Command{ + Name: "cheat-reth", + Usage: "Read-only inspection commands for a reth database (offline).", + Description: "Each sub-command invokes reth CLI tools against the database. 
" + + "The reth node must be stopped before running these commands.", + Subcommands: []*cli.Command{ + CheatRethStateCmd, + CheatRethHeadCmd, + }, +} + var CheatCmd = &cli.Command{ Name: "cheat", Usage: "Cheating commands to modify a Geth database.", @@ -728,6 +841,7 @@ var EngineCmd = &cli.Command{ EngineSetForkchoiceCmd, EngineSetForkchoiceHashCmd, EngineRewindCmd, + EngineRewindRethCmd, EngineJSONCmd, }, } diff --git a/op-wheel/engine/reth.go b/op-wheel/engine/reth.go new file mode 100644 index 00000000000..73915cf04a6 --- /dev/null +++ b/op-wheel/engine/reth.go @@ -0,0 +1,103 @@ +package engine + +import ( + "context" + "errors" + "fmt" + "os" + "os/exec" + "strconv" + + "github.com/ethereum/go-ethereum/log" +) + +// RethRewind performs an offline rewind of a reth node by executing +// `reth stage unwind to-block ` as a subprocess. +// The reth node must be stopped before calling this function. +func RethRewind(ctx context.Context, lgr log.Logger, rethBinary string, datadir string, chain string, toBlock uint64) error { + cmd, err := buildRethUnwindCmd(ctx, rethBinary, datadir, chain, toBlock) + if err != nil { + return err + } + return runRethCmd(ctx, lgr, cmd, "reth stage unwind") +} + +// RethState runs `reth db state
` to inspect account state offline. +func RethState(ctx context.Context, lgr log.Logger, rethBinary string, datadir string, chain string, address string, block string, limit uint64) error { + cmd, err := buildRethDBCmd(ctx, rethBinary, datadir, chain, "state", address, "--format", "json", "--limit", strconv.FormatUint(limit, 10)) + if err != nil { + return err + } + if block != "" { + cmd.Args = append(cmd.Args, "--block", block) + } + return runRethCmd(ctx, lgr, cmd, "reth db state") +} + +// RethHead runs `reth db stage-checkpoints get` to show the current head (stage checkpoints). +func RethHead(ctx context.Context, lgr log.Logger, rethBinary string, datadir string, chain string) error { + cmd, err := buildRethDBCmd(ctx, rethBinary, datadir, chain, "stage-checkpoints", "get") + if err != nil { + return err + } + return runRethCmd(ctx, lgr, cmd, "reth db stage-checkpoints") +} + +// runRethCmd executes a reth command, streaming output and handling exit codes. +func runRethCmd(_ context.Context, lgr log.Logger, cmd *exec.Cmd, label string) error { + lgr.Info("Executing "+label, "binary", cmd.Path, "args", cmd.Args[1:]) + + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + if err := cmd.Run(); err != nil { + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + return fmt.Errorf("%s failed with exit code %d: %w", label, exitErr.ExitCode(), err) + } + return fmt.Errorf("failed to execute reth: %w", err) + } + + lgr.Info(label + " completed successfully") + return nil +} + +// resolveRethBinary validates the reth binary exists and returns its resolved path. +func resolveRethBinary(rethBinary string) (string, error) { + resolvedPath, err := exec.LookPath(rethBinary) + if err != nil { + return "", fmt.Errorf("reth binary not found at %q: %w", rethBinary, err) + } + return resolvedPath, nil +} + +// buildRethUnwindCmd constructs the exec.Cmd for `reth stage unwind to-block `. 
+func buildRethUnwindCmd(ctx context.Context, rethBinary string, datadir string, chain string, toBlock uint64) (*exec.Cmd, error) { + resolvedPath, err := resolveRethBinary(rethBinary) + if err != nil { + return nil, err + } + + args := []string{ + "stage", "unwind", + "--datadir", datadir, + "--chain", chain, + "to-block", strconv.FormatUint(toBlock, 10), + } + + return exec.CommandContext(ctx, resolvedPath, args...), nil +} + +// buildRethDBCmd constructs an exec.Cmd for `reth db --datadir --chain [args...]`. +func buildRethDBCmd(ctx context.Context, rethBinary string, datadir string, chain string, subArgs ...string) (*exec.Cmd, error) { + resolvedPath, err := resolveRethBinary(rethBinary) + if err != nil { + return nil, err + } + + // reth db --datadir --chain [args...] + args := []string{"db", "--datadir", datadir, "--chain", chain} + args = append(args, subArgs...) + + return exec.CommandContext(ctx, resolvedPath, args...), nil +} diff --git a/op-wheel/engine/reth_test.go b/op-wheel/engine/reth_test.go new file mode 100644 index 00000000000..f21010afac9 --- /dev/null +++ b/op-wheel/engine/reth_test.go @@ -0,0 +1,132 @@ +package engine + +import ( + "context" + "fmt" + "os" + "os/exec" + "strconv" + "testing" + + "github.com/ethereum/go-ethereum/log" +) + +func TestBuildRethUnwindCmd_Args(t *testing.T) { + self, err := os.Executable() + if err != nil { + t.Fatal(err) + } + + cmd, err := buildRethUnwindCmd(context.Background(), self, "/data/reth", "op-mainnet", 12345678) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + wantArgs := []string{ + self, + "stage", "unwind", + "--datadir", "/data/reth", + "--chain", "op-mainnet", + "to-block", "12345678", + } + assertArgs(t, cmd.Args, wantArgs) +} + +func TestBuildRethUnwindCmd_BinaryNotFound(t *testing.T) { + _, err := buildRethUnwindCmd(context.Background(), "/nonexistent/reth", "/data", "optimism", 100) + if err == nil { + t.Fatal("expected error for nonexistent binary, got nil") + } +} + +func 
TestBuildRethDBCmd_State(t *testing.T) { + self, err := os.Executable() + if err != nil { + t.Fatal(err) + } + + cmd, err := buildRethDBCmd(context.Background(), self, "/db", "optimism", + "state", "0xdead", "--format", "json", "--limit", "100") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + wantArgs := []string{ + self, + "db", "--datadir", "/db", "--chain", "optimism", + "state", "0xdead", "--format", "json", "--limit", "100", + } + assertArgs(t, cmd.Args, wantArgs) +} + +func TestBuildRethDBCmd_StageCheckpoints(t *testing.T) { + self, err := os.Executable() + if err != nil { + t.Fatal(err) + } + + cmd, err := buildRethDBCmd(context.Background(), self, "/db", "dev", + "stage-checkpoints", "get") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + wantArgs := []string{ + self, + "db", "--datadir", "/db", "--chain", "dev", + "stage-checkpoints", "get", + } + assertArgs(t, cmd.Args, wantArgs) +} + +func TestRethRewind_SubprocessExit(t *testing.T) { + if os.Getenv("GO_TEST_HELPER_PROCESS") == "1" { + code, _ := strconv.Atoi(os.Getenv("GO_TEST_EXIT_CODE")) + os.Exit(code) + } + + self, err := os.Executable() + if err != nil { + t.Fatal(err) + } + lgr := log.NewLogger(log.DiscardHandler()) + + t.Run("success", func(t *testing.T) { + err := rethCmdWithHelper(context.Background(), lgr, self, 0, "test") + if err != nil { + t.Fatalf("expected success, got: %v", err) + } + }) + + t.Run("failure", func(t *testing.T) { + err := rethCmdWithHelper(context.Background(), lgr, self, 1, "test") + if err == nil { + t.Fatal("expected error for exit code 1, got nil") + } + }) +} + +// rethCmdWithHelper runs a subprocess using the test binary as a fake reth, +// configured to exit with the given code. 
+func rethCmdWithHelper(ctx context.Context, lgr log.Logger, testBinary string, exitCode int, label string) error { + cmd := exec.CommandContext(ctx, testBinary, + "-test.run=TestRethRewind_SubprocessExit", + ) + cmd.Env = append(os.Environ(), + "GO_TEST_HELPER_PROCESS=1", + fmt.Sprintf("GO_TEST_EXIT_CODE=%d", exitCode), + ) + return runRethCmd(ctx, lgr, cmd, label) +} + +func assertArgs(t *testing.T, got, want []string) { + t.Helper() + if len(got) != len(want) { + t.Fatalf("args length mismatch: got %d, want %d\ngot: %v\nwant: %v", len(got), len(want), got, want) + } + for i, w := range want { + if got[i] != w { + t.Errorf("arg[%d] = %q, want %q", i, got[i], w) + } + } +} diff --git a/op-wheel/examples/e2e-test-rewind-reth.md b/op-wheel/examples/e2e-test-rewind-reth.md new file mode 100644 index 00000000000..162a4b4d13b --- /dev/null +++ b/op-wheel/examples/e2e-test-rewind-reth.md @@ -0,0 +1,162 @@ +# E2E Testing: op-wheel rewind-reth + +End-to-end test to verify `op-wheel engine rewind-reth` properly rewinds a reth node. + +## Prerequisites + +- Rust toolchain (for building reth) +- Go toolchain (for building op-wheel) +- `curl` and `jq` + +## 1. Build reth + +```bash +cd /path/to/reth +cargo build -p reth +# Binary at: target/debug/reth +``` + +## 2. Build op-wheel + +```bash +cd /path/to/optimism +go build -o op-wheel ./op-wheel/cmd +# Binary at: ./op-wheel +``` + +## 3. Start reth in dev mode + +```bash +DATADIR=$(mktemp -d) +echo "Using datadir: $DATADIR" + +reth node --dev \ + --dev.block-time 1s \ + --datadir "$DATADIR" \ + --http \ + --http.api all & + +RETH_PID=$! +echo "reth PID: $RETH_PID" +``` + +Wait for RPC to be ready: + +```bash +until curl -s -X POST http://localhost:8545 \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}' | jq -r '.result' > /dev/null 2>&1; do + sleep 1 +done +echo "RPC ready" +``` + +## 4. 
Wait for blocks to be produced + +Wait until the chain has enough blocks (e.g., at least 20): + +```bash +while true; do + HEX=$(curl -s -X POST http://localhost:8545 \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}' | jq -r '.result') + BLOCK=$((HEX)) + echo "Current block: $BLOCK" + if [ "$BLOCK" -ge 20 ]; then + break + fi + sleep 2 +done +``` + +Record the head before rewinding: + +```bash +HEAD_BEFORE=$BLOCK +REWIND_TO=10 +echo "Head before rewind: $HEAD_BEFORE" +echo "Will rewind to: $REWIND_TO" +``` + +## 5. Stop reth + +```bash +kill $RETH_PID +wait $RETH_PID 2>/dev/null +echo "reth stopped" +``` + +## 6. Run op-wheel rewind-reth + +```bash +./op-wheel engine rewind-reth \ + --to $REWIND_TO \ + --reth-binary $(which reth || echo ./target/debug/reth) \ + --reth-datadir "$DATADIR" \ + --reth-chain dev + +echo "Exit code: $?" +``` + +Expected output: +- Log line: `Executing reth stage unwind ...` with args `[stage unwind --datadir ... --chain dev to-block 10]` +- Log line: `reth stage unwind completed successfully` +- Exit code: `0` + +## 7. Restart reth and verify + +```bash +reth node --dev \ + --dev.block-time 1s \ + --datadir "$DATADIR" \ + --http \ + --http.api all & + +RETH_PID=$! + +# Wait for RPC +until curl -s -X POST http://localhost:8545 \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}' | jq -r '.result' > /dev/null 2>&1; do + sleep 1 +done + +# Check head +HEX=$(curl -s -X POST http://localhost:8545 \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}' | jq -r '.result') +HEAD_AFTER=$((HEX)) +echo "Head after rewind: $HEAD_AFTER" +``` + +Verify: +- `HEAD_AFTER` should be `$REWIND_TO` (10) immediately on startup, before new blocks are mined. + Note: dev mode resumes mining, so the head will increase quickly — check immediately. + +## 8.
Cleanup + +```bash +kill $RETH_PID 2>/dev/null +rm -rf "$DATADIR" +``` + +## What success looks like + +``` +Head before rewind: 25 +Will rewind to: 10 +reth stopped +Executing reth stage unwind ... +reth stage unwind completed successfully +Exit code: 0 +Head after rewind: 10 +``` + +## Troubleshooting + +| Issue | Cause | Fix | +|---|---|---| +| `reth binary not found` | Wrong `--reth-binary` path | Use absolute path to reth binary | +| `reth stage unwind failed with exit code 1` | Database locked or corrupted | Make sure reth is fully stopped before rewinding | +| Head after rewind is higher than expected | Dev mode resumed mining before you checked | Query `eth_blockNumber` immediately, or start without `--dev.block-time` | +| `No such file or directory` for datadir | Wrong `--reth-datadir` | Check the path printed in step 3 | diff --git a/op-wheel/examples/k8s-reth-rewind-job.yaml b/op-wheel/examples/k8s-reth-rewind-job.yaml new file mode 100644 index 00000000000..d28d81a241b --- /dev/null +++ b/op-wheel/examples/k8s-reth-rewind-job.yaml @@ -0,0 +1,96 @@ +# K8s Job to rewind a reth node using op-wheel. +# +# Based on the op-reth Helm chart (optimism-charts/charts/node/charts/op-reth) +# and the prod-mainnet deployment (k8s/opc/oplabs-prod-mainnet/op-opn-reth-f-snapsync-0). +# +# Chart defaults used: +# - StatefulSet name: "op-reth" (fullnameOverride in values.yaml) +# - Container name: "op-reth" (.Chart.Name) +# - Volume name: "datadir" (volumeClaimTemplates name for NVMe) +# - PVC name: "datadir-op-reth-0" (template: {vct-name}-{sts-name}-{ordinal}) +# - Mount path: "/db" +# - Binary path: "/usr/local/bin/op-reth" +# - Chain: "optimism" (RETH_CHAIN env var) +# +# Usage: +# 1. Scale down the reth StatefulSet: +# kubectl scale statefulset op-reth -n op-opn-reth-f-snapsync-0 --replicas=0 +# 2. Wait for the pod to terminate: +# kubectl wait --for=delete pod/op-reth-0 -n op-opn-reth-f-snapsync-0 --timeout=300s +# 3.
Apply this Job (set the target block number first): +# kubectl apply -f k8s-reth-rewind-job.yaml +# 4. Wait for completion: +# kubectl wait --for=condition=complete job/reth-rewind -n op-opn-reth-f-snapsync-0 --timeout=3600s +# 5. Scale the StatefulSet back up: +# kubectl scale statefulset op-reth -n op-opn-reth-f-snapsync-0 --replicas=1 + +apiVersion: batch/v1 +kind: Job +metadata: + name: reth-rewind + namespace: op-opn-reth-f-snapsync-0 +spec: + backoffLimit: 0 + ttlSecondsAfterFinished: 86400 + template: + spec: + restartPolicy: Never + terminationGracePeriodSeconds: 600 + + # Must schedule on the same node as the reth pod. + # NVMe PVCs use local-storage, so the PV is bound to a specific node. + nodeSelector: + datadir: op-opn-reth-f-snapsync-0 + tolerations: + - key: "purpose" + operator: "Equal" + value: "nvme" + effect: "NoSchedule" + + # Copy the reth binary from the reth image into a shared volume + # so the op-wheel container can invoke it. + initContainers: + - name: copy-reth + image: us-docker.pkg.dev/oplabs-tools-artifacts/images/op-reth:v1.11.5-rc.1 + command: ["cp", "/usr/local/bin/op-reth", "/shared/op-reth"] + volumeMounts: + - name: shared-bin + mountPath: /shared + + containers: + - name: rewind + image: us-docker.pkg.dev/oplabs-tools-artifacts/images/op-wheel:latest + command: ["op-wheel"] + args: + - engine + - rewind-reth + - --to + - "12345678" # <-- Set target block number + - --reth-binary + - /shared/op-reth # Copied from init container + - --reth-datadir + - /db # Matches RETH_DATADIR from chart defaults + - --reth-chain + - optimism # Matches RETH_CHAIN from prod-mainnet config + resources: + requests: + cpu: "2" + memory: 4Gi + limits: + cpu: "4" + memory: 8Gi + volumeMounts: + - name: datadir + mountPath: /db + - name: shared-bin + mountPath: /shared + + volumes: + # The PVC created by the op-reth StatefulSet's volumeClaimTemplates. 
+ # Name format: {vct-name}-{statefulset-name}-{ordinal} + # vct-name = "datadir", statefulset-name = "op-reth", ordinal = 0 + - name: datadir + persistentVolumeClaim: + claimName: datadir-op-reth-0 + - name: shared-bin + emptyDir: {}