Skip to content

Commit 016a70d

Browse files
committed
fix(gc): skip deleted miner actors in StorageGCMark instead of failing
When a miner actor no longer exists on-chain (e.g. removed from config after deletion), StorageGCMark would fail with 'actor not found' and enter a permanent retry loop, blocking all storage GC.

Handle the actor-not-found case gracefully in both Stage 1 (sector liveness check) and Stage 3 (snap sector-key cleanup):

- Stage 1: Skip loading miner state for deleted actors. Their sectors remain in the toRemove set since there are no on-chain precommits, live, or unproven sectors to subtract.
- Stage 3: Skip finality-tipset actor lookups for deleted miners. Snap sector-key cleanup is irrelevant for non-existent miners.

Only the specific 'actor not found' error triggers this path. Transient RPC errors (timeouts, connection issues) still fail the task as before, preventing accidental GC of sectors for healthy miners during network disruptions.

Fixes a scenario where removing a calibration/test miner from config causes StorageGCMark to fail 100% of runs indefinitely.
1 parent a3473a4 commit 016a70d

File tree

1 file changed

+26
-0
lines changed

1 file changed

+26
-0
lines changed

tasks/gc/storage_gc_mark.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package gc
33
import (
44
"context"
55
"fmt"
6+
"strings"
67
"time"
78

89
cbor "github.com/ipfs/go-ipld-cbor"
@@ -122,6 +123,13 @@ func (s *StorageGCMark) Do(taskID harmonytask.TaskID, stillOwned func() bool) (d
122123

123124
mact, err := s.api.StateGetActor(ctx, maddr, types.EmptyTSK)
124125
if err != nil {
126+
if isActorNotFoundErr(err) {
127+
// Miner actor no longer exists on-chain. Sector files for this
128+
// miner are orphaned — skip loading state so that all its sectors
129+
// remain in toRemove (no precommit/live/unproven subtraction).
130+
log.Warnw("miner actor not found on-chain, treating sectors as orphaned for GC", "miner", maddr)
131+
continue
132+
}
125133
return false, xerrors.Errorf("get miner actor %s: %w", maddr, err)
126134
}
127135

@@ -383,6 +391,12 @@ func (s *StorageGCMark) Do(taskID harmonytask.TaskID, stillOwned func() bool) (d
383391

384392
mact, err := s.api.StateGetActor(ctx, maddr, finalityTipset.Key())
385393
if err != nil {
394+
if isActorNotFoundErr(err) {
395+
// Miner actor no longer exists on-chain at finality height.
396+
// Skip — snap sector key cleanup is irrelevant for a deleted miner.
397+
log.Warnw("miner actor not found at finality height, skipping snap-key GC", "miner", maddr)
398+
continue
399+
}
386400
return false, xerrors.Errorf("get miner actor %s at finality: %w", maddr, err)
387401
}
388402

@@ -501,3 +515,15 @@ func (s *StorageGCMark) Adder(taskFunc harmonytask.AddTaskFunc) {
501515

502516
// Compile-time check that *StorageGCMark satisfies harmonytask.TaskInterface.
var _ harmonytask.TaskInterface = &StorageGCMark{}
503517
// Passes a zero-value StorageGCMark to harmonytask.Reg during package variable
// initialization. NOTE(review): presumably this registers the task type with
// harmonytask — confirm against the harmonytask package.
var _ = harmonytask.Reg(&StorageGCMark{})
518+
519+
// isActorNotFoundErr reports whether err signals that a miner actor is absent
// from chain state.
//
// Curio reaches Lotus over JSON-RPC, so the typed sentinels
// (types.ErrActorNotFound / api.ErrActorNotFound) may not survive the
// round-trip and cannot be relied on here. The check therefore matches on the
// error text instead, in line with the existing callers in sptool (see
// cmd/sptool/toolbox_deal_client.go).
//
// A nil error is never a match. Any non-nil error whose message contains the
// marker phrase anywhere, including inside wrapped context, is a match.
func isActorNotFoundErr(err error) bool {
	const notFoundMarker = "actor not found"

	return err != nil && strings.Contains(err.Error(), notFoundMarker)
}

0 commit comments

Comments
 (0)