Skip to content

Commit ee52494

Browse files
committed
fix: safeguard against network mismatch in GC actor-not-found handling
Lex raised a valid concern: 'actor not found' can also happen when a node is built for the wrong network or missed a network upgrade. In that case, the miner is healthy but the node can't see it. Added a cross-check: before treating an 'actor not found' miner as deleted, verify it doesn't appear in any harmony_config layer. If it does, the error is likely a misconfiguration — fail the task loudly instead of marking sectors for GC. The orphaned-sector GC path now only triggers when: 1. StateGetActor returns 'actor not found', AND 2. The miner address is NOT in any config layer This prevents accidental GC of sectors for healthy miners that appear missing due to wrong-network or upgrade issues.
1 parent f0690de commit ee52494

File tree

1 file changed

+72
-7
lines changed

1 file changed

+72
-7
lines changed

tasks/gc/storage_gc_mark.go

Lines changed: 72 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,16 @@ func NewStorageGCMark(si paths.SectorIndex, remote *paths.Remote, db *harmonydb.
6161
func (s *StorageGCMark) Do(taskID harmonytask.TaskID, stillOwned func() bool) (done bool, err error) {
6262
ctx := context.Background()
6363

64+
// Load configured miners from all harmony_config layers. Used to guard
65+
// against marking sectors for miners that appear "not found" due to
66+
// misconfiguration (wrong network build, missed upgrade) rather than
67+
// actual deletion.
68+
cfgMiners, err := configuredMiners(ctx, s.db)
69+
if err != nil {
70+
log.Warnw("failed to load configured miners for GC safety check, proceeding without", "error", err)
71+
cfgMiners = make(map[address.Address]bool)
72+
}
73+
6474
/*
6575
CREATE TABLE storage_removal_marks (
6676
sp_id BIGINT NOT NULL,
@@ -124,10 +134,15 @@ func (s *StorageGCMark) Do(taskID harmonytask.TaskID, stillOwned func() bool) (d
124134
mact, err := s.api.StateGetActor(ctx, maddr, types.EmptyTSK)
125135
if err != nil {
126136
if isActorNotFoundErr(err) {
127-
// Miner actor no longer exists on-chain. Sector files for this
128-
// miner are orphaned — don't load state so that all its sectors
129-
// remain in toRemove (no precommit/live/unproven subtraction).
130-
log.Warnw("miner actor not found on-chain, treating sectors as orphaned for GC", "miner", maddr)
137+
if cfgMiners[maddr] {
138+
// Miner is in config but not found on-chain — likely wrong
139+
// network build or missed upgrade. Do NOT mark for GC.
140+
return false, xerrors.Errorf("miner %s is configured but not found on-chain — possible network mismatch, refusing to GC", maddr)
141+
}
142+
// Miner actor no longer exists on-chain and is not in any config
143+
// layer. Sector files are truly orphaned — don't load state so
144+
// that all its sectors remain in toRemove.
145+
log.Warnw("miner actor not found on-chain and not in config, treating sectors as orphaned for GC", "miner", maddr)
131146
toRemove[decl.Miner].Set(uint64(decl.Number))
132147
continue
133148
}
@@ -393,9 +408,10 @@ func (s *StorageGCMark) Do(taskID harmonytask.TaskID, stillOwned func() bool) (d
393408
mact, err := s.api.StateGetActor(ctx, maddr, finalityTipset.Key())
394409
if err != nil {
395410
if isActorNotFoundErr(err) {
396-
// Miner actor no longer exists on-chain at finality height.
397-
// Skip — snap sector key cleanup is irrelevant for a deleted miner.
398-
log.Warnw("miner actor not found at finality height, skipping snap-key GC", "miner", maddr)
411+
if cfgMiners[maddr] {
412+
return false, xerrors.Errorf("miner %s is configured but not found on-chain at finality — possible network mismatch, refusing to GC", maddr)
413+
}
414+
log.Warnw("miner actor not found at finality height and not in config, skipping snap-key GC", "miner", maddr)
399415
continue
400416
}
401417
return false, xerrors.Errorf("get miner actor %s at finality: %w", maddr, err)
@@ -528,3 +544,52 @@ func isActorNotFoundErr(err error) bool {
528544
}
529545
return strings.Contains(err.Error(), "actor not found")
530546
}
547+
548+
// configuredMiners returns the set of miner addresses referenced in any
549+
// harmony_config layer. This is used as a safety check: if a miner appears
550+
// "not found" on-chain but is still in config, it's likely a misconfiguration
551+
// (wrong network build, missed upgrade) rather than a truly deleted miner.
552+
func configuredMiners(ctx context.Context, db *harmonydb.DB) (map[address.Address]bool, error) {
553+
var configs []struct {
554+
Config string `db:"config"`
555+
}
556+
err := db.Select(ctx, &configs, `SELECT config FROM harmony_config WHERE LENGTH(config) > 0`)
557+
if err != nil {
558+
return nil, xerrors.Errorf("querying harmony_config: %w", err)
559+
}
560+
561+
result := make(map[address.Address]bool)
562+
for _, c := range configs {
563+
// MinerAddresses appear in TOML as:
564+
// MinerAddresses = ["f01234", "f05678"]
565+
// Simple string scan is sufficient — we just need to detect presence.
566+
for _, line := range strings.Split(c.Config, "\n") {
567+
line = strings.TrimSpace(line)
568+
if !strings.HasPrefix(line, "MinerAddresses") {
569+
continue
570+
}
571+
// Extract addresses from the TOML array value
572+
// e.g. MinerAddresses = ["f01234", "f05678"]
573+
idx := strings.Index(line, "[")
574+
if idx < 0 {
575+
continue
576+
}
577+
arrStr := line[idx:]
578+
arrStr = strings.Trim(arrStr, "[]")
579+
for _, part := range strings.Split(arrStr, ",") {
580+
part = strings.TrimSpace(part)
581+
part = strings.Trim(part, `"' `)
582+
if part == "" {
583+
continue
584+
}
585+
addr, err := address.NewFromString(part)
586+
if err != nil {
587+
continue
588+
}
589+
result[addr] = true
590+
}
591+
}
592+
}
593+
594+
return result, nil
595+
}

0 commit comments

Comments
 (0)