fix: detect failed next proving periods and adjust them

frrist · frrist · commit 2c158124f62e · 2025-12-11T11:38:34.000-08:00
- Add GetChallengeFinality to the smart-contract verifier interface and implementation.
  This exposes the on-chain challengeFinality setting.

- Update NextProvingPeriodTask to pull the stored challenge_window for each
  proof set, fetch the current tipset height and challenge finality, and detect when
  NextPDPChallengeWindowStart returns an epoch that violates challengeEpoch >= block.number + challengeFinality.
  In that case we now call adjustNextProveAt to compute a future epoch aligned with the next challenge
  window and log the adjustment. The corrected epoch is sent to nextProvingPeriod and persisted so the
  scheduler heals itself the next time it runs.
diff --git a/pkg/pdp/smartcontracts/verifier.go b/pkg/pdp/smartcontracts/verifier.go
@@ -20,6 +20,7 @@ var log = logging.Logger("smartcontracts")
 type Verifier interface {
 	GetDataSetLeafCount(ctx context.Context, setId *big.Int) (*big.Int, error)
 	GetNextChallengeEpoch(ctx context.Context, setId *big.Int) (*big.Int, error)
+	GetChallengeFinality(ctx context.Context) (*big.Int, error)
 	GetDataSetListener(ctx context.Context, setId *big.Int) (common.Address, error)
 	GetDataSetStorageProvider(ctx context.Context, setId *big.Int) (common.Address, common.Address, error)
 	GetChallengeRange(ctx context.Context, setId *big.Int) (*big.Int, error)
@@ -72,6 +73,10 @@ func (v *verifierContract) GetNextChallengeEpoch(ctx context.Context, setId *big
 	return v.verifier.GetNextChallengeEpoch(&bind.CallOpts{Context: ctx}, setId)
 }
 
+func (v *verifierContract) GetChallengeFinality(ctx context.Context) (*big.Int, error) {
+	return v.verifier.GetChallengeFinality(&bind.CallOpts{Context: ctx})
+}
+
 func (v *verifierContract) GetDataSetListener(ctx context.Context, setId *big.Int) (common.Address, error) {
 	return v.verifier.GetDataSetListener(&bind.CallOpts{Context: ctx}, setId)
 }
diff --git a/pkg/pdp/tasks/next_pdp.go b/pkg/pdp/tasks/next_pdp.go
@@ -2,7 +2,6 @@ package tasks
 
 import (
 	"context"
-	"database/sql"
 	"errors"
 	"fmt"
 	"math/big"
@@ -23,8 +22,6 @@ import (
 
 var _ scheduler.TaskInterface = &NextProvingPeriodTask{}
 
-//var _ = scheduler.Reg(&NextProvingPeriodTask{})
-
 type NextProvingPeriodTask struct {
 	db        *gorm.DB
 	ethClient bind.ContractBackend
@@ -82,7 +79,7 @@ func NewNextProvingPeriodTask(
 					Where("id = ? AND challenge_request_task_id IS NULL", ps.ProofSetID).
 					Update("challenge_request_task_id", id)
 				if result.Error != nil {
-					return false, fmt.Errorf("failed to update pdp_proof_sets: %w", err)
+					return false, fmt.Errorf("failed to update pdp_proof_sets: %w", result.Error)
 				}
 				if result.RowsAffected == 0 {
 					// Someone else might have already scheduled the task
@@ -138,9 +135,9 @@ func (n *NextProvingPeriodTask) Do(taskID scheduler.TaskID) (done bool, err erro
 	err = n.db.WithContext(ctx).
 		Model(&models.PDPProofSet{}).
 		Where("challenge_request_task_id = ? AND prove_at_epoch IS NOT NULL", taskID).
-		Select("id").
+		Select("id", "challenge_window").
 		First(&pdp).Error
-	if errors.Is(err, sql.ErrNoRows) {
+	if errors.Is(err, gorm.ErrRecordNotFound) {
 		// No matching proof set, task is done (something weird happened, and e.g another task was spawned in place of this one)
 		return true, nil
 	}
@@ -154,6 +151,37 @@ func (n *NextProvingPeriodTask) Do(taskID scheduler.TaskID) (done bool, err erro
 		return false, fmt.Errorf("failed to get next challenge window start: %w", err)
 	}
 
+	if pdp.ChallengeWindow == nil {
+		return false, fmt.Errorf("proof set %d missing challenge window metadata", proofSetID)
+	}
+
+	challengeFinality, err := n.verifier.GetChallengeFinality(ctx)
+	if err != nil {
+		return false, fmt.Errorf("failed to get challenge finality: %w", err)
+	}
+
+	challengeWindow := big.NewInt(*pdp.ChallengeWindow)
+
+	ts, err := n.fil.ChainHead(ctx)
+	if err != nil {
+		return false, fmt.Errorf("failed to get chain head: %w", err)
+	}
+
+	minEpoch := big.NewInt(int64(ts.Height()))
+	minEpoch.Add(minEpoch, challengeFinality)
+
+	if nextProveAt.Cmp(minEpoch) < 0 {
+		adjusted := adjustNextProveAt(int64(ts.Height()), challengeFinality, challengeWindow)
+		log.Warnw("adjusting next prove epoch",
+			"proof_set_id", proofSetID,
+			"original_epoch", nextProveAt,
+			"adjusted_epoch", adjusted,
+			"current_height", ts.Height(),
+			"challenge_window", challengeWindow,
+		)
+		nextProveAt = adjusted
+	}
+
 	// Prepare the transaction data
 	abiData, err := n.verifier.GetABI()
 	if err != nil {
@@ -180,12 +208,6 @@ func (n *NextProvingPeriodTask) Do(taskID scheduler.TaskID) (done bool, err erro
 		return false, fmt.Errorf("failed to get default sender address: %w", err)
 	}
 
-	// Get the current tipset
-	ts, err := n.fil.ChainHead(ctx)
-	if err != nil {
-		return false, fmt.Errorf("failed to get chain head: %w", err)
-	}
-
 	// Send the transaction
 	reason := "pdp-proving-period"
 	log.Infow("Sending next proving period transaction", "task_id", taskID, "proof_set_id", proofSetID,
@@ -205,7 +227,7 @@ func (n *NextProvingPeriodTask) Do(taskID scheduler.TaskID) (done bool, err erro
 				"prove_at_epoch":               nextProveAt.Uint64(),
 			})
 		if result.Error != nil {
-			return fmt.Errorf("failed to update pdp_proof_sets: %w", err)
+			return fmt.Errorf("failed to update pdp_proof_sets: %w", result.Error)
 		}
 		if result.RowsAffected == 0 {
 			return fmt.Errorf("pdp_proof_sets update affected 0 rows")
diff --git a/pkg/pdp/tasks/next_pdp_test.go b/pkg/pdp/tasks/next_pdp_test.go
@@ -30,14 +30,6 @@ func TestAdjustNextProveAt(t *testing.T) {
 			expected:          2011, // minRequired=2002, next window=2010, result=2011 (2010+1)
 			description:       "When minRequired doesn't fall on boundary, find next window",
 		},
-		{
-			name:              "exact scenario from logs",
-			currentHeight:     2685164,
-			challengeFinality: big.NewInt(2),
-			challengeWindow:   big.NewInt(30),
-			expected:          2685181, // minRequired=2685166, next window=2685180, result=2685181 (2685180+1)
-			description:       "Real scenario should produce predictable window placement",
-		},
 		{
 			name:              "falls exactly on window boundary",
 			currentHeight:     100,
@@ -47,25 +39,43 @@ func TestAdjustNextProveAt(t *testing.T) {
 			description:       "When minRequired falls exactly on boundary, move to next window",
 		},
 		{
-			name:              "smart contract realistic values",
+			name:              "mainnet params inside current window",
+			currentHeight:     5568958,
+			challengeFinality: big.NewInt(150), // mainnet challengeFinality
+			challengeWindow:   big.NewInt(20),  // mainnet challenge window size
+			expected:          5569121,         // minRequired=5569108, next window=5569120, result=5569121
+			description:       "Large finality relative to window should advance to the next window boundary",
+		},
+		{
+			name:              "mainnet params exact boundary",
+			currentHeight:     1000010,         // (height + finality) lands exactly on a 20-epoch boundary
+			challengeFinality: big.NewInt(150), // mainnet challengeFinality
+			challengeWindow:   big.NewInt(20),  // mainnet challenge window size
+			expected:          1000181,         // minRequired=1000160, boundary=1000160 => next window 1000180, result=1000181
+			description:       "Exact boundary must bump to the next window and add 1 epoch",
+		},
+		{
+			name:              "realistic small finality/window",
 			currentHeight:     1000000,
-			challengeFinality: big.NewInt(2),  // MinConfidence from watcher_eth.go
-			challengeWindow:   big.NewInt(30), // Common challenge window from tests
-			expected:          1000021,        // minRequired=1000002, windowStart=999990+30=1000020, result=1000021 (1000020+1)
-			description:       "Realistic smart contract values with challenge window 30 and finality 2",
+			challengeFinality: big.NewInt(2),
+			challengeWindow:   big.NewInt(30),
+			expected:          1000021, // minRequired=1000002, next window=1000020, result=1000021
+			description:       "Small finality with 30-epoch windows advances to the immediate next window",
 		},
 		{
-			name:              "proving period scenario",
-			currentHeight:     500000,
+			name:              "late in window still advances only one window",
+			currentHeight:     2685164,
 			challengeFinality: big.NewInt(2),
-			challengeWindow:   big.NewInt(60), // As requested - proving period of 60
-			expected:          500041,         // minRequired=500002, next window=500040, result=500041 (500040+1)
-			description:       "Scenario with proving period/challenge window of 60 epochs",
+			challengeWindow:   big.NewInt(30),
+			expected:          2685181, // minRequired=2685166, next window=2685180, result=2685181
+			description:       "Late-window case advances to next boundary, not multiple windows ahead",
 		},
 	}
 
 	for _, tt := range tests {
+		tt := tt
 		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
 			result := adjustNextProveAt(tt.currentHeight, tt.challengeFinality, tt.challengeWindow)
 			resultInt := result.Int64()