diff --git a/pkg/tide/status.go b/pkg/tide/status.go index 17901811e..df36bfdc7 100644 --- a/pkg/tide/status.go +++ b/pkg/tide/status.go @@ -62,6 +62,66 @@ type storedState struct { PreviousQuery string } +// contextHistory tracks previously seen contexts for a PR/commit combination +// to detect when required contexts disappear (e.g., when GitHub Actions are re-triggered) +type contextHistory struct { + prCommitContexts map[string]sets.Set[string] + sync.Mutex +} + +func newContextHistory() *contextHistory { + return &contextHistory{ + prCommitContexts: make(map[string]sets.Set[string]), + } +} + +// checkAndUpdate returns contexts that disappeared since last check, then updates history. +// This detects the race condition when GitHub Actions are re-triggered and the check +// status temporarily disappears before the new run starts. +// +// Note: Uses PR number only (not commit SHA) to track across force pushes. This means +// if a force push genuinely removes a context from the workflow, it will be detected +// as "disappeared" until the next sync. This is acceptable as it errs on the side of +// preventing premature merges. +func (ch *contextHistory) checkAndUpdate(pr *CodeReviewCommon, currentContexts []string) []string { + key := fmt.Sprintf("%s/%s#%d", pr.Org, pr.Repo, pr.Number) + + ch.Lock() + defer ch.Unlock() + + var disappeared []string + if previous, exists := ch.prCommitContexts[key]; exists { + currentSet := sets.New[string](currentContexts...) + for ctx := range previous.Difference(currentSet) { + disappeared = append(disappeared, ctx) + } + } + + ch.prCommitContexts[key] = sets.New[string](currentContexts...) + return disappeared +} + +// prune removes entries for PRs that are no longer in the pool. +// This prevents unbounded memory growth by cleaning up old/merged PRs. +func (ch *contextHistory) prune(activePRs map[string]CodeReviewCommon) { + ch.Lock() + defer ch.Unlock() + + // Build set of active PR keys + active := sets.New[string]() + for _, pr := range activePRs { + key := fmt.Sprintf("%s/%s#%d", pr.Org, pr.Repo, pr.Number) + active.Insert(key) + } + + // Remove entries not in active set + for key := range ch.prCommitContexts { + if !active.Has(key) { + delete(ch.prCommitContexts, key) + } + } +} + // statusController is a goroutine runs in the background type statusController struct { pjClient ctrlruntimeclient.Client @@ -106,6 +166,9 @@ type statusUpdate struct { // newPoolPending is a size 1 chan that signals that the main Tide loop has // updated the 'poolPRs' field with a freshly updated pool. newPoolPending chan bool + // contextHistory tracks previously seen contexts to detect disappearing contexts + // when GitHub Actions are re-triggered (addresses issue #337) + contextHistory *contextHistory } func (sc *statusController) shutdown() { @@ -238,7 +301,8 @@ func requirementDiff(pr *PullRequest, q *config.TideQuery, cc contextChecker) (s log := logrus.WithFields(pr.logFields()) for _, commit := range pr.Commits.Nodes { if commit.Commit.OID == pr.HeadRefOID { - for _, ctx := range unsuccessfulContexts(append(commit.Commit.Status.Contexts, checkRunNodesToContexts(log, commit.Commit.StatusCheckRollup.Contexts.Nodes)...), cc, log) { + crc := CodeReviewCommonFromPullRequest(pr) + for _, ctx := range unsuccessfulContexts(append(commit.Commit.Status.Contexts, checkRunNodesToContexts(log, commit.Commit.StatusCheckRollup.Contexts.Nodes)...), cc, crc, nil, log) { contexts = append(contexts, string(ctx.Context)) } } diff --git a/pkg/tide/tide.go b/pkg/tide/tide.go index c4e974412..5c30d8522 100644 --- a/pkg/tide/tide.go +++ b/pkg/tide/tide.go @@ -406,6 +406,7 @@ func NewController( statusUpdate := &statusUpdate{ dontUpdateStatus: &threadSafePRSet{}, newPoolPending: make(chan bool), + contextHistory: newContextHistory(), } sc, err := newStatusController(ctx, logger, ghcStatus, mgr, gc, cfg, opener, statusURI, mergeChecker, usesGitHubAppsAuth, statusUpdate) @@ -582,6 +583,10 @@ func (c *syncController) Sync() error { c.statusUpdate.poolPRs = poolPRMap(filteredPools) c.statusUpdate.baseSHAs = baseSHAMap(filteredPools) c.statusUpdate.requiredContexts = requiredContextsMap(filteredPools) + // Prune old PR entries from context history to prevent memory leak + if c.statusUpdate.contextHistory != nil { + c.statusUpdate.contextHistory.prune(c.statusUpdate.poolPRs) + } select { case c.statusUpdate.newPoolPending <- true: c.statusUpdate.dontUpdateStatus.reset() @@ -675,7 +680,11 @@ func (c *syncController) filterSubpools(mergeAllowed func(*CodeReviewCommon) (st return } key := poolKey(sp.org, sp.repo, sp.branch) - if spFiltered := filterSubpool(c.provider, mergeAllowed, sp); spFiltered != nil { + var ch *contextHistory + if c.statusUpdate != nil { + ch = c.statusUpdate.contextHistory + } + if spFiltered := filterSubpool(c.provider, mergeAllowed, sp, ch); spFiltered != nil { sp.log.WithField("key", key).WithField("pool", spFiltered).Debug("filtered sub-pool") lock.Lock() @@ -727,10 +736,10 @@ func (c *syncController) initSubpoolData(sp *subpool) error { // should be deleted. // // This function works for any source code provider. -func filterSubpool(provider provider, mergeAllowed func(*CodeReviewCommon) (string, error), sp *subpool) *subpool { +func filterSubpool(provider provider, mergeAllowed func(*CodeReviewCommon) (string, error), sp *subpool, ch *contextHistory) *subpool { var toKeep []CodeReviewCommon for _, pr := range sp.prs { - if !filterPR(provider, mergeAllowed, sp, &pr) { + if !filterPR(provider, mergeAllowed, sp, &pr, ch) { toKeep = append(toKeep, pr) } } @@ -752,7 +761,7 @@ func filterSubpool(provider provider, mergeAllowed func(*CodeReviewCommon) (stri // retesting them.) // // This function works for any source code provider. -func filterPR(provider provider, mergeAllowed func(*CodeReviewCommon) (string, error), sp *subpool, pr *CodeReviewCommon) bool { +func filterPR(provider provider, mergeAllowed func(*CodeReviewCommon) (string, error), sp *subpool, pr *CodeReviewCommon, ch *contextHistory) bool { log := sp.log.WithFields(pr.logFields()) // Skip PRs that are known to be unmergeable. if reason, err := mergeAllowed(pr); err != nil { @@ -778,7 +787,7 @@ func filterPR(provider provider, mergeAllowed func(*CodeReviewCommon) (string, e } return false } - for _, ctx := range unsuccessfulContexts(contexts, sp.cc[pr.Number], log) { + for _, ctx := range unsuccessfulContexts(contexts, sp.cc[pr.Number], pr, ch, log) { if ctx.State != githubql.StatusStatePending { log.WithField("context", ctx.Context).Debug("filtering out PR as unsuccessful context is not pending") return true @@ -853,7 +862,11 @@ func (c *syncController) isPassingTests(log *logrus.Entry, pr *CodeReviewCommon, // If we can't get the status of the commit, assume that it is failing. return false } - unsuccessful := unsuccessfulContexts(contexts, cc, log) + var ch *contextHistory + if c.statusUpdate != nil { + ch = c.statusUpdate.contextHistory + } + unsuccessful := unsuccessfulContexts(contexts, cc, pr, ch, log) return len(unsuccessful) == 0 } @@ -862,8 +875,12 @@ func (c *syncController) isPassingTests(log *logrus.Entry, pr *CodeReviewCommon, // If the branchProtection is set to only check for required checks, we will skip // all non-required tests. If required tests are missing from the list, they will be // added to the list of failed contexts. -func unsuccessfulContexts(contexts []Context, cc contextChecker, log *logrus.Entry) []Context { +// It also detects when required contexts disappear (e.g., when GitHub Actions are re-triggered) +// and treats them as PENDING to prevent premature merging (addresses issue #337). +func unsuccessfulContexts(contexts []Context, cc contextChecker, pr *CodeReviewCommon, ch *contextHistory, log *logrus.Entry) []Context { var failed []Context + contextNames := contextsToStrings(contexts) + for _, ctx := range contexts { if string(ctx.Context) == statusContext { continue @@ -875,13 +892,45 @@ func unsuccessfulContexts(contexts []Context, cc contextChecker, log *logrus.Ent failed = append(failed, ctx) } } - for _, c := range cc.MissingRequiredContexts(contextsToStrings(contexts)) { - failed = append(failed, newExpectedContext(c)) + + // Track which contexts are added as failed to avoid duplicates + failedSet := sets.New[string]() + for _, ctx := range failed { + failedSet.Insert(string(ctx.Context)) + } + + // Add missing required contexts + for _, c := range cc.MissingRequiredContexts(contextNames) { + if !failedSet.Has(c) { + failed = append(failed, newExpectedContext(c)) + failedSet.Insert(c) + } + } + + // Check for disappeared contexts (race condition fix) + // When a GitHub Action is re-triggered, the check may temporarily disappear + // from the status before the new run starts. Detect this and treat disappeared + // required contexts as non-passing to prevent premature merging. + if ch != nil && pr != nil { + disappeared := ch.checkAndUpdate(pr, contextNames) + if len(disappeared) > 0 { + log.WithField("disappeared_contexts", disappeared). + Info("Detected disappeared contexts - likely due to re-triggered checks") + } + for _, c := range disappeared { + if !cc.IsOptional(c) && !failedSet.Has(c) { + failed = append(failed, Context{ + Context: githubql.String(c), + State: githubql.StatusStatePending, + Description: githubql.String("Context disappeared - likely re-triggered"), + }) + } + } } log.WithFields(logrus.Fields{ "total_context_count": len(contexts), - "context_names": contextsToStrings(contexts), + "context_names": contextNames, "failed_context_count": len(failed), "failed_context_names": contextsToStrings(failed), }).Debug("Filtered out failed contexts") diff --git a/pkg/tide/tide_test.go b/pkg/tide/tide_test.go index d114dca55..4bceb8e66 100644 --- a/pkg/tide/tide_test.go +++ b/pkg/tide/tide_test.go @@ -2908,7 +2908,7 @@ func TestFilterSubpool(t *testing.T) { mergeChecker: mmc, logger: logrus.WithContext(context.Background()), } - filtered := filterSubpool(provider, mmc.isAllowedToMerge, sp) + filtered := filterSubpool(provider, mmc.isAllowedToMerge, sp, newContextHistory()) if len(tc.expectedPRs) == 0 { if filtered != nil { t.Fatalf("Expected subpool to be pruned, but got: %v", filtered) @@ -5482,3 +5482,196 @@ func TestSerialRetestingConsidersPRThatIsCurrentlyBeingSRetested(t *testing.T) { } } + +// TestUnsuccessfulContextsWithDisappearedContexts tests the fix for issue #337 +// where re-triggering GitHub Actions causes contexts to temporarily disappear +func TestUnsuccessfulContextsWithDisappearedContexts(t *testing.T) { + log := logrus.NewEntry(logrus.StandardLogger()) + + testCases := []struct { + name string + contexts []Context + previousContexts []string + requiredContexts []string + expectedFailed int + }{ + { + name: "no disappeared contexts", + contexts: []Context{ + {Context: githubql.String("test-1"), State: githubql.StatusStateSuccess}, + {Context: githubql.String("test-2"), State: githubql.StatusStateSuccess}, + }, + previousContexts: []string{"test-1", "test-2"}, + requiredContexts: []string{"test-1", "test-2"}, + expectedFailed: 0, + }, + { + name: "context disappeared - race condition", + contexts: []Context{ + {Context: githubql.String("test-1"), State: githubql.StatusStateSuccess}, + }, + previousContexts: []string{"test-1", "test-2"}, + requiredContexts: []string{"test-1", "test-2"}, + expectedFailed: 1, // test-2 is both missing and disappeared, counted once + }, + { + name: "unknown context disappeared", + contexts: []Context{ + {Context: githubql.String("test-1"), State: githubql.StatusStateSuccess}, + }, + previousContexts: []string{"test-1", "test-2"}, + requiredContexts: []string{"test-1"}, + // test-2 is unknown (not in required/optional), treated as required + // by default unless SkipUnknownContexts is set + expectedFailed: 1, + }, + { + name: "first time seeing PR - no history", + contexts: []Context{ + {Context: githubql.String("test-1"), State: githubql.StatusStateSuccess}, + }, + previousContexts: nil, + requiredContexts: []string{"test-1", "test-2"}, + expectedFailed: 1, // only missing required, no disappeared + }, + { + name: "context reappeared after disappearing", + contexts: []Context{ + {Context: githubql.String("test-1"), State: githubql.StatusStateSuccess}, + {Context: githubql.String("test-2"), State: githubql.StatusStatePending}, + }, + previousContexts: []string{"test-1"}, + requiredContexts: []string{"test-1", "test-2"}, + expectedFailed: 1, // test-2 is pending (not disappeared, it's present) + }, + { + name: "context disappears multiple times", + contexts: []Context{ + {Context: githubql.String("test-1"), State: githubql.StatusStateSuccess}, + }, + previousContexts: []string{"test-1", "test-2"}, // test-2 was seen before + requiredContexts: []string{"test-1", "test-2"}, + expectedFailed: 1, // test-2 disappeared, blocks merge + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ch := newContextHistory() + pr := &CodeReviewCommon{ + Org: "test-org", + Repo: "test-repo", + Number: 123, + HeadRefOID: "abc123", + } + + // Set up previous context history if provided + if tc.previousContexts != nil { + ch.checkAndUpdate(pr, tc.previousContexts) + } + + cc := &config.TideContextPolicy{ + RequiredContexts: tc.requiredContexts, + } + + failed := unsuccessfulContexts(tc.contexts, cc, pr, ch, log) + + if len(failed) != tc.expectedFailed { + t.Errorf("expected %d failed contexts, got %d: %+v", tc.expectedFailed, len(failed), failed) + } + }) + } +} + +// TestContextDisappearsMultipleTimes tests the scenario where a context +// appears, disappears, reappears, then disappears again +func TestContextDisappearsMultipleTimes(t *testing.T) { + log := logrus.NewEntry(logrus.StandardLogger()) + ch := newContextHistory() + pr := &CodeReviewCommon{ + Org: "test-org", + Repo: "test-repo", + Number: 123, + HeadRefOID: "abc123", + } + cc := &config.TideContextPolicy{ + RequiredContexts: []string{"test-1", "test-2"}, + } + + // T0: Both contexts present and successful + contexts1 := []Context{ + {Context: githubql.String("test-1"), State: githubql.StatusStateSuccess}, + {Context: githubql.String("test-2"), State: githubql.StatusStateSuccess}, + } + failed1 := unsuccessfulContexts(contexts1, cc, pr, ch, log) + if len(failed1) != 0 { + t.Errorf("T0: expected 0 failed, got %d", len(failed1)) + } + + // T1: test-2 disappears (re-triggered) + contexts2 := []Context{ + {Context: githubql.String("test-1"), State: githubql.StatusStateSuccess}, + } + failed2 := unsuccessfulContexts(contexts2, cc, pr, ch, log) + if len(failed2) != 1 { + t.Errorf("T1: expected 1 failed (disappeared test-2), got %d", len(failed2)) + } + + // T2: test-2 reappears as SUCCESS + contexts3 := []Context{ + {Context: githubql.String("test-1"), State: githubql.StatusStateSuccess}, + {Context: githubql.String("test-2"), State: githubql.StatusStateSuccess}, + } + failed3 := unsuccessfulContexts(contexts3, cc, pr, ch, log) + if len(failed3) != 0 { + t.Errorf("T2: expected 0 failed, got %d", len(failed3)) + } + + // T3: test-2 disappears AGAIN (re-triggered again) + contexts4 := []Context{ + {Context: githubql.String("test-1"), State: githubql.StatusStateSuccess}, + } + failed4 := unsuccessfulContexts(contexts4, cc, pr, ch, log) + if len(failed4) != 1 { + t.Errorf("T3: expected 1 failed (disappeared test-2 again), got %d", len(failed4)) + } +} + +// TestContextHistoryPrune tests that old PR entries are cleaned up +func TestContextHistoryPrune(t *testing.T) { + ch := newContextHistory() + + pr1 := &CodeReviewCommon{Org: "org", Repo: "repo", Number: 1, HeadRefOID: "sha1"} + pr2 := &CodeReviewCommon{Org: "org", Repo: "repo", Number: 2, HeadRefOID: "sha2"} + pr3 := &CodeReviewCommon{Org: "org", Repo: "repo", Number: 3, HeadRefOID: "sha3"} + + // Record contexts for multiple PRs + ch.checkAndUpdate(pr1, []string{"test-1", "test-2"}) + ch.checkAndUpdate(pr2, []string{"test-1", "test-2"}) + ch.checkAndUpdate(pr3, []string{"test-1", "test-2"}) + + if len(ch.prCommitContexts) != 3 { + t.Errorf("expected 3 entries, got %d", len(ch.prCommitContexts)) + } + + // Only PR 1 and 2 are still active + activePRs := map[string]CodeReviewCommon{ + "key1": *pr1, + "key2": *pr2, + } + ch.prune(activePRs) + + if len(ch.prCommitContexts) != 2 { + t.Errorf("expected 2 entries after prune, got %d", len(ch.prCommitContexts)) + } + + if !ch.prCommitContexts["org/repo#1"].Has("test-1") { + t.Error("PR 1 should still be tracked") + } + if !ch.prCommitContexts["org/repo#2"].Has("test-1") { + t.Error("PR 2 should still be tracked") + } + if _, exists := ch.prCommitContexts["org/repo#3"]; exists { + t.Error("PR 3 should have been pruned") + } +}