Skip to content

Commit 5552e4b

Browse files
committed
Fix race condition when re-triggering GitHub Actions
When a GitHub Action is re-triggered, GitHub temporarily removes the old check status before the new run starts. This causes a race where Tide may merge the PR during the brief window when the check is missing. This fix tracks previously seen contexts per PR (not per commit, so it works across force pushes). When a required context disappears, it's treated as PENDING to prevent premature merging. The context history is automatically pruned each sync to prevent memory leaks, and duplicates are avoided when a context is both missing and disappeared. Signed-off-by: Sascha Grunert <[email protected]>
1 parent dc898f4 commit 5552e4b

File tree

3 files changed

+307
-12
lines changed

3 files changed

+307
-12
lines changed

pkg/tide/status.go

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,62 @@ type storedState struct {
6262
PreviousQuery string
6363
}
6464

65+
// contextHistory tracks previously seen contexts for a PR/commit combination
66+
// to detect when required contexts disappear (e.g., when GitHub Actions are re-triggered)
67+
type contextHistory struct {
68+
prCommitContexts map[string]sets.Set[string]
69+
sync.Mutex
70+
}
71+
72+
func newContextHistory() *contextHistory {
73+
return &contextHistory{
74+
prCommitContexts: make(map[string]sets.Set[string]),
75+
}
76+
}
77+
78+
// checkAndUpdate returns contexts that disappeared since last check, then updates history.
79+
// This detects the race condition when GitHub Actions are re-triggered and the check
80+
// status temporarily disappears before the new run starts.
81+
func (ch *contextHistory) checkAndUpdate(pr *CodeReviewCommon, currentContexts []string) []string {
82+
// Use PR number only (not commit SHA) to track across force pushes
83+
key := fmt.Sprintf("%s/%s#%d", pr.Org, pr.Repo, pr.Number)
84+
85+
ch.Lock()
86+
defer ch.Unlock()
87+
88+
var disappeared []string
89+
if previous, exists := ch.prCommitContexts[key]; exists {
90+
currentSet := sets.New[string](currentContexts...)
91+
for ctx := range previous.Difference(currentSet) {
92+
disappeared = append(disappeared, ctx)
93+
}
94+
}
95+
96+
ch.prCommitContexts[key] = sets.New[string](currentContexts...)
97+
return disappeared
98+
}
99+
100+
// prune removes entries for PRs that are no longer in the pool.
101+
// This prevents unbounded memory growth by cleaning up old/merged PRs.
102+
func (ch *contextHistory) prune(activePRs map[string]CodeReviewCommon) {
103+
ch.Lock()
104+
defer ch.Unlock()
105+
106+
// Build set of active PR keys
107+
active := sets.New[string]()
108+
for _, pr := range activePRs {
109+
key := fmt.Sprintf("%s/%s#%d", pr.Org, pr.Repo, pr.Number)
110+
active.Insert(key)
111+
}
112+
113+
// Remove entries not in active set
114+
for key := range ch.prCommitContexts {
115+
if !active.Has(key) {
116+
delete(ch.prCommitContexts, key)
117+
}
118+
}
119+
}
120+
65121
// statusController is a goroutine runs in the background
66122
type statusController struct {
67123
pjClient ctrlruntimeclient.Client
@@ -106,6 +162,9 @@ type statusUpdate struct {
106162
// newPoolPending is a size 1 chan that signals that the main Tide loop has
107163
// updated the 'poolPRs' field with a freshly updated pool.
108164
newPoolPending chan bool
165+
// contextHistory tracks previously seen contexts to detect disappearing contexts
166+
// when GitHub Actions are re-triggered (addresses issue #337)
167+
contextHistory *contextHistory
109168
}
110169

111170
func (sc *statusController) shutdown() {
@@ -238,7 +297,8 @@ func requirementDiff(pr *PullRequest, q *config.TideQuery, cc contextChecker) (s
238297
log := logrus.WithFields(pr.logFields())
239298
for _, commit := range pr.Commits.Nodes {
240299
if commit.Commit.OID == pr.HeadRefOID {
241-
for _, ctx := range unsuccessfulContexts(append(commit.Commit.Status.Contexts, checkRunNodesToContexts(log, commit.Commit.StatusCheckRollup.Contexts.Nodes)...), cc, log) {
300+
crc := CodeReviewCommonFromPullRequest(pr)
301+
for _, ctx := range unsuccessfulContexts(append(commit.Commit.Status.Contexts, checkRunNodesToContexts(log, commit.Commit.StatusCheckRollup.Contexts.Nodes)...), cc, crc, nil, log) {
242302
contexts = append(contexts, string(ctx.Context))
243303
}
244304
}

pkg/tide/tide.go

Lines changed: 54 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,7 @@ func NewController(
406406
statusUpdate := &statusUpdate{
407407
dontUpdateStatus: &threadSafePRSet{},
408408
newPoolPending: make(chan bool),
409+
contextHistory: newContextHistory(),
409410
}
410411

411412
sc, err := newStatusController(ctx, logger, ghcStatus, mgr, gc, cfg, opener, statusURI, mergeChecker, usesGitHubAppsAuth, statusUpdate)
@@ -582,6 +583,10 @@ func (c *syncController) Sync() error {
582583
c.statusUpdate.poolPRs = poolPRMap(filteredPools)
583584
c.statusUpdate.baseSHAs = baseSHAMap(filteredPools)
584585
c.statusUpdate.requiredContexts = requiredContextsMap(filteredPools)
586+
// Prune old PR entries from context history to prevent memory leak
587+
if c.statusUpdate.contextHistory != nil {
588+
c.statusUpdate.contextHistory.prune(c.statusUpdate.poolPRs)
589+
}
585590
select {
586591
case c.statusUpdate.newPoolPending <- true:
587592
c.statusUpdate.dontUpdateStatus.reset()
@@ -675,7 +680,11 @@ func (c *syncController) filterSubpools(mergeAllowed func(*CodeReviewCommon) (st
675680
return
676681
}
677682
key := poolKey(sp.org, sp.repo, sp.branch)
678-
if spFiltered := filterSubpool(c.provider, mergeAllowed, sp); spFiltered != nil {
683+
var ch *contextHistory
684+
if c.statusUpdate != nil {
685+
ch = c.statusUpdate.contextHistory
686+
}
687+
if spFiltered := filterSubpool(c.provider, mergeAllowed, sp, ch); spFiltered != nil {
679688
sp.log.WithField("key", key).WithField("pool", spFiltered).Debug("filtered sub-pool")
680689

681690
lock.Lock()
@@ -727,10 +736,10 @@ func (c *syncController) initSubpoolData(sp *subpool) error {
727736
// should be deleted.
728737
//
729738
// This function works for any source code provider.
730-
func filterSubpool(provider provider, mergeAllowed func(*CodeReviewCommon) (string, error), sp *subpool) *subpool {
739+
func filterSubpool(provider provider, mergeAllowed func(*CodeReviewCommon) (string, error), sp *subpool, ch *contextHistory) *subpool {
731740
var toKeep []CodeReviewCommon
732741
for _, pr := range sp.prs {
733-
if !filterPR(provider, mergeAllowed, sp, &pr) {
742+
if !filterPR(provider, mergeAllowed, sp, &pr, ch) {
734743
toKeep = append(toKeep, pr)
735744
}
736745
}
@@ -752,7 +761,7 @@ func filterSubpool(provider provider, mergeAllowed func(*CodeReviewCommon) (stri
752761
// retesting them.)
753762
//
754763
// This function works for any source code provider.
755-
func filterPR(provider provider, mergeAllowed func(*CodeReviewCommon) (string, error), sp *subpool, pr *CodeReviewCommon) bool {
764+
func filterPR(provider provider, mergeAllowed func(*CodeReviewCommon) (string, error), sp *subpool, pr *CodeReviewCommon, ch *contextHistory) bool {
756765
log := sp.log.WithFields(pr.logFields())
757766
// Skip PRs that are known to be unmergeable.
758767
if reason, err := mergeAllowed(pr); err != nil {
@@ -778,7 +787,7 @@ func filterPR(provider provider, mergeAllowed func(*CodeReviewCommon) (string, e
778787
}
779788
return false
780789
}
781-
for _, ctx := range unsuccessfulContexts(contexts, sp.cc[pr.Number], log) {
790+
for _, ctx := range unsuccessfulContexts(contexts, sp.cc[pr.Number], pr, ch, log) {
782791
if ctx.State != githubql.StatusStatePending {
783792
log.WithField("context", ctx.Context).Debug("filtering out PR as unsuccessful context is not pending")
784793
return true
@@ -853,7 +862,11 @@ func (c *syncController) isPassingTests(log *logrus.Entry, pr *CodeReviewCommon,
853862
// If we can't get the status of the commit, assume that it is failing.
854863
return false
855864
}
856-
unsuccessful := unsuccessfulContexts(contexts, cc, log)
865+
var ch *contextHistory
866+
if c.statusUpdate != nil {
867+
ch = c.statusUpdate.contextHistory
868+
}
869+
unsuccessful := unsuccessfulContexts(contexts, cc, pr, ch, log)
857870
return len(unsuccessful) == 0
858871
}
859872

@@ -862,8 +875,12 @@ func (c *syncController) isPassingTests(log *logrus.Entry, pr *CodeReviewCommon,
862875
// If the branchProtection is set to only check for required checks, we will skip
863876
// all non-required tests. If required tests are missing from the list, they will be
864877
// added to the list of failed contexts.
865-
func unsuccessfulContexts(contexts []Context, cc contextChecker, log *logrus.Entry) []Context {
878+
// It also detects when required contexts disappear (e.g., when GitHub Actions are re-triggered)
879+
// and treats them as PENDING to prevent premature merging (addresses issue #337).
880+
func unsuccessfulContexts(contexts []Context, cc contextChecker, pr *CodeReviewCommon, ch *contextHistory, log *logrus.Entry) []Context {
866881
var failed []Context
882+
contextNames := contextsToStrings(contexts)
883+
867884
for _, ctx := range contexts {
868885
if string(ctx.Context) == statusContext {
869886
continue
@@ -875,13 +892,40 @@ func unsuccessfulContexts(contexts []Context, cc contextChecker, log *logrus.Ent
875892
failed = append(failed, ctx)
876893
}
877894
}
878-
for _, c := range cc.MissingRequiredContexts(contextsToStrings(contexts)) {
879-
failed = append(failed, newExpectedContext(c))
895+
896+
// Track which contexts are added as failed to avoid duplicates
897+
failedSet := sets.New[string]()
898+
for _, ctx := range failed {
899+
failedSet.Insert(string(ctx.Context))
900+
}
901+
902+
// Add missing required contexts
903+
for _, c := range cc.MissingRequiredContexts(contextNames) {
904+
if !failedSet.Has(c) {
905+
failed = append(failed, newExpectedContext(c))
906+
failedSet.Insert(c)
907+
}
908+
}
909+
910+
// Check for disappeared contexts (race condition fix for issue #337)
911+
// When a GitHub Action is re-triggered, the check may temporarily disappear
912+
// from the status before the new run starts. Detect this and treat disappeared
913+
// required contexts as non-passing to prevent premature merging.
914+
if ch != nil && pr != nil {
915+
for _, c := range ch.checkAndUpdate(pr, contextNames) {
916+
if !cc.IsOptional(c) && !failedSet.Has(c) {
917+
failed = append(failed, Context{
918+
Context: githubql.String(c),
919+
State: githubql.StatusStatePending,
920+
Description: githubql.String("Context disappeared - likely re-triggered"),
921+
})
922+
}
923+
}
880924
}
881925

882926
log.WithFields(logrus.Fields{
883927
"total_context_count": len(contexts),
884-
"context_names": contextsToStrings(contexts),
928+
"context_names": contextNames,
885929
"failed_context_count": len(failed),
886930
"failed_context_names": contextsToStrings(failed),
887931
}).Debug("Filtered out failed contexts")

0 commit comments

Comments
 (0)