Skip to content

Commit 75d9fb6

Browse files
committed
Fix race condition when re-triggering GitHub Actions
When a GitHub Action is re-triggered, GitHub temporarily removes the old check status before the new run starts. This causes a race where Tide may merge the PR during the brief window when the check is missing. This fix tracks previously seen contexts per PR (not per commit, so it works across force pushes). When a required context disappears, it's treated as PENDING to prevent premature merging. The context history is automatically pruned each sync to prevent memory leaks, and duplicates are avoided when a context is both missing and disappeared. Signed-off-by: Sascha Grunert <[email protected]>
1 parent dc898f4 commit 75d9fb6

File tree

3 files changed

+318
-12
lines changed

3 files changed

+318
-12
lines changed

pkg/tide/status.go

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,66 @@ type storedState struct {
6262
PreviousQuery string
6363
}
6464

65+
// contextHistory tracks previously seen contexts for a PR/commit combination
66+
// to detect when required contexts disappear (e.g., when GitHub Actions are re-triggered)
67+
type contextHistory struct {
68+
prCommitContexts map[string]sets.Set[string]
69+
sync.Mutex
70+
}
71+
72+
func newContextHistory() *contextHistory {
73+
return &contextHistory{
74+
prCommitContexts: make(map[string]sets.Set[string]),
75+
}
76+
}
77+
78+
// checkAndUpdate returns contexts that disappeared since last check, then updates history.
79+
// This detects the race condition when GitHub Actions are re-triggered and the check
80+
// status temporarily disappears before the new run starts.
81+
//
82+
// Note: Uses PR number only (not commit SHA) to track across force pushes. This means
83+
// if a force push genuinely removes a context from the workflow, it will be detected
84+
// as "disappeared" until the next sync. This is acceptable as it errs on the side of
85+
// preventing premature merges.
86+
func (ch *contextHistory) checkAndUpdate(pr *CodeReviewCommon, currentContexts []string) []string {
87+
key := fmt.Sprintf("%s/%s#%d", pr.Org, pr.Repo, pr.Number)
88+
89+
ch.Lock()
90+
defer ch.Unlock()
91+
92+
var disappeared []string
93+
if previous, exists := ch.prCommitContexts[key]; exists {
94+
currentSet := sets.New[string](currentContexts...)
95+
for ctx := range previous.Difference(currentSet) {
96+
disappeared = append(disappeared, ctx)
97+
}
98+
}
99+
100+
ch.prCommitContexts[key] = sets.New[string](currentContexts...)
101+
return disappeared
102+
}
103+
104+
// prune removes entries for PRs that are no longer in the pool.
105+
// This prevents unbounded memory growth by cleaning up old/merged PRs.
106+
func (ch *contextHistory) prune(activePRs map[string]CodeReviewCommon) {
107+
ch.Lock()
108+
defer ch.Unlock()
109+
110+
// Build set of active PR keys
111+
active := sets.New[string]()
112+
for _, pr := range activePRs {
113+
key := fmt.Sprintf("%s/%s#%d", pr.Org, pr.Repo, pr.Number)
114+
active.Insert(key)
115+
}
116+
117+
// Remove entries not in active set
118+
for key := range ch.prCommitContexts {
119+
if !active.Has(key) {
120+
delete(ch.prCommitContexts, key)
121+
}
122+
}
123+
}
124+
65125
// statusController is a goroutine runs in the background
66126
type statusController struct {
67127
pjClient ctrlruntimeclient.Client
@@ -106,6 +166,9 @@ type statusUpdate struct {
106166
// newPoolPending is a size 1 chan that signals that the main Tide loop has
107167
// updated the 'poolPRs' field with a freshly updated pool.
108168
newPoolPending chan bool
169+
// contextHistory tracks previously seen contexts to detect disappearing contexts
170+
// when GitHub Actions are re-triggered (addresses issue #337)
171+
contextHistory *contextHistory
109172
}
110173

111174
func (sc *statusController) shutdown() {
@@ -238,7 +301,8 @@ func requirementDiff(pr *PullRequest, q *config.TideQuery, cc contextChecker) (s
238301
log := logrus.WithFields(pr.logFields())
239302
for _, commit := range pr.Commits.Nodes {
240303
if commit.Commit.OID == pr.HeadRefOID {
241-
for _, ctx := range unsuccessfulContexts(append(commit.Commit.Status.Contexts, checkRunNodesToContexts(log, commit.Commit.StatusCheckRollup.Contexts.Nodes)...), cc, log) {
304+
crc := CodeReviewCommonFromPullRequest(pr)
305+
for _, ctx := range unsuccessfulContexts(append(commit.Commit.Status.Contexts, checkRunNodesToContexts(log, commit.Commit.StatusCheckRollup.Contexts.Nodes)...), cc, crc, nil, log) {
242306
contexts = append(contexts, string(ctx.Context))
243307
}
244308
}

pkg/tide/tide.go

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,7 @@ func NewController(
406406
statusUpdate := &statusUpdate{
407407
dontUpdateStatus: &threadSafePRSet{},
408408
newPoolPending: make(chan bool),
409+
contextHistory: newContextHistory(),
409410
}
410411

411412
sc, err := newStatusController(ctx, logger, ghcStatus, mgr, gc, cfg, opener, statusURI, mergeChecker, usesGitHubAppsAuth, statusUpdate)
@@ -582,6 +583,10 @@ func (c *syncController) Sync() error {
582583
c.statusUpdate.poolPRs = poolPRMap(filteredPools)
583584
c.statusUpdate.baseSHAs = baseSHAMap(filteredPools)
584585
c.statusUpdate.requiredContexts = requiredContextsMap(filteredPools)
586+
// Prune old PR entries from context history to prevent memory leak
587+
if c.statusUpdate.contextHistory != nil {
588+
c.statusUpdate.contextHistory.prune(c.statusUpdate.poolPRs)
589+
}
585590
select {
586591
case c.statusUpdate.newPoolPending <- true:
587592
c.statusUpdate.dontUpdateStatus.reset()
@@ -675,7 +680,11 @@ func (c *syncController) filterSubpools(mergeAllowed func(*CodeReviewCommon) (st
675680
return
676681
}
677682
key := poolKey(sp.org, sp.repo, sp.branch)
678-
if spFiltered := filterSubpool(c.provider, mergeAllowed, sp); spFiltered != nil {
683+
var ch *contextHistory
684+
if c.statusUpdate != nil {
685+
ch = c.statusUpdate.contextHistory
686+
}
687+
if spFiltered := filterSubpool(c.provider, mergeAllowed, sp, ch); spFiltered != nil {
679688
sp.log.WithField("key", key).WithField("pool", spFiltered).Debug("filtered sub-pool")
680689

681690
lock.Lock()
@@ -727,10 +736,10 @@ func (c *syncController) initSubpoolData(sp *subpool) error {
727736
// should be deleted.
728737
//
729738
// This function works for any source code provider.
730-
func filterSubpool(provider provider, mergeAllowed func(*CodeReviewCommon) (string, error), sp *subpool) *subpool {
739+
func filterSubpool(provider provider, mergeAllowed func(*CodeReviewCommon) (string, error), sp *subpool, ch *contextHistory) *subpool {
731740
var toKeep []CodeReviewCommon
732741
for _, pr := range sp.prs {
733-
if !filterPR(provider, mergeAllowed, sp, &pr) {
742+
if !filterPR(provider, mergeAllowed, sp, &pr, ch) {
734743
toKeep = append(toKeep, pr)
735744
}
736745
}
@@ -752,7 +761,7 @@ func filterSubpool(provider provider, mergeAllowed func(*CodeReviewCommon) (stri
752761
// retesting them.)
753762
//
754763
// This function works for any source code provider.
755-
func filterPR(provider provider, mergeAllowed func(*CodeReviewCommon) (string, error), sp *subpool, pr *CodeReviewCommon) bool {
764+
func filterPR(provider provider, mergeAllowed func(*CodeReviewCommon) (string, error), sp *subpool, pr *CodeReviewCommon, ch *contextHistory) bool {
756765
log := sp.log.WithFields(pr.logFields())
757766
// Skip PRs that are known to be unmergeable.
758767
if reason, err := mergeAllowed(pr); err != nil {
@@ -778,7 +787,7 @@ func filterPR(provider provider, mergeAllowed func(*CodeReviewCommon) (string, e
778787
}
779788
return false
780789
}
781-
for _, ctx := range unsuccessfulContexts(contexts, sp.cc[pr.Number], log) {
790+
for _, ctx := range unsuccessfulContexts(contexts, sp.cc[pr.Number], pr, ch, log) {
782791
if ctx.State != githubql.StatusStatePending {
783792
log.WithField("context", ctx.Context).Debug("filtering out PR as unsuccessful context is not pending")
784793
return true
@@ -853,7 +862,11 @@ func (c *syncController) isPassingTests(log *logrus.Entry, pr *CodeReviewCommon,
853862
// If we can't get the status of the commit, assume that it is failing.
854863
return false
855864
}
856-
unsuccessful := unsuccessfulContexts(contexts, cc, log)
865+
var ch *contextHistory
866+
if c.statusUpdate != nil {
867+
ch = c.statusUpdate.contextHistory
868+
}
869+
unsuccessful := unsuccessfulContexts(contexts, cc, pr, ch, log)
857870
return len(unsuccessful) == 0
858871
}
859872

@@ -862,8 +875,12 @@ func (c *syncController) isPassingTests(log *logrus.Entry, pr *CodeReviewCommon,
862875
// If the branchProtection is set to only check for required checks, we will skip
863876
// all non-required tests. If required tests are missing from the list, they will be
864877
// added to the list of failed contexts.
865-
func unsuccessfulContexts(contexts []Context, cc contextChecker, log *logrus.Entry) []Context {
878+
// It also detects when required contexts disappear (e.g., when GitHub Actions are re-triggered)
879+
// and treats them as PENDING to prevent premature merging (addresses issue #337).
880+
func unsuccessfulContexts(contexts []Context, cc contextChecker, pr *CodeReviewCommon, ch *contextHistory, log *logrus.Entry) []Context {
866881
var failed []Context
882+
contextNames := contextsToStrings(contexts)
883+
867884
for _, ctx := range contexts {
868885
if string(ctx.Context) == statusContext {
869886
continue
@@ -875,13 +892,45 @@ func unsuccessfulContexts(contexts []Context, cc contextChecker, log *logrus.Ent
875892
failed = append(failed, ctx)
876893
}
877894
}
878-
for _, c := range cc.MissingRequiredContexts(contextsToStrings(contexts)) {
879-
failed = append(failed, newExpectedContext(c))
895+
896+
// Track which contexts are added as failed to avoid duplicates
897+
failedSet := sets.New[string]()
898+
for _, ctx := range failed {
899+
failedSet.Insert(string(ctx.Context))
900+
}
901+
902+
// Add missing required contexts
903+
for _, c := range cc.MissingRequiredContexts(contextNames) {
904+
if !failedSet.Has(c) {
905+
failed = append(failed, newExpectedContext(c))
906+
failedSet.Insert(c)
907+
}
908+
}
909+
910+
// Check for disappeared contexts (race condition fix)
911+
// When a GitHub Action is re-triggered, the check may temporarily disappear
912+
// from the status before the new run starts. Detect this and treat disappeared
913+
// required contexts as non-passing to prevent premature merging.
914+
if ch != nil && pr != nil {
915+
disappeared := ch.checkAndUpdate(pr, contextNames)
916+
if len(disappeared) > 0 {
917+
log.WithField("disappeared_contexts", disappeared).
918+
Info("Detected disappeared contexts - likely due to re-triggered checks")
919+
}
920+
for _, c := range disappeared {
921+
if !cc.IsOptional(c) && !failedSet.Has(c) {
922+
failed = append(failed, Context{
923+
Context: githubql.String(c),
924+
State: githubql.StatusStatePending,
925+
Description: githubql.String("Context disappeared - likely re-triggered"),
926+
})
927+
}
928+
}
880929
}
881930

882931
log.WithFields(logrus.Fields{
883932
"total_context_count": len(contexts),
884-
"context_names": contextsToStrings(contexts),
933+
"context_names": contextNames,
885934
"failed_context_count": len(failed),
886935
"failed_context_names": contextsToStrings(failed),
887936
}).Debug("Filtered out failed contexts")

0 commit comments

Comments
 (0)