Skip to content

Commit d901412

Browse files
committed
Fix race condition when re-triggering GitHub Actions
When a GitHub Action is re-triggered, GitHub temporarily removes the old check status before the new run starts. This causes a race where Tide may merge the PR during the brief window when the check is missing. This fix tracks previously seen contexts per PR/commit. When a required context disappears, it's treated as PENDING to prevent premature merging. Signed-off-by: Sascha Grunert <[email protected]>
1 parent dc898f4 commit d901412

File tree

3 files changed

+241
-11
lines changed

3 files changed

+241
-11
lines changed

pkg/tide/status.go

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,62 @@ type storedState struct {
6262
PreviousQuery string
6363
}
6464

65+
// contextHistory tracks previously seen contexts for a PR/commit combination
66+
// to detect when required contexts disappear (e.g., when GitHub Actions are re-triggered)
67+
type contextHistory struct {
68+
// prCommitContexts maps "org/repo#number:sha" -> set of context names seen
69+
prCommitContexts map[string]sets.Set[string]
70+
sync.RWMutex
71+
}
72+
73+
func newContextHistory() *contextHistory {
74+
return &contextHistory{
75+
prCommitContexts: make(map[string]sets.Set[string]),
76+
}
77+
}
78+
79+
// recordContexts records the contexts seen for a PR/commit combination
80+
func (ch *contextHistory) recordContexts(org, repo string, number int, sha string, contexts []string) {
81+
key := fmt.Sprintf("%s/%s#%d:%s", org, repo, number, sha)
82+
ch.Lock()
83+
defer ch.Unlock()
84+
ch.prCommitContexts[key] = sets.New[string](contexts...)
85+
}
86+
87+
// getDisappearedContexts returns contexts that were previously seen but are now missing
88+
// This detects the race condition when GitHub Actions are re-triggered and the check
89+
// status temporarily disappears before the new run starts.
90+
func (ch *contextHistory) getDisappearedContexts(org, repo string, number int, sha string, currentContexts []string) []string {
91+
key := fmt.Sprintf("%s/%s#%d:%s", org, repo, number, sha)
92+
ch.RLock()
93+
defer ch.RUnlock()
94+
95+
previousContexts, exists := ch.prCommitContexts[key]
96+
if !exists {
97+
// First time seeing this PR/commit, no disappeared contexts
98+
return nil
99+
}
100+
101+
currentSet := sets.New[string](currentContexts...)
102+
var disappeared []string
103+
for ctx := range previousContexts.Difference(currentSet) {
104+
disappeared = append(disappeared, ctx)
105+
}
106+
return disappeared
107+
}
108+
109+
// cleanup removes old entries to prevent unbounded memory growth
110+
// This should be called periodically with PRs that are no longer active
111+
func (ch *contextHistory) cleanup(activeKeys sets.Set[string]) {
112+
ch.Lock()
113+
defer ch.Unlock()
114+
for key := range ch.prCommitContexts {
115+
if !activeKeys.Has(key) {
116+
delete(ch.prCommitContexts, key)
117+
}
118+
}
119+
}
120+
65121
// statusController is a goroutine runs in the background
66122
type statusController struct {
67123
pjClient ctrlruntimeclient.Client
@@ -106,6 +162,9 @@ type statusUpdate struct {
106162
// newPoolPending is a size 1 chan that signals that the main Tide loop has
107163
// updated the 'poolPRs' field with a freshly updated pool.
108164
newPoolPending chan bool
165+
// contextHistory tracks previously seen contexts to detect disappearing contexts
166+
// when GitHub Actions are re-triggered (addresses issue #337)
167+
contextHistory *contextHistory
109168
}
110169

111170
func (sc *statusController) shutdown() {
@@ -238,7 +297,8 @@ func requirementDiff(pr *PullRequest, q *config.TideQuery, cc contextChecker) (s
238297
log := logrus.WithFields(pr.logFields())
239298
for _, commit := range pr.Commits.Nodes {
240299
if commit.Commit.OID == pr.HeadRefOID {
241-
for _, ctx := range unsuccessfulContexts(append(commit.Commit.Status.Contexts, checkRunNodesToContexts(log, commit.Commit.StatusCheckRollup.Contexts.Nodes)...), cc, log) {
300+
crc := CodeReviewCommonFromPullRequest(pr)
301+
for _, ctx := range unsuccessfulContexts(append(commit.Commit.Status.Contexts, checkRunNodesToContexts(log, commit.Commit.StatusCheckRollup.Contexts.Nodes)...), cc, crc, nil, log) {
242302
contexts = append(contexts, string(ctx.Context))
243303
}
244304
}

pkg/tide/tide.go

Lines changed: 48 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,7 @@ func NewController(
406406
statusUpdate := &statusUpdate{
407407
dontUpdateStatus: &threadSafePRSet{},
408408
newPoolPending: make(chan bool),
409+
contextHistory: newContextHistory(),
409410
}
410411

411412
sc, err := newStatusController(ctx, logger, ghcStatus, mgr, gc, cfg, opener, statusURI, mergeChecker, usesGitHubAppsAuth, statusUpdate)
@@ -675,7 +676,11 @@ func (c *syncController) filterSubpools(mergeAllowed func(*CodeReviewCommon) (st
675676
return
676677
}
677678
key := poolKey(sp.org, sp.repo, sp.branch)
678-
if spFiltered := filterSubpool(c.provider, mergeAllowed, sp); spFiltered != nil {
679+
var ch *contextHistory
680+
if c.statusUpdate != nil {
681+
ch = c.statusUpdate.contextHistory
682+
}
683+
if spFiltered := filterSubpool(c.provider, mergeAllowed, sp, ch); spFiltered != nil {
679684
sp.log.WithField("key", key).WithField("pool", spFiltered).Debug("filtered sub-pool")
680685

681686
lock.Lock()
@@ -727,10 +732,10 @@ func (c *syncController) initSubpoolData(sp *subpool) error {
727732
// should be deleted.
728733
//
729734
// This function works for any source code provider.
730-
func filterSubpool(provider provider, mergeAllowed func(*CodeReviewCommon) (string, error), sp *subpool) *subpool {
735+
func filterSubpool(provider provider, mergeAllowed func(*CodeReviewCommon) (string, error), sp *subpool, ch *contextHistory) *subpool {
731736
var toKeep []CodeReviewCommon
732737
for _, pr := range sp.prs {
733-
if !filterPR(provider, mergeAllowed, sp, &pr) {
738+
if !filterPR(provider, mergeAllowed, sp, &pr, ch) {
734739
toKeep = append(toKeep, pr)
735740
}
736741
}
@@ -752,7 +757,7 @@ func filterSubpool(provider provider, mergeAllowed func(*CodeReviewCommon) (stri
752757
// retesting them.)
753758
//
754759
// This function works for any source code provider.
755-
func filterPR(provider provider, mergeAllowed func(*CodeReviewCommon) (string, error), sp *subpool, pr *CodeReviewCommon) bool {
760+
func filterPR(provider provider, mergeAllowed func(*CodeReviewCommon) (string, error), sp *subpool, pr *CodeReviewCommon, ch *contextHistory) bool {
756761
log := sp.log.WithFields(pr.logFields())
757762
// Skip PRs that are known to be unmergeable.
758763
if reason, err := mergeAllowed(pr); err != nil {
@@ -778,7 +783,7 @@ func filterPR(provider provider, mergeAllowed func(*CodeReviewCommon) (string, e
778783
}
779784
return false
780785
}
781-
for _, ctx := range unsuccessfulContexts(contexts, sp.cc[pr.Number], log) {
786+
for _, ctx := range unsuccessfulContexts(contexts, sp.cc[pr.Number], pr, ch, log) {
782787
if ctx.State != githubql.StatusStatePending {
783788
log.WithField("context", ctx.Context).Debug("filtering out PR as unsuccessful context is not pending")
784789
return true
@@ -853,7 +858,11 @@ func (c *syncController) isPassingTests(log *logrus.Entry, pr *CodeReviewCommon,
853858
// If we can't get the status of the commit, assume that it is failing.
854859
return false
855860
}
856-
unsuccessful := unsuccessfulContexts(contexts, cc, log)
861+
var ch *contextHistory
862+
if c.statusUpdate != nil {
863+
ch = c.statusUpdate.contextHistory
864+
}
865+
unsuccessful := unsuccessfulContexts(contexts, cc, pr, ch, log)
857866
return len(unsuccessful) == 0
858867
}
859868

@@ -862,8 +871,12 @@ func (c *syncController) isPassingTests(log *logrus.Entry, pr *CodeReviewCommon,
862871
// If the branchProtection is set to only check for required checks, we will skip
863872
// all non-required tests. If required tests are missing from the list, they will be
864873
// added to the list of failed contexts.
865-
func unsuccessfulContexts(contexts []Context, cc contextChecker, log *logrus.Entry) []Context {
874+
// It also detects when required contexts disappear (e.g., when GitHub Actions are re-triggered)
875+
// and treats them as PENDING to prevent premature merging (addresses issue #337).
876+
func unsuccessfulContexts(contexts []Context, cc contextChecker, pr *CodeReviewCommon, ch *contextHistory, log *logrus.Entry) []Context {
866877
var failed []Context
878+
contextNames := contextsToStrings(contexts)
879+
867880
for _, ctx := range contexts {
868881
if string(ctx.Context) == statusContext {
869882
continue
@@ -875,13 +888,39 @@ func unsuccessfulContexts(contexts []Context, cc contextChecker, log *logrus.Ent
875888
failed = append(failed, ctx)
876889
}
877890
}
878-
for _, c := range cc.MissingRequiredContexts(contextsToStrings(contexts)) {
891+
892+
// Add missing required contexts
893+
for _, c := range cc.MissingRequiredContexts(contextNames) {
879894
failed = append(failed, newExpectedContext(c))
880895
}
881896

897+
// Check for disappeared contexts (race condition fix for issue #337)
898+
// When a GitHub Action is re-triggered, the check may temporarily disappear
899+
// from the status before the new run starts. We need to detect this and
900+
// treat it as a non-passing state to prevent premature merging.
901+
if ch != nil && pr != nil {
902+
disappeared := ch.getDisappearedContexts(pr.Org, pr.Repo, pr.Number, pr.HeadRefOID, contextNames)
903+
if len(disappeared) > 0 {
904+
log.WithField("disappeared_contexts", disappeared).
905+
Warn("Detected disappeared contexts - likely due to re-triggered checks")
906+
for _, c := range disappeared {
907+
// Only add if it's not optional and not already in failed list
908+
if !cc.IsOptional(c) {
909+
failed = append(failed, Context{
910+
Context: githubql.String(c),
911+
State: githubql.StatusStatePending,
912+
Description: githubql.String("Context disappeared - likely re-triggered"),
913+
})
914+
}
915+
}
916+
}
917+
// Record current contexts for future comparison
918+
ch.recordContexts(pr.Org, pr.Repo, pr.Number, pr.HeadRefOID, contextNames)
919+
}
920+
882921
log.WithFields(logrus.Fields{
883922
"total_context_count": len(contexts),
884-
"context_names": contextsToStrings(contexts),
923+
"context_names": contextNames,
885924
"failed_context_count": len(failed),
886925
"failed_context_names": contextsToStrings(failed),
887926
}).Debug("Filtered out failed contexts")

pkg/tide/tide_test.go

Lines changed: 132 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2908,7 +2908,7 @@ func TestFilterSubpool(t *testing.T) {
29082908
mergeChecker: mmc,
29092909
logger: logrus.WithContext(context.Background()),
29102910
}
2911-
filtered := filterSubpool(provider, mmc.isAllowedToMerge, sp)
2911+
filtered := filterSubpool(provider, mmc.isAllowedToMerge, sp, newContextHistory())
29122912
if len(tc.expectedPRs) == 0 {
29132913
if filtered != nil {
29142914
t.Fatalf("Expected subpool to be pruned, but got: %v", filtered)
@@ -5482,3 +5482,134 @@ func TestSerialRetestingConsidersPRThatIsCurrentlyBeingSRetested(t *testing.T) {
54825482
}
54835483

54845484
}
5485+
5486+
// TestUnsuccessfulContextsWithDisappearedContexts tests the fix for issue #337
5487+
// where re-triggering GitHub Actions causes contexts to temporarily disappear
5488+
func TestUnsuccessfulContextsWithDisappearedContexts(t *testing.T) {
5489+
log := logrus.NewEntry(logrus.StandardLogger())
5490+
5491+
testCases := []struct {
5492+
name string
5493+
contexts []Context
5494+
previousContexts []string
5495+
requiredContexts []string
5496+
expectedFailed int
5497+
expectWarning bool
5498+
}{
5499+
{
5500+
name: "no disappeared contexts",
5501+
contexts: []Context{
5502+
{Context: githubql.String("test-1"), State: githubql.StatusStateSuccess},
5503+
{Context: githubql.String("test-2"), State: githubql.StatusStateSuccess},
5504+
},
5505+
previousContexts: []string{"test-1", "test-2"},
5506+
requiredContexts: []string{"test-1", "test-2"},
5507+
expectedFailed: 0,
5508+
expectWarning: false,
5509+
},
5510+
{
5511+
name: "context disappeared - race condition",
5512+
contexts: []Context{
5513+
{Context: githubql.String("test-1"), State: githubql.StatusStateSuccess},
5514+
},
5515+
previousContexts: []string{"test-1", "test-2"},
5516+
requiredContexts: []string{"test-1", "test-2"},
5517+
expectedFailed: 2, // one missing required + one disappeared
5518+
expectWarning: true,
5519+
},
5520+
{
5521+
name: "optional context disappeared - should not fail",
5522+
contexts: []Context{
5523+
{Context: githubql.String("test-1"), State: githubql.StatusStateSuccess},
5524+
},
5525+
previousContexts: []string{"test-1", "test-2"},
5526+
requiredContexts: []string{"test-1"},
5527+
expectedFailed: 1, // test-2 is unknown, treated as required by default
5528+
expectWarning: true,
5529+
},
5530+
{
5531+
name: "first time seeing PR - no history",
5532+
contexts: []Context{
5533+
{Context: githubql.String("test-1"), State: githubql.StatusStateSuccess},
5534+
},
5535+
previousContexts: nil,
5536+
requiredContexts: []string{"test-1", "test-2"},
5537+
expectedFailed: 1, // only missing required, no disappeared
5538+
expectWarning: false,
5539+
},
5540+
{
5541+
name: "context reappeared after disappearing",
5542+
contexts: []Context{
5543+
{Context: githubql.String("test-1"), State: githubql.StatusStateSuccess},
5544+
{Context: githubql.String("test-2"), State: githubql.StatusStatePending},
5545+
},
5546+
previousContexts: []string{"test-1"},
5547+
requiredContexts: []string{"test-1", "test-2"},
5548+
expectedFailed: 1, // test-2 is pending (not disappeared, it's present)
5549+
expectWarning: false,
5550+
},
5551+
}
5552+
5553+
for _, tc := range testCases {
5554+
t.Run(tc.name, func(t *testing.T) {
5555+
ch := newContextHistory()
5556+
pr := &CodeReviewCommon{
5557+
Org: "test-org",
5558+
Repo: "test-repo",
5559+
Number: 123,
5560+
HeadRefOID: "abc123",
5561+
}
5562+
5563+
// Set up previous context history if provided
5564+
if tc.previousContexts != nil {
5565+
ch.recordContexts(pr.Org, pr.Repo, pr.Number, pr.HeadRefOID, tc.previousContexts)
5566+
}
5567+
5568+
// Create a simple context checker
5569+
cc := &config.TideContextPolicy{
5570+
RequiredContexts: tc.requiredContexts,
5571+
}
5572+
5573+
failed := unsuccessfulContexts(tc.contexts, cc, pr, ch, log)
5574+
5575+
if len(failed) != tc.expectedFailed {
5576+
t.Errorf("expected %d failed contexts, got %d: %+v", tc.expectedFailed, len(failed), failed)
5577+
}
5578+
})
5579+
}
5580+
}
5581+
5582+
// TestContextHistoryCleanup tests that old context history entries are cleaned up
5583+
func TestContextHistoryCleanup(t *testing.T) {
5584+
ch := newContextHistory()
5585+
5586+
// Record contexts for multiple PRs
5587+
ch.recordContexts("org", "repo", 1, "sha1", []string{"test-1", "test-2"})
5588+
ch.recordContexts("org", "repo", 2, "sha2", []string{"test-1", "test-2"})
5589+
ch.recordContexts("org", "repo", 3, "sha3", []string{"test-1", "test-2"})
5590+
5591+
if len(ch.prCommitContexts) != 3 {
5592+
t.Errorf("expected 3 entries, got %d", len(ch.prCommitContexts))
5593+
}
5594+
5595+
// Only PR 1 and 2 are still active
5596+
activeKeys := sets.New[string](
5597+
"org/repo#1:sha1",
5598+
"org/repo#2:sha2",
5599+
)
5600+
ch.cleanup(activeKeys)
5601+
5602+
if len(ch.prCommitContexts) != 2 {
5603+
t.Errorf("expected 2 entries after cleanup, got %d", len(ch.prCommitContexts))
5604+
}
5605+
5606+
if !ch.prCommitContexts["org/repo#1:sha1"].Has("test-1") {
5607+
t.Error("PR 1 should still be tracked")
5608+
}
5609+
if !ch.prCommitContexts["org/repo#2:sha2"].Has("test-1") {
5610+
t.Error("PR 2 should still be tracked")
5611+
}
5612+
if _, exists := ch.prCommitContexts["org/repo#3:sha3"]; exists {
5613+
t.Error("PR 3 should have been cleaned up")
5614+
}
5615+
}

0 commit comments

Comments
 (0)