Skip to content

Commit c626587

Browse files
authored
send neutral status for failed jobs if they are fallible (#108)
Signed-off-by: Kent Rancourt <[email protected]>
1 parent a4d4dff commit c626587

File tree

9 files changed

+103
-14
lines changed

9 files changed

+103
-14
lines changed

charts/brigade-github-gateway/templates/monitor/deployment.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ spec:
4141
value: {{ .Values.monitor.listEventsInterval }}
4242
- name: EVENT_FOLLOW_UP_INTERVAL
4343
value: {{ .Values.monitor.eventFollowUpInterval }}
44+
- name: REPORT_FALLIBLE_JOB_FAILURES_AS_NEUTRAL
45+
value: {{ quote .Values.monitor.github.checkSuite.reportFallibleJobFailuresAsNeutral }}
4446
volumeMounts:
4547
- name: config
4648
mountPath: /app/config

charts/brigade-github-gateway/values.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,15 @@ monitor:
194194
## time units are "ns", "us" (or "µs"), "ms", "s", "m", "h".
195195
eventFollowUpInterval: 30s
196196

197+
github:
198+
checkSuite:
199+
## Determines how a failed job that is ALLOWED to fail will be reported
200+
## upstream to GitHub. When set to false, failed jobs, even if allowed to
201+
## fail, will be reported as failures. When set to true, failed jobs that
202+
## are allowed to fail are reported as neutral instead. Defaults to false
203+
## for backwards compatibility, but setting this to true is recommended.
204+
reportFallibleJobFailuresAsNeutral: false
205+
197206
resources: {}
198207
# We usually recommend not to specify default resources and to leave this as
199208
# a conscious choice for the user. This also increases chances charts run on

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ replace k8s.io/client-go => k8s.io/client-go v0.18.2
77
require (
88
github.com/armon/circbuf v0.0.0-20190214190532-5111143e8da2
99
github.com/brigadecore/brigade-foundations v0.2.0
10-
github.com/brigadecore/brigade/sdk/v3 v3.0.0
10+
github.com/brigadecore/brigade/sdk/v3 v3.1.0
1111
github.com/dgrijalva/jwt-go v3.2.0+incompatible
1212
github.com/google/go-github/v33 v33.0.0
1313
github.com/gorilla/mux v1.8.0

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ github.com/armon/circbuf v0.0.0-20190214190532-5111143e8da2 h1:7Ip0wMmLHLRJdrloD
22
github.com/armon/circbuf v0.0.0-20190214190532-5111143e8da2/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
33
github.com/brigadecore/brigade-foundations v0.2.0 h1:pUtIQgN5Qa1I5JjpJOu+9E7J/kXC/QY2fzdGZt/I50I=
44
github.com/brigadecore/brigade-foundations v0.2.0/go.mod h1:edMgSJCUgfHN1RNGiiVOTRW4X4VykBLgssgWHPZK7Sg=
5-
github.com/brigadecore/brigade/sdk/v3 v3.0.0 h1:jCjKQuoDYK8J+P2Zpuc/IQK/GKx0M678AbD0GgxOvcM=
6-
github.com/brigadecore/brigade/sdk/v3 v3.0.0/go.mod h1:Ow91x3wvUtkyMsV6hwbPtVZevrcHqoH0Pjh0OID4Sh0=
5+
github.com/brigadecore/brigade/sdk/v3 v3.1.0 h1:HHQ7PbXoamBNBZbQiEnfuqjdTx2A/IKrYNkmXjfDb0I=
6+
github.com/brigadecore/brigade/sdk/v3 v3.1.0/go.mod h1:FEGeewbusnb0mZbqGtJsjbMYAQtnU9O2gZHHV1cFm1o=
77
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
88
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
99
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=

monitor/config.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,5 +65,10 @@ func getMonitorConfig() (monitorConfig, error) {
6565
}
6666
config.eventFollowUpInterval, err =
6767
os.GetDurationFromEnvVar("EVENT_FOLLOW_UP_INTERVAL", 30*time.Second)
68+
if err != nil {
69+
return config, err
70+
}
71+
config.reportFallibleJobFailuresAsNeutral, err =
72+
os.GetBoolFromEnvVar("REPORT_FALLIBLE_JOB_FAILURES_AS_NEUTRAL", false)
6873
return config, err
6974
}

monitor/config_test.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,31 @@ func TestGetMonitorConfig(t *testing.T) {
167167
require.Contains(t, err.Error(), "was not parsable as a duration")
168168
},
169169
},
170+
171+
{
172+
name: "errors parsing REPORT_FALLIBLE_JOB_FAILURES_AS_NEUTRAL",
173+
setup: func() {
174+
appsFile, err := ioutil.TempFile("", "apps.json")
175+
require.NoError(t, err)
176+
defer appsFile.Close()
177+
_, err =
178+
appsFile.Write([]byte(`[{"appID":42,"apiKey":"foobar"}]`))
179+
require.NoError(t, err)
180+
t.Setenv("GITHUB_APPS_PATH", appsFile.Name())
181+
t.Setenv("LIST_EVENTS_INTERVAL", "1m")
182+
t.Setenv("EVENT_FOLLOW_UP_INTERVAL", "1m")
183+
t.Setenv("REPORT_FALLIBLE_JOB_FAILURES_AS_NEUTRAL", "bogus")
184+
},
185+
assertions: func(cfg monitorConfig, err error) {
186+
require.Error(t, err)
187+
require.Contains(
188+
t,
189+
err.Error(),
190+
"REPORT_FALLIBLE_JOB_FAILURES_AS_NEUTRAL",
191+
)
192+
require.Contains(t, err.Error(), "was not parsable as a bool")
193+
},
194+
},
170195
{
171196
name: "success",
172197
setup: func() {
@@ -178,6 +203,7 @@ func TestGetMonitorConfig(t *testing.T) {
178203
require.NoError(t, err)
179204
t.Setenv("GITHUB_APPS_PATH", appsFile.Name())
180205
t.Setenv("EVENT_FOLLOW_UP_INTERVAL", "1m")
206+
t.Setenv("REPORT_FALLIBLE_JOB_FAILURES_AS_NEUTRAL", "true")
181207
},
182208
assertions: func(cfg monitorConfig, err error) {
183209
require.NoError(t, err)
@@ -186,6 +212,7 @@ func TestGetMonitorConfig(t *testing.T) {
186212
require.Equal(t, "foobar", cfg.gitHubApps[42].APIKey)
187213
require.Equal(t, time.Minute, cfg.listEventsInterval)
188214
require.Equal(t, time.Minute, cfg.eventFollowUpInterval)
215+
require.True(t, cfg.reportFallibleJobFailuresAsNeutral)
189216
},
190217
},
191218
}

monitor/events.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ const (
2424
// nolint: misspell
2525
conclusionCanceled = "cancelled" // This is how GitHub spells it
2626
conclusionFailure = "failure"
27+
conclusionNeutral = "neutral"
2728
conclusionSuccess = "success"
2829
conclusionTimedOut = "timed_out"
2930
)
@@ -177,8 +178,10 @@ func (m *monitor) monitorEventInternal(
177178
continue // next job
178179
}
179180

180-
status, conclusion :=
181-
checkRunStatusAndConclusionFromJobStatus(job.Status.Phase)
181+
status, conclusion := m.checkRunStatusAndConclusionFromJobStatus(
182+
job.Status.Phase,
183+
job.Spec.Fallible,
184+
)
182185

183186
// Note: This will return an empty string if the job isn't in a terminal
184187
// phase
@@ -401,8 +404,9 @@ func (m *monitor) updateCheckRun(
401404
)
402405
}
403406

404-
func checkRunStatusAndConclusionFromJobStatus(
407+
func (m *monitor) checkRunStatusAndConclusionFromJobStatus(
405408
jobPhase sdk.JobPhase,
409+
fallible bool,
406410
) (string, string) {
407411
var status string
408412
var conclusion string
@@ -412,7 +416,11 @@ func checkRunStatusAndConclusionFromJobStatus(
412416
conclusion = conclusionCanceled
413417
case sdk.JobPhaseFailed, sdk.JobPhaseSchedulingFailed, sdk.JobPhaseUnknown: // nolint: lll
414418
status = statusCompleted
415-
conclusion = conclusionFailure
419+
if fallible && m.config.reportFallibleJobFailuresAsNeutral {
420+
conclusion = conclusionNeutral
421+
} else {
422+
conclusion = conclusionFailure
423+
}
416424
case sdk.JobPhasePending, sdk.JobPhaseStarting:
417425
status = statusQueued
418426
case sdk.JobPhaseRunning:

monitor/events_test.go

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -711,65 +711,102 @@ func TestUpdateCheckRun(t *testing.T) {
711711

712712
func TestCheckRunStatusAndConclusionFromJobStatus(t *testing.T) {
713713
testCases := []struct {
714+
name string
714715
jobPhase sdk.JobPhase
716+
jobIsFallible bool
717+
config monitorConfig
715718
expectedStatus string
716719
expectedConclusion string
717720
}{
718721
{
722+
name: "job was aborted",
719723
jobPhase: sdk.JobPhaseAborted,
720724
expectedStatus: statusCompleted,
721725
expectedConclusion: conclusionCanceled,
722726
},
723727
{
728+
name: "job was canceled",
724729
jobPhase: sdk.JobPhaseCanceled,
725730
expectedStatus: statusCompleted,
726731
expectedConclusion: conclusionCanceled,
727732
},
728733
{
734+
name: "job failed",
729735
jobPhase: sdk.JobPhaseFailed,
730736
expectedStatus: statusCompleted,
731737
expectedConclusion: conclusionFailure,
732738
},
733739
{
740+
name: "job failed; job is fallible; fallible not " +
741+
"reported as neutral",
742+
jobPhase: sdk.JobPhaseFailed,
743+
jobIsFallible: true,
744+
expectedStatus: statusCompleted,
745+
expectedConclusion: conclusionFailure,
746+
},
747+
{
748+
name: "job failed; job is fallible; fallible reported " +
749+
"as neutral",
750+
jobPhase: sdk.JobPhaseFailed,
751+
jobIsFallible: true,
752+
config: monitorConfig{
753+
reportFallibleJobFailuresAsNeutral: true,
754+
},
755+
expectedStatus: statusCompleted,
756+
expectedConclusion: conclusionNeutral,
757+
},
758+
{
759+
name: "job scheduling failed",
734760
jobPhase: sdk.JobPhaseSchedulingFailed,
735761
expectedStatus: statusCompleted,
736762
expectedConclusion: conclusionFailure,
737763
},
738764
{
765+
name: "job phase is unknown",
739766
jobPhase: sdk.JobPhaseUnknown,
740767
expectedStatus: statusCompleted,
741768
expectedConclusion: conclusionFailure,
742769
},
743770
{
771+
name: "job is pending",
744772
jobPhase: sdk.JobPhasePending,
745773
expectedStatus: statusQueued,
746774
expectedConclusion: "",
747775
},
748776
{
777+
name: "job is starting",
749778
jobPhase: sdk.JobPhaseStarting,
750779
expectedStatus: statusQueued,
751780
expectedConclusion: "",
752781
},
753782
{
783+
name: "job is running",
754784
jobPhase: sdk.JobPhaseRunning,
755785
expectedStatus: statusInProgress,
756786
expectedConclusion: "",
757787
},
758788
{
789+
name: "job has succeeded",
759790
jobPhase: sdk.JobPhaseSucceeded,
760791
expectedStatus: statusCompleted,
761792
expectedConclusion: conclusionSuccess,
762793
},
763794
{
795+
name: "job has timed out",
764796
jobPhase: sdk.JobPhaseTimedOut,
765797
expectedStatus: statusCompleted,
766798
expectedConclusion: conclusionTimedOut,
767799
},
768800
}
769801
for _, testCase := range testCases {
770-
t.Run(string(testCase.jobPhase), func(t *testing.T) {
771-
status, conclusion :=
772-
checkRunStatusAndConclusionFromJobStatus(testCase.jobPhase)
802+
t.Run(testCase.name, func(t *testing.T) {
803+
m := &monitor{
804+
config: testCase.config,
805+
}
806+
status, conclusion := m.checkRunStatusAndConclusionFromJobStatus(
807+
testCase.jobPhase,
808+
testCase.jobIsFallible,
809+
)
773810
require.Equal(t, testCase.expectedStatus, status)
774811
require.Equal(t, testCase.expectedConclusion, conclusion)
775812
})

monitor/monitor.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,11 @@ import (
1212

1313
// monitorConfig encapsulates configuration options for the monitor component.
1414
type monitorConfig struct {
15-
healthcheckInterval time.Duration
16-
listEventsInterval time.Duration
17-
eventFollowUpInterval time.Duration
18-
gitHubApps map[int64]github.App
15+
healthcheckInterval time.Duration
16+
listEventsInterval time.Duration
17+
eventFollowUpInterval time.Duration
18+
gitHubApps map[int64]github.App
19+
reportFallibleJobFailuresAsNeutral bool
1920
}
2021

2122
// monitor is a component that continuously monitors certain events that the

0 commit comments

Comments
 (0)