diff --git a/charts/brigade-github-gateway/templates/monitor/deployment.yaml b/charts/brigade-github-gateway/templates/monitor/deployment.yaml index 8e5fbd1..f9d0c28 100644 --- a/charts/brigade-github-gateway/templates/monitor/deployment.yaml +++ b/charts/brigade-github-gateway/templates/monitor/deployment.yaml @@ -41,6 +41,8 @@ spec: value: {{ .Values.monitor.listEventsInterval }} - name: EVENT_FOLLOW_UP_INTERVAL value: {{ .Values.monitor.eventFollowUpInterval }} + - name: REPORT_FALLIBLE_JOB_FAILURES_AS_NEUTRAL + value: {{ quote .Values.monitor.github.checkSuite.reportFallibleJobFailuresAsNeutral }} volumeMounts: - name: config mountPath: /app/config diff --git a/charts/brigade-github-gateway/values.yaml b/charts/brigade-github-gateway/values.yaml index 832a505..48e0275 100755 --- a/charts/brigade-github-gateway/values.yaml +++ b/charts/brigade-github-gateway/values.yaml @@ -194,6 +194,15 @@ monitor: ## time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". eventFollowUpInterval: 30s + github: + checkSuite: + ## Determines how a failed job that is ALLOWED to fail will be reported + ## upstream to GitHub. When set to false, failed jobs, even if allowed to + ## fail, will be reported as failures. When set to true, failed jobs will + ## be reported with a neutral status. This is defaulted to false for + ## backwards compatibility, but it is recommended to set this to true. + reportFallibleJobFailuresAsNeutral: false + resources: {} # We usually recommend not to specify default resources and to leave this as # a conscious choice for the user. This also increases chances charts run on diff --git a/go.mod b/go.mod index 10e8baf..461af30 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ replace k8s.io/client-go => k8s.io/client-go v0.18.2 require ( github.com/armon/circbuf v0.0.0-20190214190532-5111143e8da2 github.com/brigadecore/brigade-foundations v0.2.0 - github.com/brigadecore/brigade/sdk/v3 v3.0.0 + github.com/brigadecore/brigade/sdk/v3 v3.1.0 github.com/dgrijalva/jwt-go v3.2.0+incompatible github.com/google/go-github/v33 v33.0.0 github.com/gorilla/mux v1.8.0 diff --git a/go.sum b/go.sum index b7d588a..a3cabc3 100644 --- a/go.sum +++ b/go.sum @@ -2,8 +2,8 @@ github.com/armon/circbuf v0.0.0-20190214190532-5111143e8da2 h1:7Ip0wMmLHLRJdrloD github.com/armon/circbuf v0.0.0-20190214190532-5111143e8da2/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/brigadecore/brigade-foundations v0.2.0 h1:pUtIQgN5Qa1I5JjpJOu+9E7J/kXC/QY2fzdGZt/I50I= github.com/brigadecore/brigade-foundations v0.2.0/go.mod h1:edMgSJCUgfHN1RNGiiVOTRW4X4VykBLgssgWHPZK7Sg= -github.com/brigadecore/brigade/sdk/v3 v3.0.0 h1:jCjKQuoDYK8J+P2Zpuc/IQK/GKx0M678AbD0GgxOvcM= -github.com/brigadecore/brigade/sdk/v3 v3.0.0/go.mod h1:Ow91x3wvUtkyMsV6hwbPtVZevrcHqoH0Pjh0OID4Sh0= +github.com/brigadecore/brigade/sdk/v3 v3.1.0 h1:HHQ7PbXoamBNBZbQiEnfuqjdTx2A/IKrYNkmXjfDb0I= +github.com/brigadecore/brigade/sdk/v3 v3.1.0/go.mod h1:FEGeewbusnb0mZbqGtJsjbMYAQtnU9O2gZHHV1cFm1o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= diff --git a/monitor/config.go b/monitor/config.go index 7e388d9..46fc81d 100644 --- a/monitor/config.go +++ b/monitor/config.go @@ -65,5 +65,10 @@ func getMonitorConfig() (monitorConfig, error) { } config.eventFollowUpInterval, err = os.GetDurationFromEnvVar("EVENT_FOLLOW_UP_INTERVAL", 30*time.Second) + if err != nil { + return config, err + } + config.reportFallibleJobFailuresAsNeutral, err = + os.GetBoolFromEnvVar("REPORT_FALLIBLE_JOB_FAILURES_AS_NEUTRAL", false) return config, err } diff --git a/monitor/config_test.go b/monitor/config_test.go index ed9adda..ddb737a 100644 --- a/monitor/config_test.go +++ b/monitor/config_test.go @@ -167,6 +167,31 @@ func TestGetMonitorConfig(t *testing.T) { require.Contains(t, err.Error(), "was not parsable as a duration") }, }, + + { + name: "errors parsing REPORT_FALLIBLE_JOB_FAILURES_AS_NEUTRAL", + setup: func() { + appsFile, err := ioutil.TempFile("", "apps.json") + require.NoError(t, err) + defer appsFile.Close() + _, err = + appsFile.Write([]byte(`[{"appID":42,"apiKey":"foobar"}]`)) + require.NoError(t, err) + t.Setenv("GITHUB_APPS_PATH", appsFile.Name()) + t.Setenv("LIST_EVENTS_INTERVAL", "1m") + t.Setenv("EVENT_FOLLOW_UP_INTERVAL", "1m") + t.Setenv("REPORT_FALLIBLE_JOB_FAILURES_AS_NEUTRAL", "bogus") + }, + assertions: func(cfg monitorConfig, err error) { + require.Error(t, err) + require.Contains( + t, + err.Error(), + "REPORT_FALLIBLE_JOB_FAILURES_AS_NEUTRAL", + ) + require.Contains(t, err.Error(), "was not parsable as a bool") + }, + }, { name: "success", setup: func() { @@ -178,6 +203,7 @@ func TestGetMonitorConfig(t *testing.T) { require.NoError(t, err) t.Setenv("GITHUB_APPS_PATH", appsFile.Name()) t.Setenv("EVENT_FOLLOW_UP_INTERVAL", "1m") + t.Setenv("REPORT_FALLIBLE_JOB_FAILURES_AS_NEUTRAL", "true") }, assertions: func(cfg monitorConfig, err error) { require.NoError(t, err) @@ -186,6 +212,7 @@ func TestGetMonitorConfig(t *testing.T) { require.Equal(t, "foobar", cfg.gitHubApps[42].APIKey) require.Equal(t, time.Minute, cfg.listEventsInterval) require.Equal(t, time.Minute, cfg.eventFollowUpInterval) + require.True(t, cfg.reportFallibleJobFailuresAsNeutral) }, }, } diff --git a/monitor/events.go b/monitor/events.go index 670ec98..8d9f52d 100644 --- a/monitor/events.go +++ b/monitor/events.go @@ -24,6 +24,7 @@ const ( // nolint: misspell conclusionCanceled = "cancelled" // This is how GitHub spells it conclusionFailure = "failure" + conclusionNeutral = "neutral" conclusionSuccess = "success" conclusionTimedOut = "timed_out" ) @@ -177,8 +178,10 @@ func (m *monitor) monitorEventInternal( continue // next job } - status, conclusion := - checkRunStatusAndConclusionFromJobStatus(job.Status.Phase) + status, conclusion := m.checkRunStatusAndConclusionFromJobStatus( + job.Status.Phase, + job.Spec.Fallible, + ) // Note: This will return an empty string if the job isn't in a terminal // phase @@ -401,8 +404,9 @@ func (m *monitor) updateCheckRun( ) } -func checkRunStatusAndConclusionFromJobStatus( +func (m *monitor) checkRunStatusAndConclusionFromJobStatus( jobPhase sdk.JobPhase, + fallible bool, ) (string, string) { var status string var conclusion string @@ -412,7 +416,11 @@ func checkRunStatusAndConclusionFromJobStatus( conclusion = conclusionCanceled case sdk.JobPhaseFailed, sdk.JobPhaseSchedulingFailed, sdk.JobPhaseUnknown: // nolint: lll status = statusCompleted - conclusion = conclusionFailure + if fallible && m.config.reportFallibleJobFailuresAsNeutral { + conclusion = conclusionNeutral + } else { + conclusion = conclusionFailure + } case sdk.JobPhasePending, sdk.JobPhaseStarting: status = statusQueued case sdk.JobPhaseRunning: diff --git a/monitor/events_test.go b/monitor/events_test.go index 3215b28..0c98696 100644 --- a/monitor/events_test.go +++ b/monitor/events_test.go @@ -711,65 +711,102 @@ func TestUpdateCheckRun(t *testing.T) { func TestCheckRunStatusAndConclusionFromJobStatus(t *testing.T) { testCases := []struct { + name string jobPhase sdk.JobPhase + jobIsFallible bool + config monitorConfig expectedStatus string expectedConclusion string }{ { + name: "job was aborted", jobPhase: sdk.JobPhaseAborted, expectedStatus: statusCompleted, expectedConclusion: conclusionCanceled, }, { + name: "job was canceled", jobPhase: sdk.JobPhaseCanceled, expectedStatus: statusCompleted, expectedConclusion: conclusionCanceled, }, { + name: "job failed", jobPhase: sdk.JobPhaseFailed, expectedStatus: statusCompleted, expectedConclusion: conclusionFailure, }, { + name: "job failed; job is fallible; fallible not " + + "reported as neutral", + jobPhase: sdk.JobPhaseFailed, + jobIsFallible: true, + expectedStatus: statusCompleted, + expectedConclusion: conclusionFailure, + }, + { + name: "job failed; job is fallible; fallible reported " + + "as neutral", + jobPhase: sdk.JobPhaseFailed, + jobIsFallible: true, + config: monitorConfig{ + reportFallibleJobFailuresAsNeutral: true, + }, + expectedStatus: statusCompleted, + expectedConclusion: conclusionNeutral, + }, + { + name: "job scheduling failed", jobPhase: sdk.JobPhaseSchedulingFailed, expectedStatus: statusCompleted, expectedConclusion: conclusionFailure, }, { + name: "job phase is unknown", jobPhase: sdk.JobPhaseUnknown, expectedStatus: statusCompleted, expectedConclusion: conclusionFailure, }, { + name: "job is pending", jobPhase: sdk.JobPhasePending, expectedStatus: statusQueued, expectedConclusion: "", }, { + name: "job is starting", jobPhase: sdk.JobPhaseStarting, expectedStatus: statusQueued, expectedConclusion: "", }, { + name: "job is running", jobPhase: sdk.JobPhaseRunning, expectedStatus: statusInProgress, expectedConclusion: "", }, { + name: "job has succeeded", jobPhase: sdk.JobPhaseSucceeded, expectedStatus: statusCompleted, expectedConclusion: conclusionSuccess, }, { + name: "job has timed out", jobPhase: sdk.JobPhaseTimedOut, expectedStatus: statusCompleted, expectedConclusion: conclusionTimedOut, }, } for _, testCase := range testCases { - t.Run(string(testCase.jobPhase), func(t *testing.T) { - status, conclusion := - checkRunStatusAndConclusionFromJobStatus(testCase.jobPhase) + t.Run(testCase.name, func(t *testing.T) { + m := &monitor{ + config: testCase.config, + } + status, conclusion := m.checkRunStatusAndConclusionFromJobStatus( + testCase.jobPhase, + testCase.jobIsFallible, + ) require.Equal(t, testCase.expectedStatus, status) require.Equal(t, testCase.expectedConclusion, conclusion) }) diff --git a/monitor/monitor.go b/monitor/monitor.go index 25ee9db..f2658c3 100644 --- a/monitor/monitor.go +++ b/monitor/monitor.go @@ -12,10 +12,11 @@ import ( // monitorConfig encapsulates configuration options for the monitor component. type monitorConfig struct { - healthcheckInterval time.Duration - listEventsInterval time.Duration - eventFollowUpInterval time.Duration - gitHubApps map[int64]github.App + healthcheckInterval time.Duration + listEventsInterval time.Duration + eventFollowUpInterval time.Duration + gitHubApps map[int64]github.App + reportFallibleJobFailuresAsNeutral bool } // monitor is a component that continuously monitors certain events that the