Skip to content

Commit 3d7f03a

Browse files
authored
Cleanup and update test code
The cleanup orphaned pipelines test (that's not really a test, more a maintenance task we need to run every once in a while) needs some serious TLC. It uses the testcase struct as a concurrency controller, and its logic is generally really confusing. This PR updates the cleanup test to use more standard go idioms for concurrency control, generally makes it a bit easier to read, and updates it so that it also deletes orphaned cluster queues. It also updates go-buildkite to the latest version -- we were fairly seriously behind
2 parents 89ce630 + 6e187dc commit 3d7f03a

File tree

10 files changed

+191
-137
lines changed

10 files changed

+191
-137
lines changed

.buildkite/pipeline.yaml

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,22 @@ x-anchors:
1313
checkout:
1414
fetchFlags: -v --tags
1515

16+
run-tests: &run-tests
17+
artifact_paths: junit-*.xml
18+
secrets:
19+
INTEGRATION_TEST_BUILDKITE_TOKEN: integration_test_buildkite_api_token
20+
BUILDKITE_ANALYTICS_TOKEN: test_engine_suite_token
21+
image: golang:1.25.5@sha256:a22b2e6c5e753345b9759fba9e5c1731ebe28af506745e98f406cc85d50c828e
22+
command: .buildkite/steps/tests.sh
23+
plugins:
24+
- kubernetes:
25+
podTemplate: go-with-cache
26+
podSpecPatch:
27+
serviceAccountName: integration-tests
28+
- test-collector:
29+
files: junit-*.xml
30+
format: junit
31+
1632
agents:
1733
queue: kubernetes
1834

@@ -47,22 +63,26 @@ steps:
4763
command: .buildkite/steps/agent.sh
4864

4965
- label: ":buildkite::test_tube: tests"
66+
<<: *run-tests
5067
key: tests
5168
depends_on: agent
52-
artifact_paths: junit-*.xml
53-
secrets:
54-
INTEGRATION_TEST_BUILDKITE_TOKEN: integration_test_buildkite_api_token
55-
BUILDKITE_ANALYTICS_TOKEN: test_engine_suite_token
56-
image: golang:1.25.5@sha256:a22b2e6c5e753345b9759fba9e5c1731ebe28af506745e98f406cc85d50c828e
57-
command: .buildkite/steps/tests.sh
69+
70+
- input: "Cleanup orphaned buildkite resources?"
71+
key: cleanup-orphans-block
72+
depends_on: tests
73+
74+
- label: ":broom::buildkite: Clean up orphaned pipelines and queues"
75+
<<: *run-tests
76+
key: cleanup-orphans
77+
depends_on: cleanup-orphans-block
78+
command: .buildkite/steps/tests.sh -v -run TestCleanupOrphanedResources
79+
env:
80+
CLEANUP_PIPELINES: "true"
5881
plugins:
5982
- kubernetes:
6083
podTemplate: go-with-cache
6184
podSpecPatch:
6285
serviceAccountName: integration-tests
63-
- test-collector:
64-
files: junit-*.xml
65-
format: junit
6686

6787
- label: ":docker: build and push controller image"
6888
key: controller

.buildkite/steps/tests.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,5 @@ export AGENT_TOKEN_SECRET="agent-stack-k8s-secrets"
1717
go tool gotestsum --junitfile "junit-${BUILDKITE_JOB_ID}.xml" -- \
1818
-count=1 \
1919
-ldflags="-X ${package}.branch=${branch}" \
20+
"$@" \
2021
./...

cmd/linter/linter.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import (
1010
"os"
1111

1212
"github.com/buildkite/agent-stack-k8s/v2/internal/controller/scheduler"
13-
"github.com/buildkite/go-buildkite/v3/buildkite"
13+
"github.com/buildkite/go-buildkite/v4"
1414
"github.com/go-playground/validator/v10"
1515
"github.com/spf13/cobra"
1616
"github.com/xeipuuv/gojsonschema"

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ go 1.25.4
55
require (
66
github.com/Khan/genqlient v0.8.1
77
github.com/buildkite/agent/v3 v3.115.2
8-
github.com/buildkite/go-buildkite/v3 v3.13.0
8+
github.com/buildkite/go-buildkite/v4 v4.13.1
99
github.com/buildkite/roko v1.4.0
1010
github.com/buildkite/stacksapi v1.0.0
1111
github.com/distribution/reference v0.6.0
@@ -98,7 +98,7 @@ require (
9898
github.com/buildkite/go-pipeline v0.16.0 // indirect
9999
github.com/buildkite/interpolate v0.1.5 // indirect
100100
github.com/buildkite/shellwords v1.0.1 // indirect
101-
github.com/cenkalti/backoff v1.1.1-0.20171020064038-309aa717adbf // indirect
101+
github.com/cenkalti/backoff v2.2.1+incompatible // indirect
102102
github.com/cenkalti/backoff/v5 v5.0.3 // indirect
103103
github.com/cespare/xxhash/v2 v2.3.0 // indirect
104104
github.com/cihub/seelog v0.0.0-20170130134532-f561c5e57575 // indirect

go.sum

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,8 @@ github.com/buildkite/agent/v3 v3.115.2 h1:26A/dEabfzjorS3Wh/low+yOBM/u8QaT59BYWu
134134
github.com/buildkite/agent/v3 v3.115.2/go.mod h1:a3t090/PPxAIIPCjlXF5fhfRvG0E9huFsnMX7B76iIQ=
135135
github.com/buildkite/bintest/v3 v3.3.0 h1:RTWcSaJRlOT6t/K311ejPf+0J3LE/QEODzVG3vlLnWo=
136136
github.com/buildkite/bintest/v3 v3.3.0/go.mod h1:btqpTsVODiJcb0NMdkkmtMQ6xoFc2W/nY5yy+3I0zcs=
137-
github.com/buildkite/go-buildkite/v3 v3.13.0 h1:8DxSmai9UND0n81BlKqNyCuLmiUSDroMhXq+BF28e7E=
138-
github.com/buildkite/go-buildkite/v3 v3.13.0/go.mod h1:ux2rjcqNoE54wfFHO3Vlet+a57Tt2jqOqfc9Afs7R7g=
137+
github.com/buildkite/go-buildkite/v4 v4.13.1 h1:3PhrShdQwlZ2+OIWy0QbxbbjP6M97NCvarlIB6Oye+k=
138+
github.com/buildkite/go-buildkite/v4 v4.13.1/go.mod h1:DlebrRJqpZttXDjCW+MJ1QyW9AN++ZWt/UbPtKdbSSk=
139139
github.com/buildkite/go-pipeline v0.16.0 h1:wEgWUMRAgSg1ZnWOoA3AovtYYdTvN0dLY1zwUWmPP+4=
140140
github.com/buildkite/go-pipeline v0.16.0/go.mod h1:VE37qY3X5pmAKKUMoDZvPsHOQuyakB9cmXj9Qn6QasA=
141141
github.com/buildkite/interpolate v0.1.5 h1:v2Ji3voik69UZlbfoqzx+qfcsOKLA61nHdU79VV+tPU=
@@ -148,8 +148,8 @@ github.com/buildkite/stacksapi v1.0.0 h1:qZ/sU6zpeSI3Izk+RjHfhhpK35g9pJQ0KETjoI/
148148
github.com/buildkite/stacksapi v1.0.0/go.mod h1:JffsOjAtQW5sX1s0IN6B6KhnE6jWgJNFmv82687M8jY=
149149
github.com/buildkite/zstash v0.7.0 h1:lKDGLaRjdjh7M0ItRfpy9HIUv02r1bszKC/8fupDjXY=
150150
github.com/buildkite/zstash v0.7.0/go.mod h1:BGBukIwUaM3r/ypdQWyaDIP3izQj1PSauoAS6fOaYwg=
151-
github.com/cenkalti/backoff v1.1.1-0.20171020064038-309aa717adbf h1:yxlp0s+Sge9UsKEK0Bsvjiopb9XRk+vxylmZ9eGBfm8=
152-
github.com/cenkalti/backoff v1.1.1-0.20171020064038-309aa717adbf/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM=
151+
github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4=
152+
github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM=
153153
github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
154154
github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
155155
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=

internal/integration/cleanup_test.go

Lines changed: 94 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -4,92 +4,133 @@ import (
44
"context"
55
"fmt"
66
"net/http"
7+
"strings"
8+
"sync"
79
"testing"
810
"time"
911

1012
"github.com/Khan/genqlient/graphql"
1113
"github.com/buildkite/agent-stack-k8s/v2/internal/integration/api"
14+
"github.com/buildkite/go-buildkite/v4"
1215
"github.com/buildkite/roko"
13-
"github.com/stretchr/testify/assert"
14-
"github.com/stretchr/testify/require"
1516
)
1617

17-
func TestCleanupOrphanedPipelines(t *testing.T) {
18+
func TestCleanupOrphanedResources(t *testing.T) {
1819
if !cleanupPipelines {
1920
t.Skip("not cleaning orphaned pipelines")
2021
}
2122

2223
ctx := context.Background()
2324
graphqlClient := api.NewGraphQLClient(cfg.BuildkiteToken, cfg.GraphQLEndpoint)
24-
orgSlug := getOrgSlug(t)
25+
tc := testcase{T: t}.Init()
26+
orgSlug := tc.Org
27+
clusterUUID := tc.ClusterUUID
2528

2629
allPipelines, err := searchAllPipelines(ctx, t, graphqlClient, orgSlug, "test-")
27-
require.NoError(t, err)
30+
if err != nil {
31+
t.Fatalf("failed to search for pipelines: %v", err)
32+
}
2833

2934
t.Logf("found %d total pipelines to delete", len(allPipelines))
3035

36+
bkClient, err := buildkite.NewOpts(buildkite.WithTokenAuth(cfg.BuildkiteToken))
37+
if err != nil {
38+
t.Fatalf("failed to create buildkite client: %v", err)
39+
}
40+
41+
// First, go through all the pipelines and cancel any running builds they might have
42+
wg := sync.WaitGroup{}
3143
for _, pipeline := range allPipelines {
32-
t.Run(pipeline.Node.Name, func(t *testing.T) {
33-
tc := testcase{
34-
T: t,
35-
GraphQL: graphqlClient,
36-
PipelineName: pipeline.Node.Name,
37-
}.Init()
38-
39-
builds, err := api.GetBuilds(
40-
ctx,
41-
graphqlClient,
42-
fmt.Sprintf("%s/%s", tc.Org, pipeline.Node.Name),
43-
[]api.BuildStates{api.BuildStatesRunning},
44-
100,
45-
)
46-
require.NoError(t, err)
47-
48-
for _, build := range builds.Pipeline.Builds.Edges {
49-
_, err = api.BuildCancel(
50-
ctx,
51-
graphqlClient,
52-
api.BuildCancelInput{Id: build.Node.Id},
53-
)
54-
assert.NoError(t, err)
44+
wg.Go(func() {
45+
cancelAllBuildsForPipeline(ctx, t, graphqlClient, orgSlug, pipeline.Node.Name)
46+
47+
// Then, delete the pipeline
48+
deletePipeline(ctx, t, bkClient, orgSlug, pipeline.Node.Name)
49+
})
50+
}
51+
52+
wg.Wait()
53+
54+
// Now that all the pipelines have been deleted, go through and delete all the queues that are hanging around
55+
queues, _, err := bkClient.ClusterQueues.List(ctx, orgSlug, clusterUUID, nil)
56+
if err != nil {
57+
t.Fatalf("failed to list queues: %v", err)
58+
}
59+
60+
testQueues := make([]buildkite.ClusterQueue, 0, len(queues))
61+
for _, queue := range queues {
62+
if strings.HasPrefix(queue.Key, "test-") {
63+
testQueues = append(testQueues, queue)
64+
}
65+
}
66+
67+
t.Logf("found %d test queues to clean up", len(testQueues))
68+
69+
for _, queue := range testQueues {
70+
wg.Go(func() {
71+
_, err := bkClient.ClusterQueues.Delete(ctx, orgSlug, clusterUUID, queue.ID)
72+
if err != nil {
73+
t.Errorf("failed to delete queue %q (%s): %v", queue.Key, queue.ID, err)
74+
return
5575
}
5676

57-
tc.deletePipeline(ctx)
77+
t.Logf("deleted queue %q", queue.Key)
5878
})
5979
}
80+
81+
wg.Wait()
6082
}
6183

62-
func (t testcase) deletePipeline(ctx context.Context) {
84+
func cancelAllBuildsForPipeline(ctx context.Context, t *testing.T, graphqlClient graphql.Client, orgSlug, pipelineSlug string) {
6385
t.Helper()
6486

65-
EnsureCleanup(t.T, func() {
66-
if err := roko.NewRetrier(
67-
roko.WithMaxAttempts(10),
68-
roko.WithStrategy(roko.Exponential(time.Second, 5*time.Second)),
69-
).DoWithContext(ctx, func(r *roko.Retrier) error {
70-
resp, err := t.Buildkite.Pipelines.Delete(t.Org, t.PipelineName)
87+
builds, err := api.GetBuilds(ctx, graphqlClient, fmt.Sprintf("%s/%s", orgSlug, pipelineSlug), []api.BuildStates{api.BuildStatesRunning}, 100)
88+
if err != nil {
89+
t.Errorf("failed to get builds for pipeline %q while attempting to delete it: %v", pipelineSlug, err)
90+
return
91+
}
92+
93+
t.Logf("Found %d builds to cancel for pipeline %q", len(builds.Pipeline.Builds.Edges), pipelineSlug)
94+
95+
wg := sync.WaitGroup{}
96+
for _, build := range builds.Pipeline.Builds.Edges {
97+
wg.Go(func() {
98+
_, err = api.BuildCancel(ctx, graphqlClient, api.BuildCancelInput{Id: build.Node.Id})
7199
if err != nil {
72-
if resp.StatusCode == http.StatusNotFound {
73-
return nil
74-
}
75-
t.Logf("waiting for build to be canceled on pipeline %s", t.PipelineName)
76-
return err
100+
t.Errorf("failed to cancel build %q while attempting to delete pipeline %q: %v", build.Node.Id, pipelineSlug, err)
101+
return
77102
}
78-
return nil
79-
}); err != nil {
80-
t.Errorf("failed to cleanup pipeline %s: %v", t.PipelineName, err)
81-
return
82-
}
83103

84-
t.Logf("deleted pipeline! %s", t.PipelineName)
85-
})
104+
t.Logf("cancelled build %q", build.Node.Id)
105+
})
106+
}
107+
108+
wg.Wait()
86109
}
87110

88-
func getOrgSlug(t *testing.T) string {
89-
tc := testcase{
90-
T: t,
91-
}.Init()
92-
return tc.Org
111+
func deletePipeline(ctx context.Context, t *testing.T, apiClient *buildkite.Client, orgSlug, pipelineSlug string) {
112+
t.Helper()
113+
114+
if err := roko.NewRetrier(
115+
roko.WithMaxAttempts(10),
116+
roko.WithStrategy(roko.Exponential(time.Second, 5*time.Second)),
117+
).DoWithContext(ctx, func(r *roko.Retrier) error {
118+
resp, err := apiClient.Pipelines.Delete(ctx, orgSlug, pipelineSlug)
119+
if err != nil {
120+
if resp.StatusCode == http.StatusNotFound {
121+
return nil
122+
}
123+
124+
return err
125+
}
126+
127+
return nil
128+
}); err != nil {
129+
t.Errorf("failed to delete pipeline %q: %v", pipelineSlug, err)
130+
return
131+
}
132+
133+
t.Logf("deleted pipeline %q", pipelineSlug)
93134
}
94135

95136
// searchAllPipelines fetches all pipelines matching the search prefix, paginating through all results.

internal/integration/integration_test.go

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ func TestDefaultQueue(t *testing.T) {
112112
"queue=default",
113113
"pseudoQueue="+tc.ShortPipelineName(),
114114
)
115-
build := tc.TriggerBuild(ctx, *pipeline.GraphQLID)
115+
build := tc.TriggerBuild(ctx, pipeline.GraphQLID)
116116
tc.AssertSuccess(ctx, build)
117117
tc.AssertLogsContain(build, "Hi there")
118118
}
@@ -141,7 +141,7 @@ func TestExplicitDefaultQueue(t *testing.T) {
141141
tc.StartController(ctx, cfg,
142142
"pseudoQueue="+tc.ShortPipelineName(),
143143
)
144-
build := tc.TriggerBuild(ctx, *pipeline.GraphQLID)
144+
build := tc.TriggerBuild(ctx, pipeline.GraphQLID)
145145
tc.AssertSuccess(ctx, build)
146146
tc.AssertLogsContain(build, "Hi there")
147147
}
@@ -367,19 +367,14 @@ func TestMaxInFlightLimited(t *testing.T) {
367367
buildID := tc.TriggerBuild(ctx, pipelineID).Number
368368

369369
for {
370-
build, _, err := tc.Buildkite.Builds.Get(
371-
tc.Org,
372-
tc.PipelineName,
373-
strconv.Itoa(buildID),
374-
nil,
375-
)
370+
build, _, err := tc.Buildkite.Builds.Get(ctx, tc.Org, tc.PipelineName, strconv.Itoa(buildID), nil)
376371
if err != nil {
377372
t.Fatalf("tc.Buildkite.Builds.Get(%q, %q, %d, nil) error = %v", tc.Org, tc.PipelineName, buildID, err)
378373
}
379374

380-
switch *build.State {
375+
switch build.State {
381376
case "running":
382-
if got, want := *build.Pipeline.RunningJobsCount, cfg.MaxInFlight; got > want {
377+
if got, want := build.Pipeline.RunningJobsCount, cfg.MaxInFlight; got > want {
383378
t.Fatalf("*build.Pipeline.RunningJobsCount = %d, want <= %d", got, want)
384379
}
385380

@@ -390,7 +385,7 @@ func TestMaxInFlightLimited(t *testing.T) {
390385
t.Log("waiting for build to start")
391386

392387
default:
393-
t.Fatalf("unexpected build state: %v", *build.State)
388+
t.Fatalf("unexpected build state: %v", build.State)
394389
}
395390

396391
// Don't want to hammer the API.
@@ -417,21 +412,16 @@ func TestMaxInFlightUnlimited(t *testing.T) {
417412
maxRunningJobs := 0
418413
fetchBuildStateLoop:
419414
for {
420-
build, _, err := tc.Buildkite.Builds.Get(
421-
tc.Org,
422-
tc.PipelineName,
423-
strconv.Itoa(buildID),
424-
nil,
425-
)
415+
build, _, err := tc.Buildkite.Builds.Get(ctx, tc.Org, tc.PipelineName, strconv.Itoa(buildID), nil)
426416
if err != nil {
427417
t.Fatalf("tc.Buildkite.Builds.Get(%q, %q, %d, nil) error = %v", tc.Org, tc.PipelineName, buildID, err)
428418
}
429419

430-
switch *build.State {
420+
switch build.State {
431421
case "running":
432422
runningJobs := 0
433423
for _, job := range build.Jobs {
434-
if *job.State == "running" {
424+
if job.State == "running" {
435425
runningJobs++
436426
}
437427
}
@@ -445,7 +435,7 @@ fetchBuildStateLoop:
445435
t.Log("waiting for build to start")
446436

447437
default:
448-
t.Fatalf("unexpected build state: %v", *build.State)
438+
t.Fatalf("unexpected build state: %v", build.State)
449439
}
450440

451441
// Don't want to hammer the API.

0 commit comments

Comments
 (0)