Skip to content

Commit 5fe9c4f

Browse files
Merge pull request #312 from uselagoon/fix-queue-cancellations
refactor: build qos and cache logic
2 parents ea898a6 + 6b39796 commit 5fe9c4f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+1378
-1100
lines changed

api/lagoon/v1beta2/lagoonbuild_helpers.go

Lines changed: 297 additions & 29 deletions
Large diffs are not rendered by default.

api/lagoon/v1beta2/lagoonbuild_helpers_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ func TestSortQueuedBuilds(t *testing.T) {
115115
}
116116
}
117117

118-
func TestSortQueuedNamespaceBuilds(t *testing.T) {
118+
func TestSortQueuedNamespaceBuildsByCreation(t *testing.T) {
119119
type args struct {
120120
namespace string
121121
pendingBuilds []string
@@ -127,7 +127,7 @@ func TestSortQueuedNamespaceBuilds(t *testing.T) {
127127
wantErr bool
128128
}{
129129
{
130-
name: "test1 - namespace1 builds only sorted by priority then creation",
130+
name: "test1 - namespace1 builds only sorted by creation",
131131
args: args{
132132
namespace: "namespace1",
133133
pendingBuilds: []string{
@@ -154,13 +154,13 @@ func TestSortQueuedNamespaceBuilds(t *testing.T) {
154154
}
155155
for _, tt := range tests {
156156
t.Run(tt.name, func(t *testing.T) {
157-
got, err := SortQueuedNamespaceBuilds(tt.args.namespace, tt.args.pendingBuilds)
157+
got, err := SortQueuedNamespaceBuildsByCreation(tt.args.namespace, tt.args.pendingBuilds)
158158
if (err != nil) != tt.wantErr {
159-
t.Errorf("SortQueuedNamespaceBuilds() error = %v, wantErr %v", err, tt.wantErr)
159+
t.Errorf("SortQueuedNamespaceBuildsByCreation() error = %v, wantErr %v", err, tt.wantErr)
160160
return
161161
}
162162
if !reflect.DeepEqual(got, tt.want) {
163-
t.Errorf("SortQueuedNamespaceBuilds() = %v, want %v", got, tt.want)
163+
t.Errorf("SortQueuedNamespaceBuildsByCreation() = %v, want %v", got, tt.want)
164164
}
165165
})
166166
}

api/lagoon/v1beta2/lagoonbuild_types.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package v1beta2
1818
import (
1919
"encoding/json"
2020
"strings"
21+
"time"
2122

2223
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2324
)
@@ -182,6 +183,7 @@ type CachedBuildItem struct {
182183
Step string `json:"step"`
183184
DockerBuild bool `json:"dockerBuild"`
184185
CreationTimestamp int64 `json:"creationTimestamp"`
186+
UpdatedTimestamp int64 `json:"updatedTimestamp"`
185187
}
186188

187189
func (q *CachedBuildItem) String() string {
@@ -203,6 +205,7 @@ func NewCachedBuildItem(lagoonBuild LagoonBuild, status string, dockerBuild bool
203205
Step: lagoonBuild.Labels["lagoon.sh/buildStep"],
204206
DockerBuild: dockerBuild,
205207
CreationTimestamp: lagoonBuild.CreationTimestamp.Unix(),
208+
UpdatedTimestamp: time.Now().UTC().Unix(),
206209
}
207210
}
208211

@@ -213,6 +216,7 @@ type CachedBuildQueueItem struct {
213216
Position int `json:"position"`
214217
Length int `json:"length"`
215218
CreationTimestamp int64 `json:"creationTimestamp"`
219+
UpdatedTimestamp int64 `json:"updatedTimestamp"`
216220
}
217221

218222
func (q *CachedBuildQueueItem) String() string {
@@ -234,5 +238,21 @@ func NewCachedBuildQueueItem(lagoonBuild LagoonBuild, priority, position, length
234238
Position: position,
235239
Length: length,
236240
CreationTimestamp: lagoonBuild.CreationTimestamp.Unix(),
241+
UpdatedTimestamp: time.Now().UTC().Unix(),
237242
}
238243
}
244+
245+
const (
	// BuildFinalizer is the finalizer identifier for v1beta2 LagoonBuild resources.
	BuildFinalizer = "finalizer.lagoonbuild.crd.lagoon.sh/v1beta2"

	// NotOwnedByControllerMessage is used to describe an error where the controller was unable to start the build because
	// the `lagoon.sh/controller` label does not match this controller's name.
	NotOwnedByControllerMessage = `Build was cancelled due to an issue with the build controller.
This issue is related to the deployment system, not the repository or code base changes.
Contact your Lagoon support team for help`

	// MissingLabelsMessage is used to describe an error where the controller was unable to start the build because
	// the `lagoon.sh/controller` label is missing.
	//
	// The message previously began with a stray, unbalanced double quote; it has been removed so the
	// text matches the format of NotOwnedByControllerMessage.
	MissingLabelsMessage = `Build was cancelled due to namespace configuration issue. A label or labels are missing on the namespace.
This issue is related to the deployment system, not the repository or code base changes.
Contact your Lagoon support team for help`
)

api/lagoon/v1beta2/lagoontask_helpers.go

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@ import (
88
"time"
99

1010
"github.com/go-logr/logr"
11+
lru "github.com/hashicorp/golang-lru/v2"
1112
"github.com/uselagoon/machinery/api/schema"
1213
"github.com/uselagoon/remote-controller/internal/helpers"
1314
corev1 "k8s.io/api/core/v1"
1415
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1516
"k8s.io/apimachinery/pkg/labels"
17+
runtime "k8s.io/apimachinery/pkg/runtime"
1618
"k8s.io/apimachinery/pkg/selection"
1719
"k8s.io/apimachinery/pkg/types"
1820
ctrl "sigs.k8s.io/controller-runtime"
@@ -240,10 +242,8 @@ func updateLagoonTask(namespace string, taskSpec LagoonTaskSpec) ([]byte, error)
240242
}
241243

242244
// CancelTask handles cancelling tasks or handling if a tasks no longer exists.
243-
func CancelTask(ctx context.Context, cl client.Client, namespace string, body []byte) (bool, []byte, error) {
245+
func CancelTask(ctx context.Context, cl client.Client, namespace string, jobSpec *LagoonTaskSpec) (bool, []byte, error) {
244246
opLog := ctrl.Log.WithName("handlers").WithName("LagoonTasks")
245-
jobSpec := &LagoonTaskSpec{}
246-
_ = json.Unmarshal(body, jobSpec)
247247
var jobPod corev1.Pod
248248
// @TODO: use `taskName` in the future only
249249
taskName := fmt.Sprintf("lagoon-task-%s-%s", jobSpec.Task.ID, helpers.HashString(jobSpec.Task.ID)[0:6])
@@ -345,3 +345,46 @@ func SortQueuedNamespaceTasks(namespace string, pendingTasks []string) ([]Cached
345345
})
346346
return tasks, nil
347347
}
348+
349+
func SeedTaskStartup(cl client.Client, scheme *runtime.Scheme, controllerNamespace string,
350+
tasksCache *lru.Cache[string, string], tasksQueueCache *lru.Cache[string, string],
351+
) error {
352+
runningTasks := &LagoonTaskList{}
353+
listOption := (&client.ListOptions{}).ApplyOptions([]client.ListOption{
354+
client.MatchingLabels(map[string]string{
355+
"lagoon.sh/controller": controllerNamespace, // created by this controller
356+
"lagoon.sh/taskStatus": BuildStatusRunning.String(),
357+
}),
358+
})
359+
if err := cl.List(context.Background(), runningTasks, listOption); err != nil {
360+
return fmt.Errorf("unable to list running LagoonTasks, there may be none or something went wrong: %v", err)
361+
}
362+
for _, build := range runningTasks.Items {
363+
bc := NewCachedTaskItem(build, "Running")
364+
tasksCache.Add(build.Name, bc.String())
365+
}
366+
pendingTasks := &LagoonTaskList{}
367+
listOption = (&client.ListOptions{}).ApplyOptions([]client.ListOption{
368+
client.MatchingLabels(map[string]string{
369+
"lagoon.sh/controller": controllerNamespace, // created by this controller
370+
"lagoon.sh/taskStatus": BuildStatusPending.String(),
371+
}),
372+
})
373+
if err := cl.List(context.Background(), pendingTasks, listOption); err != nil {
374+
return fmt.Errorf("unable to list pending LagoonTasks, there may be none or something went wrong: %v", err)
375+
}
376+
sortTasks(pendingTasks)
377+
position := 1
378+
for _, build := range pendingTasks.Items {
379+
bc := NewCachedTaskQueueItem(build, position, len(pendingTasks.Items))
380+
tasksQueueCache.Add(build.Name, bc.String())
381+
}
382+
return nil
383+
}
384+
385+
func sortTasks(pendingTasks *LagoonTaskList) {
386+
sort.Slice(pendingTasks.Items, func(i, j int) bool {
387+
// sort by creation timestamp
388+
return pendingTasks.Items[i].CreationTimestamp.Before(&pendingTasks.Items[j].CreationTimestamp)
389+
})
390+
}

api/lagoon/v1beta2/lagoontask_types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,3 +253,7 @@ func NewCachedTaskQueueItem(lagoonTask LagoonTask, position, length int) CachedT
253253
CreationTimestamp: lagoonTask.CreationTimestamp.Unix(),
254254
}
255255
}
256+
257+
// TaskFinalizer is the finalizer identifier for v1beta2 LagoonTask resources.
const TaskFinalizer = "finalizer.lagoontask.crd.lagoon.sh/v1beta2"

api/lagoon/v1beta2/zz_generated.deepcopy.go

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cmd/main.go

Lines changed: 44 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"crypto/tls"
2121
"flag"
2222
"fmt"
23+
_ "net/http/pprof"
2324
"net/url"
2425
"os"
2526
"strings"
@@ -32,6 +33,7 @@ import (
3233
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
3334
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
3435
ctrl "sigs.k8s.io/controller-runtime"
36+
"sigs.k8s.io/controller-runtime/pkg/client"
3537
"sigs.k8s.io/controller-runtime/pkg/log/zap"
3638

3739
"github.com/uselagoon/remote-controller/internal/dockerhost"
@@ -174,7 +176,7 @@ func main() {
174176
var pruneLongRunningPodsCron string
175177

176178
var lffQoSEnabled bool
177-
var qosMaxBuilds int
179+
var qosTotalBuilds int
178180
var qosMaxContainerBuilds int
179181
var qosDefaultPriority int
180182

@@ -394,11 +396,11 @@ func main() {
394396
flag.IntVar(&timeoutForLongRunningTaskPods, "timeout-longrunning-task-pod-cleanup", 6, "How many hours a task pod should run before forcefully closed.")
395397

396398
// Build QoS configuration
397-
flag.BoolVar(&lffQoSEnabled, "enable-qos", false, "Flag to enable this controller with QoS for builds.")
399+
flag.BoolVar(&lffQoSEnabled, "enable-qos", true, "Deprecated: Flag to enable this controller with QoS for builds. No longer configurable")
398400
// this flag remains the same, the number of max builds flag remains unchanged to be backwards compatible
399401
flag.IntVar(&qosMaxContainerBuilds, "qos-max-builds", 20, "The total number of builds during the container build phase that can run at any one time.")
400402
// this new flag is added but defaults to 0, if it is greater than `qos-max-builds` then it will be used, otherwise it will default to the value of `qos-max-builds`
401-
flag.IntVar(&qosMaxBuilds, "qos-total-builds", 0, "The total number of builds that can run at any one time. Defaults to qos-max-builds if not provided or less than qos-max-builds.")
403+
flag.IntVar(&qosTotalBuilds, "qos-total-builds", 0, "The total number of builds that can run at any one time. Defaults to qos-max-builds if not provided or less than qos-max-builds.")
402404
flag.IntVar(&qosDefaultPriority, "qos-default", 5, "The default qos priority value to apply if one is not provided.")
403405

404406
// Task QoS configuration
@@ -565,7 +567,7 @@ func main() {
565567
os.Exit(1)
566568
}
567569

568-
cacheSize := helpers.GetEnvInt("CACHE_SIZE", 1000)
570+
cacheSize := helpers.GetEnvInt("CACHE_SIZE", 2000)
569571
// create the cancellation cache
570572
cache := expirable.NewLRU[string, string](cacheSize, nil, time.Minute*60)
571573
// create queue cache
@@ -744,8 +746,12 @@ func main() {
744746
TLSSkipVerify: tlsSkipVerify,
745747
}
746748

749+
// setup harbor config
750+
var lagoonHarbor *harbor.Harbor
751+
lagoonHarbor, _ = harbor.New(harborConfig)
752+
747753
deletion := deletions.New(mgr.GetClient(),
748-
harborConfig,
754+
lagoonHarbor,
749755
deletions.DeleteConfig{
750756
PVCRetryAttempts: pvcRetryAttempts,
751757
PVCRetryInterval: pvcRetryInterval,
@@ -756,6 +762,7 @@ func main() {
756762

757763
messaging := messenger.New(config,
758764
mgr.GetClient(),
765+
mgr.GetAPIReader(),
759766
startupConnectionAttempts,
760767
startupConnectionInterval,
761768
controllerNamespace,
@@ -767,7 +774,11 @@ func main() {
767774
enableDebug,
768775
lffSupportK8UPv2,
769776
cache,
770-
harborConfig,
777+
lagoonHarbor,
778+
lagoonTargetName,
779+
buildsCache,
780+
buildsQueueCache,
781+
qosDefaultPriority,
771782
)
772783

773784
reuseCache, _ := lru.New[string, string](cacheSize)
@@ -790,19 +801,13 @@ func main() {
790801
buildCache,
791802
)
792803
c := cron.New()
793-
// if we are running with MQ support, then start the consumer handler
794-
795-
if enableMQ {
796-
setupLog.Info("starting messaging handler")
797-
go messaging.Consumer(lagoonTargetName)
798-
}
799804

800805
// this ensures that the max number of builds is not less than the container builds support
801-
if qosMaxBuilds < qosMaxContainerBuilds {
802-
qosMaxBuilds = qosMaxContainerBuilds
806+
if qosTotalBuilds < qosMaxContainerBuilds {
807+
qosTotalBuilds = qosMaxContainerBuilds
803808
}
804809
buildQoSConfigv1beta2 := lagoonv1beta2ctrl.BuildQoS{
805-
MaxBuilds: qosMaxBuilds,
810+
TotalBuilds: qosTotalBuilds,
806811
MaxContainerBuilds: qosMaxContainerBuilds,
807812
DefaultPriority: qosDefaultPriority,
808813
}
@@ -828,6 +833,7 @@ func main() {
828833
}
829834

830835
resourceCleanup := pruner.New(mgr.GetClient(),
836+
mgr.GetAPIReader(),
831837
buildsToKeep,
832838
buildPodsToKeep,
833839
tasksToKeep,
@@ -896,7 +902,6 @@ func main() {
896902
// use cron to run a task pod cleanup task
897903
// this will check any Lagoon task pods and attempt to delete them
898904
_, err := c.AddFunc(harborCredentialCron, func() {
899-
lagoonHarbor, _ := harbor.New(harborConfig)
900905
lagoonHarbor.RotateRobotCredentials(context.Background(), mgr.GetClient())
901906
})
902907
if err != nil {
@@ -930,12 +935,24 @@ func main() {
930935

931936
c.Start()
932937

933-
// @TODO: maybe insert a pre-controller start state collector to try and seed the queue/build caches before the controllers start
938+
// create a temporary client to use in seed functions
939+
tmpClient, _ := client.New(ctrl.GetConfigOrDie(), client.Options{
940+
Scheme: scheme,
941+
})
942+
// pre-seed the queues with the current state of builds
943+
if err := lagoonv1beta2.SeedBuildStartup(tmpClient, scheme, controllerNamespace, qosDefaultPriority, buildsCache, buildsQueueCache); err != nil {
944+
setupLog.Error(err, "unable to seed controller startup state")
945+
}
946+
// pre-seed the queues with the current state of tasks
947+
if err := lagoonv1beta2.SeedTaskStartup(tmpClient, scheme, controllerNamespace, tasksCache, tasksQueueCache); err != nil {
948+
setupLog.Error(err, "unable to seed controller startup state")
949+
}
934950

935951
setupLog.Info("starting build controller")
936952
// v1beta2 is the latest version
937953
if err = (&lagoonv1beta2ctrl.LagoonBuildReconciler{
938954
Client: mgr.GetClient(),
955+
APIReader: mgr.GetAPIReader(),
939956
Log: ctrl.Log.WithName("v1beta2").WithName("LagoonBuild"),
940957
Scheme: mgr.GetScheme(),
941958
EnableMQ: enableMQ,
@@ -973,8 +990,7 @@ func main() {
973990
LFFBackupWeeklyRandom: lffBackupWeeklyRandom,
974991
LFFRouterURL: lffRouterURL,
975992
LFFHarborEnabled: lffHarborEnabled,
976-
Harbor: harborConfig,
977-
LFFQoSEnabled: lffQoSEnabled,
993+
Harbor: lagoonHarbor,
978994
BuildQoS: buildQoSConfigv1beta2,
979995
NativeCronPodMinFrequency: nativeCronPodMinFrequency,
980996
LagoonTargetName: lagoonTargetName,
@@ -1038,6 +1054,7 @@ func main() {
10381054
setupLog.Info("starting build pod monitor controller")
10391055
if err = (&lagoonv1beta2ctrl.BuildMonitorReconciler{
10401056
Client: mgr.GetClient(),
1057+
APIReader: mgr.GetAPIReader(),
10411058
Log: ctrl.Log.WithName("v1beta2").WithName("LagoonBuildPodMonitor"),
10421059
Scheme: mgr.GetScheme(),
10431060
EnableMQ: enableMQ,
@@ -1047,7 +1064,6 @@ func main() {
10471064
RandomNamespacePrefix: randomPrefix,
10481065
EnableDebug: enableDebug,
10491066
LagoonTargetName: lagoonTargetName,
1050-
LFFQoSEnabled: lffQoSEnabled,
10511067
BuildQoS: buildQoSConfigv1beta2,
10521068
Cache: cache,
10531069
DockerHost: dockerhosts,
@@ -1081,18 +1097,25 @@ func main() {
10811097
if lffHarborEnabled {
10821098
if err = (&harborctrl.HarborCredentialReconciler{
10831099
Client: mgr.GetClient(),
1100+
APIReader: mgr.GetAPIReader(),
10841101
Log: ctrl.Log.WithName("harbor").WithName("HarborCredentialReconciler"),
10851102
Scheme: mgr.GetScheme(),
10861103
LFFHarborEnabled: lffHarborEnabled,
10871104
ControllerNamespace: controllerNamespace,
1088-
Harbor: harborConfig,
1105+
Harbor: lagoonHarbor,
10891106
}).SetupWithManager(mgr); err != nil {
10901107
setupLog.Error(err, "unable to create controller", "controller", "HarborCredentialReconciler")
10911108
os.Exit(1)
10921109
}
10931110
}
10941111
// +kubebuilder:scaffold:builder
10951112

1113+
// if we are running with MQ support, then start the consumer handler
1114+
if enableMQ {
1115+
setupLog.Info("starting messaging handler")
1116+
go messaging.Consumer(lagoonTargetName)
1117+
}
1118+
10961119
setupLog.Info("starting manager")
10971120
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
10981121
setupLog.Error(err, "problem running manager")

config/default/manager_auth_proxy_patch.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,13 @@ spec:
1515
- "--leader-elect"
1616
- "--build-pod-cleanup-cron=*/1 * * * *"
1717
- "--task-pod-cleanup-cron=*/1 * * * *"
18-
- "--harbor-credential-cron=*/1 * * * *"
18+
- "--harbor-credential-cron=*/2 * * * *"
1919
- "--harbor-robot-account-expiry=1d"
2020
- "--enable-harbor"
2121
- "--harbor-enable-project-webhook"
2222
- "--enable-debug"
23-
- "--enable-qos"
2423
- "--qos-total-builds=5"
2524
- "--qos-max-builds=3"
26-
- "--enable-deprecated-apis"
2725
- "--lagoon-feature-flag-support-k8upv2"
2826
- "--skip-tls-verify"
2927
- "--enable-task-qos"

0 commit comments

Comments
 (0)