diff --git a/go.mod b/go.mod index 6ed5321cc3..c03461ade5 100644 --- a/go.mod +++ b/go.mod @@ -64,15 +64,15 @@ require ( github.com/prometheus/procfs v0.15.1 // indirect github.com/robfig/cron/v3 v3.0.1 github.com/spf13/cobra v1.8.1 // indirect - github.com/spf13/pflag v1.0.5 // indirect + github.com/spf13/pflag v1.0.6 // indirect github.com/x448/float16 v0.8.4 // indirect - golang.org/x/net v0.33.0 // indirect + golang.org/x/net v0.35.0 // indirect golang.org/x/oauth2 v0.23.0 // indirect - golang.org/x/sys v0.28.0 // indirect - golang.org/x/term v0.27.0 // indirect - golang.org/x/tools v0.28.0 // indirect + golang.org/x/sys v0.30.0 // indirect + golang.org/x/term v0.29.0 // indirect + golang.org/x/tools v0.30.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect - google.golang.org/protobuf v1.36.1 // indirect + google.golang.org/protobuf v1.36.4 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect @@ -85,6 +85,7 @@ require ( require ( github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/google/btree v1.1.3 // indirect + github.com/rogpeppe/go-internal v1.13.1 // indirect golang.org/x/sync v0.11.0 // indirect ) diff --git a/go.sum b/go.sum index 181ab9bb4e..7a267cbe37 100644 --- a/go.sum +++ b/go.sum @@ -111,15 +111,16 @@ github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0leargg github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= -github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= -github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/samber/lo v1.49.1 h1:4BIFyVfuQSEpluc7Fua+j1NolZHiEHEpaSEKdsH0tew= github.com/samber/lo v1.49.1/go.mod h1:dO6KHFzUKXgP8LDhU0oI8d2hekjXnGOu0DB8Jecxd6o= github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ -148,8 +149,8 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= 
-golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= +golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8= +golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk= golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -160,10 +161,10 @@ golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= -golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= -golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= +golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= +golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU= +golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= @@ -174,16 +175,16 @@ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGm golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.28.0 h1:WuB6qZ4RPCQo5aP3WdKZS7i595EdWqWR8vqJTlwTVK8= -golang.org/x/tools v0.28.0/go.mod h1:dcIOrVd3mfQKTgrDVQHqCPMWy6lnhfhtX3hLXYVLfRw= +golang.org/x/tools v0.30.0 h1:BgcpHewrV5AUp2G9MebG4XPFI1E2W41zU1SaqVA9vJY= +golang.org/x/tools v0.30.0/go.mod h1:c347cR/OJfw5TI+GfX7RUPNMdDRRbjvYTS0jPyvsVtY= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= -google.golang.org/protobuf v1.36.1 h1:yBPeRvTftaleIgM3PZ/WBIZ7XM/eEYAaEyCwvyjq/gk= -google.golang.org/protobuf v1.36.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.36.4 h1:6A3ZDJHn/eNqc1i+IdefRzy/9PokBTPvcqMySR7NNIM= +google.golang.org/protobuf v1.36.4/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 
v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/pkg/controllers/disruption/controller.go b/pkg/controllers/disruption/controller.go index 0d94b49a23..351476ffce 100644 --- a/pkg/controllers/disruption/controller.go +++ b/pkg/controllers/disruption/controller.go @@ -202,7 +202,7 @@ func (c *Controller) disrupt(ctx context.Context, disruption Method) (bool, erro // 3. Add Command to orchestration.Queue to wait to delete the candiates. func (c *Controller) executeCommand(ctx context.Context, m Method, cmd Command, schedulingResults scheduling.Results) error { commandID := uuid.NewUUID() - log.FromContext(ctx).WithValues("command-id", commandID, "reason", strings.ToLower(string(m.Reason()))).Info(fmt.Sprintf("disrupting nodeclaim(s) via %s", cmd)) + log.FromContext(ctx).WithValues(append([]any{"command-id", string(commandID), "reason", strings.ToLower(string(m.Reason()))}, cmd.LogValues()...)...).Info("disrupting node(s)") // Cordon the old nodes before we launch the replacements to prevent new pods from scheduling to the old nodes if err := c.MarkDisrupted(ctx, m, cmd.candidates...); err != nil { diff --git a/pkg/controllers/disruption/emptiness.go b/pkg/controllers/disruption/emptiness.go index b5994ca96e..4fd248ce1c 100644 --- a/pkg/controllers/disruption/emptiness.go +++ b/pkg/controllers/disruption/emptiness.go @@ -104,7 +104,7 @@ func (e *Emptiness) ComputeCommand(ctx context.Context, disruptionBudgetMapping validatedCandidates, err := v.ValidateCandidates(ctx, cmd.candidates...) if err != nil { if IsValidationError(err) { - log.FromContext(ctx).V(1).Info(fmt.Sprintf("abandoning empty node consolidation attempt due to pod churn, command is no longer valid, %s", cmd)) + log.FromContext(ctx).V(1).WithValues(cmd.LogValues()...).Info("abandoning empty node consolidation attempt due to pod churn, command is no longer valid") return Command{}, scheduling.Results{}, nil } return Command{}, scheduling.Results{}, err @@ -114,7 +114,7 @@ func (e *Emptiness) ComputeCommand(ctx context.Context, disruptionBudgetMapping if lo.ContainsBy(validatedCandidates, func(c *Candidate) bool { return len(c.reschedulablePods) != 0 }) { - log.FromContext(ctx).V(1).Info(fmt.Sprintf("abandoning empty node consolidation attempt due to pod churn, command is no longer valid, %s", cmd)) + log.FromContext(ctx).V(1).WithValues(cmd.LogValues()...).Info("abandoning empty node consolidation attempt due to pod churn, command is no longer valid") return Command{}, scheduling.Results{}, nil } diff --git a/pkg/controllers/disruption/multinodeconsolidation.go b/pkg/controllers/disruption/multinodeconsolidation.go index 1f1947a967..58028cf372 100644 --- a/pkg/controllers/disruption/multinodeconsolidation.go +++ b/pkg/controllers/disruption/multinodeconsolidation.go @@ -97,7 +97,7 @@ func (m *MultiNodeConsolidation) ComputeCommand(ctx context.Context, disruptionB if err := NewValidation(m.clock, m.cluster, m.kubeClient, m.provisioner, m.cloudProvider, m.recorder, m.queue, m.Reason()).IsValid(ctx, cmd, consolidationTTL); err != nil { if IsValidationError(err) { - log.FromContext(ctx).V(1).Info(fmt.Sprintf("abandoning multi-node consolidation attempt due to pod churn, command is no longer valid, %s", cmd)) + 
log.FromContext(ctx).V(1).WithValues(cmd.LogValues()...).Info("abandoning multi-node consolidation attempt due to pod churn, command is no longer valid") return Command{}, scheduling.Results{}, nil } return Command{}, scheduling.Results{}, fmt.Errorf("validating consolidation, %w", err) @@ -128,7 +128,7 @@ func (m *MultiNodeConsolidation) firstNConsolidationOption(ctx context.Context, if lastSavedCommand.candidates == nil { log.FromContext(ctx).V(1).Info(fmt.Sprintf("failed to find a multi-node consolidation after timeout, last considered batch had %d", (min+max)/2)) } else { - log.FromContext(ctx).V(1).Info(fmt.Sprintf("stopping multi-node consolidation after timeout, returning last valid command %s", lastSavedCommand)) + log.FromContext(ctx).V(1).WithValues(lastSavedCommand.LogValues()...).Info("stopping multi-node consolidation after timeout, returning last valid command") } return lastSavedCommand, lastSavedResults, nil } diff --git a/pkg/controllers/disruption/orchestration/queue.go b/pkg/controllers/disruption/orchestration/queue.go index 17e85c344a..7d49f1caad 100644 --- a/pkg/controllers/disruption/orchestration/queue.go +++ b/pkg/controllers/disruption/orchestration/queue.go @@ -30,6 +30,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/util/workqueue" + "k8s.io/klog/v2" "k8s.io/utils/clock" controllerruntime "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -63,6 +64,29 @@ type Command struct { lastError error } +func (c *Command) LogValues() []any { + candidateNodes := lo.Map(c.candidates, func(candidate *state.StateNode, _ int) interface{} { + return map[string]interface{}{ + "Node": klog.KObj(candidate.Node), + "NodeClaim": klog.KObj(candidate.NodeClaim), + } + }) + replacementNodes := lo.Map(c.Replacements, func(replacement Replacement, _ int) interface{} { + return map[string]interface{}{ + "NodeClaim": klog.KRef("", replacement.name), + } + }) + return []any{ + "command-id", c.id, + "reason", c.reason, + "decision", c.Decision(), + "disrupted-node-count", len(candidateNodes), + "replacement-node-count", len(replacementNodes), + "disrupted-nodes", candidateNodes, + "replacement-nodes", replacementNodes, + } +} + // Replacement wraps a NodeClaim name with an initialized field to save on readiness checks and identify // when a NodeClaim is first initialized for metrics and events. type Replacement struct { @@ -175,7 +199,7 @@ func (q *Queue) Reconcile(ctx context.Context) (reconcile.Result, error) { if shutdown { panic("unexpected failure, disruption queue has shut down") } - ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("command-id", string(cmd.id))) + ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues(cmd.LogValues()...)) if err := q.waitOrTerminate(ctx, cmd); err != nil { // If recoverable, re-queue and try again. 
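
Note (not part of the patch): a minimal sketch of how a `LogValues()`-style key/value slice plugs into a logr logger the way `executeCommand` and the orchestration queue's `Reconcile` now use it. The `command` struct below is a hypothetical stand-in for the real disruption `Command`; the point is that `WithValues` accepts alternating keys and values, so the slice can be appended to extra pairs and expanded with `...`.

```go
package main

import (
	"fmt"

	"github.com/go-logr/logr/funcr"
)

// command is a hypothetical stand-in for the disruption Command type.
type command struct {
	id     string
	reason string
}

// LogValues returns alternating key/value pairs, mirroring the structured
// logging pattern this patch introduces in place of fmt.Sprintf formatting.
func (c command) LogValues() []any {
	return []any{
		"command-id", c.id,
		"reason", c.reason,
	}
}

func main() {
	// funcr gives us a logr.Logger that simply prints its structured output.
	logger := funcr.New(func(prefix, args string) {
		fmt.Println(prefix, args)
	}, funcr.Options{})

	cmd := command{id: "1234", reason: "underutilized"}

	// Extra pairs can be prepended and the LogValues slice expanded with "...",
	// as executeCommand does with "command-id" and "reason".
	logger.WithValues(append([]any{"controller", "disruption"}, cmd.LogValues()...)...).
		Info("disrupting node(s)")
}
```
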
diff --git a/pkg/controllers/disruption/singlenodeconsolidation.go b/pkg/controllers/disruption/singlenodeconsolidation.go index 8de9f69146..ad0196e15e 100644 --- a/pkg/controllers/disruption/singlenodeconsolidation.go +++ b/pkg/controllers/disruption/singlenodeconsolidation.go @@ -84,7 +84,7 @@ func (s *SingleNodeConsolidation) ComputeCommand(ctx context.Context, disruption } if err := v.IsValid(ctx, cmd, consolidationTTL); err != nil { if IsValidationError(err) { - log.FromContext(ctx).V(1).Info(fmt.Sprintf("abandoning single-node consolidation attempt due to pod churn, command is no longer valid, %s", cmd)) + log.FromContext(ctx).V(1).WithValues(cmd.LogValues()...).Info("abandoning single-node consolidation attempt due to pod churn, command is no longer valid") return Command{}, scheduling.Results{}, nil } return Command{}, scheduling.Results{}, fmt.Errorf("validating consolidation, %w", err) diff --git a/pkg/controllers/disruption/types.go b/pkg/controllers/disruption/types.go index b958f777c8..7ef8059d0c 100644 --- a/pkg/controllers/disruption/types.go +++ b/pkg/controllers/disruption/types.go @@ -17,12 +17,12 @@ limitations under the License. package disruption import ( - "bytes" "context" "fmt" "github.com/samber/lo" corev1 "k8s.io/api/core/v1" + "k8s.io/klog/v2" "k8s.io/utils/clock" "sigs.k8s.io/controller-runtime/pkg/client" @@ -140,46 +140,34 @@ func (c Command) Decision() Decision { } } -func (c Command) String() string { - var buf bytes.Buffer +func (c Command) LogValues() []any { podCount := lo.Reduce(c.candidates, func(_ int, cd *Candidate, _ int) int { return len(cd.reschedulablePods) }, 0) - fmt.Fprintf(&buf, "%s, terminating %d nodes (%d pods) ", c.Decision(), len(c.candidates), podCount) - for i, old := range c.candidates { - if i != 0 { - fmt.Fprint(&buf, ", ") + + candidateNodes := lo.Map(c.candidates, func(candidate *Candidate, _ int) interface{} { + return map[string]interface{}{ + "Node": klog.KObj(candidate.Node), + "NodeClaim": klog.KObj(candidate.NodeClaim), + "instance-type": candidate.Labels()[corev1.LabelInstanceTypeStable], + "capacity-type": candidate.Labels()[v1.CapacityTypeLabelKey], } - fmt.Fprintf(&buf, "%s", old.Name()) - fmt.Fprintf(&buf, "/%s", old.Labels()[corev1.LabelInstanceTypeStable]) - fmt.Fprintf(&buf, "/%s", old.Labels()[v1.CapacityTypeLabelKey]) - } - if len(c.replacements) == 0 { - return buf.String() - } - odNodeClaims := 0 - spotNodeClaims := 0 - for _, nodeClaim := range c.replacements { - ct := nodeClaim.Requirements.Get(v1.CapacityTypeLabelKey) - if ct.Has(v1.CapacityTypeOnDemand) { - odNodeClaims++ + }) + replacementNodes := lo.Map(c.replacements, func(replacement *scheduling.NodeClaim, _ int) interface{} { + ct := replacement.Requirements.Get(v1.CapacityTypeLabelKey) + m := map[string]interface{}{ + "capacity-type": lo.Ternary[string](ct.Has(v1.CapacityTypeSpot), v1.CapacityTypeSpot, v1.CapacityTypeOnDemand), } - if ct.Has(v1.CapacityTypeSpot) { - spotNodeClaims++ + if len(c.replacements) == 1 { + m["instance-types"] = scheduling.InstanceTypeList(replacement.InstanceTypeOptions) } + return m + }) + + return []any{ + "decision", c.Decision(), + "disrupted-node-count", len(candidateNodes), + "replacement-node-count", len(replacementNodes), + "pod-count", podCount, + "disrupted-nodes", candidateNodes, + "replacement-nodes", replacementNodes, } - // Print list of instance types for the first replacements. 
- if len(c.replacements) > 1 { - fmt.Fprintf(&buf, " and replacing with %d spot and %d on-demand, from types %s", - spotNodeClaims, odNodeClaims, - scheduling.InstanceTypeList(c.replacements[0].InstanceTypeOptions)) - return buf.String() - } - ct := c.replacements[0].Requirements.Get(v1.CapacityTypeLabelKey) - nodeDesc := "node" - if ct.Len() == 1 { - nodeDesc = fmt.Sprintf("%s node", ct.Any()) - } - fmt.Fprintf(&buf, " and replacing with %s from types %s", - nodeDesc, - scheduling.InstanceTypeList(c.replacements[0].InstanceTypeOptions)) - return buf.String() } diff --git a/pkg/controllers/nodeclaim/garbagecollection/controller.go b/pkg/controllers/nodeclaim/garbagecollection/controller.go index 00a05dbe24..fc5af465ae 100644 --- a/pkg/controllers/nodeclaim/garbagecollection/controller.go +++ b/pkg/controllers/nodeclaim/garbagecollection/controller.go @@ -100,7 +100,7 @@ func (c *Controller) Reconcile(ctx context.Context) (reconcile.Result, error) { return } log.FromContext(ctx).WithValues( - "NodeClaim", klog.KRef("", nodeClaims[i].Name), + "NodeClaim", klog.KObj(nodeClaims[i]), "Node", klog.KRef("", nodeClaims[i].Status.NodeName), "provider-id", nodeClaims[i].Status.ProviderID, ).V(1).Info("garbage collecting nodeclaim with no cloudprovider representation") diff --git a/pkg/controllers/nodeclaim/hydration/controller.go b/pkg/controllers/nodeclaim/hydration/controller.go index eb698ba7dd..f2e32236dd 100644 --- a/pkg/controllers/nodeclaim/hydration/controller.go +++ b/pkg/controllers/nodeclaim/hydration/controller.go @@ -54,7 +54,9 @@ func NewController(kubeClient client.Client, cloudProvider cloudprovider.CloudPr func (c *Controller) Reconcile(ctx context.Context, nc *v1.NodeClaim) (reconcile.Result, error) { ctx = injection.WithControllerName(ctx, c.Name()) - ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("Node", klog.KRef("", nc.Status.NodeName))) + if nc.Status.NodeName != "" { + ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("Node", klog.KRef("", nc.Status.NodeName))) + } if !nodeclaimutils.IsManaged(nc, c.cloudProvider) { return reconcile.Result{}, nil diff --git a/pkg/controllers/provisioning/provisioner.go b/pkg/controllers/provisioning/provisioner.go index ef2b5b52f5..3319c5ee1e 100644 --- a/pkg/controllers/provisioning/provisioner.go +++ b/pkg/controllers/provisioning/provisioner.go @@ -166,7 +166,7 @@ func (p *Provisioner) GetPendingPods(ctx context.Context) ([]*corev1.Pod, error) if err := p.Validate(ctx, po); err != nil { // Mark in memory that this pod is unschedulable p.cluster.MarkPodSchedulingDecisions(map[*corev1.Pod]error{po: fmt.Errorf("ignoring pod, %w", err)}, po) - log.FromContext(ctx).WithValues("Pod", klog.KRef(po.Namespace, po.Name)).V(1).Info(fmt.Sprintf("ignoring pod, %s", err)) + log.FromContext(ctx).WithValues("Pod", klog.KObj(po)).V(1).Info(fmt.Sprintf("ignoring pod, %s", err)) return true } return false @@ -220,7 +220,7 @@ func (p *Provisioner) NewScheduler(ctx context.Context, pods []*corev1.Pod, stat } nodePools = lo.Filter(nodePools, func(np *v1.NodePool, _ int) bool { if !np.StatusConditions().IsTrue(status.ConditionReady) { - log.FromContext(ctx).WithValues("NodePool", klog.KRef("", np.Name)).Error(err, "ignoring nodepool, not ready") + log.FromContext(ctx).WithValues("NodePool", klog.KObj(np)).Error(err, "ignoring nodepool, not ready") return false } return np.DeletionTimestamp.IsZero() @@ -239,11 +239,11 @@ func (p *Provisioner) NewScheduler(ctx context.Context, pods []*corev1.Pod, stat // Get instance type options its, err := 
p.cloudProvider.GetInstanceTypes(ctx, np) if err != nil { - log.FromContext(ctx).WithValues("NodePool", klog.KRef("", np.Name)).Error(err, "skipping, unable to resolve instance types") + log.FromContext(ctx).WithValues("NodePool", klog.KObj(np)).Error(err, "skipping, unable to resolve instance types") continue } if len(its) == 0 { - log.FromContext(ctx).WithValues("NodePool", klog.KRef("", np.Name)).Info("skipping, no resolved instance types found") + log.FromContext(ctx).WithValues("NodePool", klog.KObj(np)).Info("skipping, no resolved instance types found") continue } instanceTypes[np.Name] = its @@ -314,7 +314,7 @@ func (p *Provisioner) Schedule(ctx context.Context) (scheduler.Results, error) { results := s.Solve(ctx, pods).TruncateInstanceTypes(scheduler.MaxInstanceTypes) scheduler.UnschedulablePodsCount.Set(float64(len(results.PodErrors)), map[string]string{scheduler.ControllerLabel: injection.GetControllerName(ctx)}) if len(results.NewNodeClaims) > 0 { - log.FromContext(ctx).WithValues("Pods", pretty.Slice(lo.Map(pods, func(p *corev1.Pod, _ int) string { return klog.KRef(p.Namespace, p.Name).String() }), 5), "duration", time.Since(start)).Info("found provisionable pod(s)") + log.FromContext(ctx).WithValues("Pods", pretty.Slice(lo.Map(pods, func(p *corev1.Pod, _ int) string { return klog.KObj(p).String() }), 5), "duration", time.Since(start)).Info("found provisionable pod(s)") } // Mark in memory when these pods were marked as schedulable or when we made a decision on the pods p.cluster.MarkPodSchedulingDecisions(results.PodErrors, pendingPods...) @@ -341,7 +341,7 @@ func (p *Provisioner) Create(ctx context.Context, n *scheduler.NodeClaim, opts . return req.Key == corev1.LabelInstanceTypeStable }) - log.FromContext(ctx).WithValues("NodeClaim", klog.KRef("", nodeClaim.Name), "requests", nodeClaim.Spec.Resources.Requests, "instance-types", instanceTypeList(instanceTypeRequirement.Values)). + log.FromContext(ctx).WithValues("NodeClaim", klog.KObj(nodeClaim), "requests", nodeClaim.Spec.Resources.Requests, "instance-types", instanceTypeList(instanceTypeRequirement.Values)). 
Info("created nodeclaim") metrics.NodeClaimsCreatedTotal.Inc(map[string]string{ metrics.ReasonLabel: options.Reason, @@ -429,7 +429,7 @@ func (p *Provisioner) injectVolumeTopologyRequirements(ctx context.Context, pods var schedulablePods []*corev1.Pod for _, pod := range pods { if err := p.volumeTopology.Inject(ctx, pod); err != nil { - log.FromContext(ctx).WithValues("Pod", klog.KRef(pod.Namespace, pod.Name)).Error(err, "failed getting volume topology requirements") + log.FromContext(ctx).WithValues("Pod", klog.KObj(pod)).Error(err, "failed getting volume topology requirements") } else { schedulablePods = append(schedulablePods, pod) } diff --git a/pkg/controllers/provisioning/scheduling/preferences.go b/pkg/controllers/provisioning/scheduling/preferences.go index 53305acd53..9ec3ff4f1b 100644 --- a/pkg/controllers/provisioning/scheduling/preferences.go +++ b/pkg/controllers/provisioning/scheduling/preferences.go @@ -49,7 +49,7 @@ func (p *Preferences) Relax(ctx context.Context, pod *v1.Pod) bool { for _, relaxFunc := range relaxations { if reason := relaxFunc(pod); reason != nil { - log.FromContext(ctx).WithValues("Pod", klog.KRef(pod.Namespace, pod.Name)).V(1).Info(fmt.Sprintf("relaxing soft constraints for pod since it previously failed to schedule, %s", lo.FromPtr(reason))) + log.FromContext(ctx).WithValues("Pod", klog.KObj(pod)).V(1).Info(fmt.Sprintf("relaxing soft constraints for pod since it previously failed to schedule, %s", lo.FromPtr(reason))) return true } } diff --git a/pkg/controllers/provisioning/scheduling/scheduler.go b/pkg/controllers/provisioning/scheduling/scheduler.go index 398942c536..960f8c5ffd 100644 --- a/pkg/controllers/provisioning/scheduling/scheduler.go +++ b/pkg/controllers/provisioning/scheduling/scheduler.go @@ -65,7 +65,7 @@ func NewScheduler(ctx context.Context, kubeClient client.Client, nodePools []*v1 nct.InstanceTypeOptions, _ = filterInstanceTypesByRequirements(instanceTypes[np.Name], nct.Requirements, corev1.ResourceList{}, corev1.ResourceList{}, corev1.ResourceList{}) if len(nct.InstanceTypeOptions) == 0 { recorder.Publish(NoCompatibleInstanceTypes(np)) - log.FromContext(ctx).WithValues("NodePool", klog.KRef("", np.Name)).Info("skipping, nodepool requirements filtered out all instance types") + log.FromContext(ctx).WithValues("NodePool", klog.KObj(np)).Info("skipping, nodepool requirements filtered out all instance types") return nil, false } return nct, true @@ -124,7 +124,7 @@ type Results struct { func (r Results) Record(ctx context.Context, recorder events.Recorder, cluster *state.Cluster) { // Report failures and nominations for p, err := range r.PodErrors { - log.FromContext(ctx).WithValues("Pod", klog.KRef(p.Namespace, p.Name)).Error(err, "could not schedule pod") + log.FromContext(ctx).WithValues("Pod", klog.KObj(p)).Error(err, "could not schedule pod") recorder.Publish(PodFailedToScheduleEvent(p, err)) } for _, existing := range r.ExistingNodes { diff --git a/pkg/controllers/provisioning/scheduling/volumetopology.go b/pkg/controllers/provisioning/scheduling/volumetopology.go index a4dc0be961..14d218eb13 100644 --- a/pkg/controllers/provisioning/scheduling/volumetopology.go +++ b/pkg/controllers/provisioning/scheduling/volumetopology.go @@ -72,7 +72,7 @@ func (v *VolumeTopology) Inject(ctx context.Context, pod *v1.Pod) error { } log.FromContext(ctx). - WithValues("Pod", klog.KRef(pod.Namespace, pod.Name)). + WithValues("Pod", klog.KObj(pod)). 
V(1).Info(fmt.Sprintf("adding requirements derived from pod volumes, %s", requirements)) return nil } diff --git a/pkg/scheduling/volumeusage.go b/pkg/scheduling/volumeusage.go index d8575d1a2e..60efc84d11 100644 --- a/pkg/scheduling/volumeusage.go +++ b/pkg/scheduling/volumeusage.go @@ -88,7 +88,7 @@ func GetVolumes(ctx context.Context, kubeClient client.Client, pod *v1.Pod) (Vol // computing limits, otherwise Karpenter may never be able to update its cluster state. if err != nil { if errors.IsNotFound(err) { - log.FromContext(ctx).WithValues("Pod", klog.KRef(pod.Namespace, pod.Name), "volume", volume.Name).Error(err, "failed tracking CSI volume limits for volume") + log.FromContext(ctx).WithValues("Pod", klog.KObj(pod), "volume", volume.Name).Error(err, "failed tracking CSI volume limits for volume") continue } return nil, fmt.Errorf("failed updating volume limits, %w", err) @@ -134,7 +134,7 @@ func resolveDriver(ctx context.Context, kubeClient client.Client, pod *v1.Pod, v // In either of these cases, a PV must have been previously bound to the PVC and has since been removed. We can // ignore this PVC while computing limits and continue. if storageClassName == "" { - log.FromContext(ctx).WithValues("volume", volumeName, "Pod", klog.KRef(pod.Namespace, pod.Name), "PersistentVolumeClaim", klog.KRef(pvc.Namespace, pvc.Name)).V(1).Info("failed tracking CSI volume limits for volume with unbound PVC, no storage class specified") + log.FromContext(ctx).WithValues("volume", volumeName, "Pod", klog.KObj(pod), "PersistentVolumeClaim", klog.KObj(pvc)).V(1).Info("failed tracking CSI volume limits for volume with unbound PVC, no storage class specified") return "", nil } @@ -145,7 +145,7 @@ func resolveDriver(ctx context.Context, kubeClient client.Client, pod *v1.Pod, v // 2. The StorageClass never existed and was used to bind the PVC to an existing PV, but that PV was removed // In either of these cases, we should ignore the PVC while computing limits and continue. if errors.IsNotFound(err) { - log.FromContext(ctx).WithValues("volume", volumeName, "Pod", klog.KRef(pod.Namespace, pod.Name), "PersistentVolumeClaim", klog.KRef(pvc.Namespace, pvc.Name), "StorageClass", klog.KRef("", storageClassName)).V(1).Info(fmt.Sprintf("failed tracking CSI volume limits for volume with unbound PVC, %s", err)) + log.FromContext(ctx).WithValues("volume", volumeName, "Pod", klog.KObj(pod), "PersistentVolumeClaim", klog.KObj(pvc), "StorageClass", klog.KRef("", storageClassName)).V(1).Info(fmt.Sprintf("failed tracking CSI volume limits for volume with unbound PVC, %s", err)) return "", nil } return "", err
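
Note (not part of the patch): a small sketch of why the patch swaps `klog.KRef(pod.Namespace, pod.Name)` and `klog.KRef("", name)` for `klog.KObj` on namespaced objects. The pod name and namespace below are made up; the point is that `KObj` reads namespace and name from the object's own metadata, so the namespace cannot be dropped at the call site.

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/klog/v2"
)

func main() {
	pod := &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "inflate-abc12", Namespace: "default"},
	}

	// KObj derives the reference from the object itself.
	fmt.Println(klog.KObj(pod)) // default/inflate-abc12

	// KRef relies on the caller passing the right namespace explicitly.
	fmt.Println(klog.KRef(pod.Namespace, pod.Name)) // default/inflate-abc12
	fmt.Println(klog.KRef("", pod.Name))            // inflate-abc12 (namespace dropped)
}
```
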