bindata/assets/kube-descheduler/softtainterdeployment.yaml (5 additions, 1 deletion)
@@ -27,7 +27,7 @@ spec:
name: "descheduler"
- name: certs-dir
secret:
secretName: kube-descheduler-serving-cert
secretName: softtainter-serving-cert
priorityClassName: "system-cluster-critical"
restartPolicy: "Always"
containers:
@@ -38,6 +38,10 @@ spec:
capabilities:
drop: ["ALL"]
image: ${SOFTTAINTER_IMAGE}
ports:
- name: metrics
containerPort: 8443

@ingvagabund (Member) commented on Nov 25, 2025:

I suggest using a different port number, since the 443 suffix resembles an apiserver. We have 10258 for the descheduler, as 10259 is used for the scheduler. There's 60000 for the operator. We could go with 60001 for the moment?

Another option is to expose the metrics through the descheduler as a carry patch, in which case there's no need for the extra configuration.

Member:

The question is how this will evolve if/when the soft-tainter logic is moved into the descheduler repository. Also, given that the descheduler is the source of truth for the classification, it's more natural to expose the metrics there.

Member:

With the current approach, the soft-tainter can run very often and thus provide an almost up-to-date classification of nodes. On the other hand, that classification does not have to correspond to what the descheduler sees, since the timing of a LowNodeUtilization plugin run may vary depending on how quickly the plugin works.

Contributor Author:

The softtainter runs with the same periodicity as the LowNodeUtilization plugin, but not in sync with it, since they are two fully independent pods. If the interval is small enough, we can assume for simplicity that their observations about node classification are consistent. This is the same assumption we make when applying the soft taints to nodes.
The descheduler classification is the source of truth for evictions, while the softtainter one is the source of truth for the soft taints.
I think the choice is more about the process for pushing it upstream:

protocol: TCP
livenessProbe:
failureThreshold: 1
httpGet:
bindata/assets/kube-descheduler/softtainterservice.yaml (22 additions, 0 deletions)
@@ -0,0 +1,22 @@
apiVersion: v1
kind: Service
metadata:
annotations:
include.release.openshift.io/self-managed-high-availability: "true"
include.release.openshift.io/single-node-developer: "true"
service.beta.openshift.io/serving-cert-secret-name: softtainter-serving-cert
exclude.release.openshift.io/internal-openshift-hosted: "true"
labels:
app: softtainer
name: softtainter-metrics
namespace: openshift-kube-descheduler-operator
spec:
ports:
- name: https
port: 8443
protocol: TCP
targetPort: 8443
selector:
app: softtainer
sessionAffinity: None
type: ClusterIP
bindata/assets/kube-descheduler/softtainterservicemonitor.yaml (25 additions, 0 deletions)
@@ -0,0 +1,25 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: softtainter
namespace: openshift-kube-descheduler-operator
annotations:
exclude.release.openshift.io/internal-openshift-hosted: "true"
include.release.openshift.io/self-managed-high-availability: "true"
include.release.openshift.io/single-node-developer: "true"
spec:
endpoints:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
interval: 30s
path: /metrics
port: https
scheme: https
tlsConfig:
caFile: /etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt
serverName: softtainter-metrics.openshift-kube-descheduler-operator.svc
namespaceSelector:
matchNames:
- openshift-kube-descheduler-operator
selector:
matchLabels:
app: softtainer
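
For anyone validating the new metrics path by hand, here is a minimal Go sketch (illustrative only, not part of the PR) that queries the endpoint the same way the ServiceMonitor is configured to scrape it: bearer token auth over HTTPS, trusting the OpenShift service CA and the service DNS name. The CA file path used by the client pod is an assumption about how the service CA bundle is mounted.

// Illustrative only: fetch the softtainter metrics the way the ServiceMonitor
// above is configured to (bearer token over HTTPS, trusting the service CA).
// The service CA path below is an assumption about the client pod's mounts.
package main

import (
	"crypto/tls"
	"crypto/x509"
	"fmt"
	"io"
	"net/http"
	"os"
)

func main() {
	token, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/token")
	if err != nil {
		panic(err)
	}
	// Assumed mount point for the OpenShift service CA bundle.
	caPEM, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt")
	if err != nil {
		panic(err)
	}
	pool := x509.NewCertPool()
	if !pool.AppendCertsFromPEM(caPEM) {
		panic("could not parse service CA bundle")
	}

	client := &http.Client{Transport: &http.Transport{
		TLSClientConfig: &tls.Config{
			RootCAs: pool,
			// Must match the serving certificate issued for the Service.
			ServerName: "softtainter-metrics.openshift-kube-descheduler-operator.svc",
		},
	}}

	req, err := http.NewRequest(http.MethodGet,
		"https://softtainter-metrics.openshift-kube-descheduler-operator.svc:8443/metrics", nil)
	if err != nil {
		panic(err)
	}
	req.Header.Set("Authorization", "Bearer "+string(token))

	resp, err := client.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Printf("status=%d bytes=%d\n", resp.StatusCode, len(body))
}

Run from a pod in the cluster, this should return a 200 with Prometheus text-format metrics once the serving certificate has been issued for the Service.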
cmd/soft-tainter/main.go (11 additions, 0 deletions)
@@ -23,6 +23,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/manager/signals"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"

goflag "flag"
flag "github.com/spf13/pflag"
@@ -44,6 +45,9 @@ var (
const (
HealthProbeHost = "0.0.0.0"
HealthProbePort int32 = 6060
MetricsHost = "0.0.0.0"
MetricsPort int32 = 8443
CertsDir = "/certs-dir"
readinessEndpointName = "/readyz"
livenessEndpointName = "/livez"
)
@@ -77,6 +81,13 @@ func getManagerOptions(operatorNamespace string, needLeaderElection bool, scheme
LeaderElectionNamespace: operatorNamespace,
Cache: getCacheOption(operatorNamespace),
Scheme: scheme,
Metrics: metricsserver.Options{
BindAddress: fmt.Sprintf("%s:%d", MetricsHost, MetricsPort),
SecureServing: true,
CertDir: CertsDir,
CertName: "tls.crt",
KeyName: "tls.key",
},
}
}

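For context, the Metrics options added above are what enable controller-runtime's built-in secure metrics server. The following minimal sketch shows how such options are typically consumed; it assumes a plain manager.New call and is not necessarily this repository's actual wiring.

// A minimal sketch, assuming a plain manager.New call: controller-runtime
// starts an HTTPS server on 0.0.0.0:8443 serving /metrics with the
// certificate and key mounted from the serving-cert secret.
package main

import (
	"fmt"

	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/manager"
	metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
)

func main() {
	opts := manager.Options{
		Metrics: metricsserver.Options{
			BindAddress:   fmt.Sprintf("%s:%d", "0.0.0.0", 8443),
			SecureServing: true,
			// /certs-dir is where the softtainter deployment mounts the
			// softtainter-serving-cert secret (tls.crt / tls.key).
			CertDir:  "/certs-dir",
			CertName: "tls.crt",
			KeyName:  "tls.key",
		},
	}
	mgr, err := manager.New(ctrl.GetConfigOrDie(), opts)
	if err != nil {
		panic(err)
	}
	// Start blocks; the metrics server is started and stopped with the manager.
	if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
		panic(err)
	}
}

Because the metrics server shares the manager's lifecycle, no extra HTTP server code is needed in the soft-tainter binary; the bind address, the serving certificate files, and the container/Service port just have to agree.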
pkg/operator/target_config_reconciler.go (41 additions, 0 deletions)
@@ -339,6 +339,20 @@ func (c TargetConfigReconciler) sync() error {
specAnnotations["prometheusrule/descheduler-psi-alert"] = resourceVersion
}

if stService, _, err := c.manageSoftTainterService(descheduler, isSoftTainterNeeded); err != nil {
return err
} else {
resourceVersion := "0"
if stService != nil {
resourceVersion = stService.ObjectMeta.ResourceVersion
}
specAnnotations["services/softtainter-metrics"] = resourceVersion
}

if _, err := c.manageSoftTainterServiceMonitor(descheduler, isSoftTainterNeeded); err != nil {
return err
}

if role, _, err := c.manageRole(descheduler); err != nil {
return err
} else {
@@ -654,6 +668,33 @@ func (c *TargetConfigReconciler) manageServiceMonitor(descheduler *deschedulerv1
return changed, err
}

func (c *TargetConfigReconciler) manageSoftTainterService(descheduler *deschedulerv1.KubeDescheduler, stEnabled bool) (*v1.Service, bool, error) {
required := resourceread.ReadServiceV1OrDie(bindata.MustAsset("assets/kube-descheduler/softtainterservice.yaml"))
required.Namespace = descheduler.Namespace
ownerReference := metav1.OwnerReference{
APIVersion: "operator.openshift.io/v1",
Kind: "KubeDescheduler",
Name: descheduler.Name,
UID: descheduler.UID,
}
controller.EnsureOwnerRef(required, ownerReference)

if stEnabled {
return resourceapply.ApplyService(c.ctx, c.kubeClient.CoreV1(), c.eventRecorder, required)
}
return resourceapply.DeleteService(c.ctx, c.kubeClient.CoreV1(), c.eventRecorder, required)
}

func (c *TargetConfigReconciler) manageSoftTainterServiceMonitor(descheduler *deschedulerv1.KubeDescheduler, stEnabled bool) (bool, error) {
required := resourceread.ReadUnstructuredOrDie(bindata.MustAsset("assets/kube-descheduler/softtainterservicemonitor.yaml"))
if stEnabled {
_, changed, err := resourceapply.ApplyKnownUnstructured(c.ctx, c.dynamicClient, c.eventRecorder, required)
return changed, err
}
_, changed, err := resourceapply.DeleteKnownUnstructured(c.ctx, c.dynamicClient, c.eventRecorder, required)
return changed, err
}

func defaultEvictorOverrides(profileCustomizations *deschedulerv1.ProfileCustomizations, pluginConfig *v1alpha2.PluginConfig) error {
// set priority class threshold if customized
if profileCustomizations.ThresholdPriority != nil && profileCustomizations.ThresholdPriorityClassName != "" {
pkg/operator/target_config_reconciler_test.go (137 additions, 2 deletions)
@@ -1454,7 +1454,7 @@ func TestManageSoftTainterDeployment(t *testing.T) {
TypeMeta: metav1.TypeMeta{APIVersion: "apps/v1", Kind: "Deployment"},
ObjectMeta: metav1.ObjectMeta{
Name: "softtainter",
Annotations: map[string]string{"operator.openshift.io/spec-hash": "3410714199b93034a43b3103b92abcd75fc527463744386b7eff0809471bf81f"},
Annotations: map[string]string{"operator.openshift.io/spec-hash": "4b1e0da2bfcae39b8ee9ca0c8bd4dc89f06cfb743633b7be8b18f98cc985ac7a"},
Labels: map[string]string{"app": "softtainer"},
OwnerReferences: []metav1.OwnerReference{{APIVersion: "operator.openshift.io/v1", Kind: "KubeDescheduler"}},
},
@@ -1480,6 +1480,13 @@
"-v=2",
},
Image: "RELATED_IMAGE_SOFTTAINTER_IMAGE",
Ports: []corev1.ContainerPort{
{
Name: "metrics",
ContainerPort: 8443,
Protocol: corev1.ProtocolTCP,
},
},
LivenessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{HTTPGet: &corev1.HTTPGetAction{Path: "/livez", Port: intstr.FromInt32(6060), Scheme: corev1.URISchemeHTTP}},
InitialDelaySeconds: 30,
@@ -1531,7 +1538,7 @@ func TestManageSoftTainterDeployment(t *testing.T) {
},
{
Name: "certs-dir",
VolumeSource: corev1.VolumeSource{Secret: &corev1.SecretVolumeSource{SecretName: "kube-descheduler-serving-cert"}},
VolumeSource: corev1.VolumeSource{Secret: &corev1.SecretVolumeSource{SecretName: "softtainter-serving-cert"}},
},
},
},
@@ -1993,3 +2000,131 @@ func NewFakeRecorder(bufferSize int) *fakeRecorder {
Events: make(chan string, bufferSize),
}
}

func TestManageSoftTainterService(t *testing.T) {
ctx, cancelFunc := context.WithCancel(context.TODO())
defer cancelFunc()

descheduler := &deschedulerv1.KubeDescheduler{
TypeMeta: metav1.TypeMeta{APIVersion: "operator.openshift.io/v1", Kind: "KubeDescheduler"},
ObjectMeta: metav1.ObjectMeta{
Name: "cluster",
Namespace: operatorclient.OperatorNamespace,
UID: "test-uid",
},
Spec: deschedulerv1.KubeDeschedulerSpec{
Profiles: []deschedulerv1.DeschedulerProfile{deschedulerv1.DevKubeVirtRelieveAndMigrate},
},
}

tests := []struct {
name string
enabled bool
wantService bool
wantDeleted bool
}{
{
name: "Service created when softtainter enabled",
enabled: true,
wantService: true,
wantDeleted: false,
},
{
name: "Service deleted when softtainter disabled",
enabled: false,
wantService: false,
wantDeleted: true,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
targetConfigReconciler, _ := initTargetConfigReconciler(ctx, nil, nil, nil, nil)

service, _, err := targetConfigReconciler.manageSoftTainterService(descheduler, tt.enabled)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}

if tt.wantService {
if service == nil {
t.Fatal("Expected service to be created, got nil")
}
if service.Name != "softtainter-metrics" {
t.Errorf("Expected service name 'softtainter-metrics', got %q", service.Name)
}
if service.Namespace != operatorclient.OperatorNamespace {
t.Errorf("Expected namespace %q, got %q", operatorclient.OperatorNamespace, service.Namespace)
}
if len(service.Spec.Ports) != 1 {
t.Errorf("Expected 1 port, got %d", len(service.Spec.Ports))
} else {
port := service.Spec.Ports[0]
if port.Name != "https" {
t.Errorf("Expected port name 'https', got %q", port.Name)
}
if port.Port != 8443 {
t.Errorf("Expected port 8443, got %d", port.Port)
}
}
if service.Annotations["service.beta.openshift.io/serving-cert-secret-name"] != "softtainter-serving-cert" {
t.Errorf("Expected serving cert annotation, got %q", service.Annotations["service.beta.openshift.io/serving-cert-secret-name"])
}
if len(service.OwnerReferences) != 1 {
t.Fatalf("Expected 1 owner reference, got %d", len(service.OwnerReferences))
}
ownerRef := service.OwnerReferences[0]
if ownerRef.Kind != "KubeDescheduler" || ownerRef.Name != descheduler.Name {
t.Errorf("Expected owner reference to KubeDescheduler/%s, got %s/%s", descheduler.Name, ownerRef.Kind, ownerRef.Name)
}
} else if tt.wantDeleted {
if service != nil {
t.Errorf("Expected service to be nil when deleted, got %v", service)
}
}
})
}
}

func TestManageSoftTainterServiceMonitor(t *testing.T) {
ctx, cancelFunc := context.WithCancel(context.TODO())
defer cancelFunc()

descheduler := &deschedulerv1.KubeDescheduler{
TypeMeta: metav1.TypeMeta{APIVersion: "operator.openshift.io/v1", Kind: "KubeDescheduler"},
ObjectMeta: metav1.ObjectMeta{
Name: "cluster",
Namespace: operatorclient.OperatorNamespace,
UID: "test-uid",
},
Spec: deschedulerv1.KubeDeschedulerSpec{
Profiles: []deschedulerv1.DeschedulerProfile{deschedulerv1.DevKubeVirtRelieveAndMigrate},
},
}

tests := []struct {
name string
enabled bool
}{
{
name: "ServiceMonitor created when softtainter enabled",
enabled: true,
},
{
name: "ServiceMonitor deleted when softtainter disabled",
enabled: false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
targetConfigReconciler, _ := initTargetConfigReconciler(ctx, nil, nil, nil, nil)

_, err := targetConfigReconciler.manageSoftTainterServiceMonitor(descheduler, tt.enabled)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}

})
}
}