2 changes: 1 addition & 1 deletion Makefile
@@ -85,7 +85,7 @@ build.docker.ghcr: build.multiarch

# Local build target for e2e testing (single arch with --load)
build.docker.local: build.multiarch
docker buildx build --platform linux/amd64 -t "$(IMAGE):$(TAG)" -f Dockerfile.ghcr --load .
docker buildx build --platform linux/$(shell docker version --format '{{.Server.Arch}}') -t "$(IMAGE):$(TAG)" -f Dockerfile.ghcr --load .

build.push.ghcr: build.multiarch
docker buildx build --platform linux/amd64,linux/arm64 -t "$(IMAGE):$(TAG)" -f Dockerfile.ghcr --push .
146 changes: 146 additions & 0 deletions cmd/e2e/basic_test.go
@@ -1,13 +1,17 @@
package main

import (
"context"
"fmt"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
zv1 "github.com/zalando-incubator/es-operator/pkg/apis/zalando.org/v1"
appsv1 "k8s.io/api/apps/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
)

type TestEDSSpecFactory struct {
@@ -133,3 +137,145 @@ func TestEDSCreateBasic9(t *testing.T) {
err := deleteEDS(edsName)
require.NoError(t, err)
}

func TestPodLabelMigration(t *testing.T) {
t.Parallel()
edsName := "e2e-migrate"
version := "8.6.2"
configMap := "es8-config"

// Create EDS first
testEDSCreate(t, edsName, version, configMap)

// Wait for pods to be created and ready
require.Eventually(t, func() bool {
pods, err := kubernetesClient.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{
LabelSelector: "es-operator-dataset=" + edsName,
})
if err != nil {
return false
}
return len(pods.Items) > 0
}, 60*time.Second, 2*time.Second, "pods for EDS not created with labels")

// Get the initial pods
pods, err := kubernetesClient.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{
LabelSelector: "es-operator-dataset=" + edsName,
})
require.NoError(t, err)
require.NotEmpty(t, pods.Items, "No pods found with the expected label")

// Verify the label is initially present on all pods (this tests template injection)
for _, pod := range pods.Items {
labelValue, exists := pod.Labels["es-operator-dataset"]
assert.True(t, exists, "Pod %s missing es-operator-dataset label", pod.Name)
assert.Equal(t, edsName, labelValue, "Pod %s has incorrect label value", pod.Name)
}

// Test the migration path by simulating a pre-migration scenario
t.Logf("Testing migration path: removing labels from %d pods", len(pods.Items))
var unlabeledPods []string

// Remove labels from all pods to simulate pre-migration state
for _, pod := range pods.Items {
patch := []byte(`{"metadata":{"labels":{"es-operator-dataset":null}}}`)
_, err := kubernetesClient.CoreV1().Pods(namespace).Patch(
context.Background(),
pod.Name,
types.StrategicMergePatchType,
patch,
metav1.PatchOptions{},
)
require.NoError(t, err, "Failed to remove label from pod %s", pod.Name)
unlabeledPods = append(unlabeledPods, pod.Name)
}

// Verify labels were removed
require.Eventually(t, func() bool {
for _, podName := range unlabeledPods {
pod, err := kubernetesClient.CoreV1().Pods(namespace).Get(context.Background(), podName, metav1.GetOptions{})
if err != nil {
return false
}
if _, exists := pod.Labels["es-operator-dataset"]; exists {
return false
}
}
return true
}, 30*time.Second, 2*time.Second, "Labels were not removed from pods")

// Remove migration completion annotation to force re-migration
eds, err := edsInterface().Get(context.Background(), edsName, metav1.GetOptions{})
require.NoError(t, err)

migrationAnnotationKey := "es-operator.zalando.org/pod-label-migration-complete"
if eds.Annotations != nil && eds.Annotations[migrationAnnotationKey] != "" {
patch := []byte(fmt.Sprintf(`{"metadata":{"annotations":{"%s":null}}}`, migrationAnnotationKey))
_, err := edsInterface().Patch(
context.Background(),
edsName,
types.StrategicMergePatchType,
patch,
metav1.PatchOptions{},
)
require.NoError(t, err, "Failed to remove migration annotation from EDS")
t.Logf("Removed migration annotation to trigger fresh migration")
}

// Restart the operator to trigger migration logic (which runs during startup)
// This is more realistic than creating a temporary EDS
t.Logf("Restarting operator to trigger pod label migration during startup")
err = restartOperator(t)
require.NoError(t, err, "Failed to restart operator")

// Verify that the operator actually performed the migration by checking that labels were restored
t.Logf("Verifying that operator migration restored labels automatically")

// Verify all pods have labels restored by the operator migration
require.Eventually(t, func() bool {
pods, err := kubernetesClient.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{
LabelSelector: "es-operator-dataset=" + edsName,
})
if err != nil {
t.Logf("Failed to list pods with label selector: %v", err)
return false
}
t.Logf("Found %d pods with restored labels (expected %d)", len(pods.Items), len(unlabeledPods))
return len(pods.Items) == len(unlabeledPods)
}, 60*time.Second, 2*time.Second, "Not all pods have labels restored by operator migration")

// Verify migration annotation was added by the operator
require.Eventually(t, func() bool {
eds, err := edsInterface().Get(context.Background(), edsName, metav1.GetOptions{})
if err != nil {
t.Logf("Failed to get EDS: %v", err)
return false
}
if eds.Annotations == nil {
t.Logf("EDS has no annotations yet")
return false
}
annotationValue := eds.Annotations[migrationAnnotationKey]
t.Logf("Migration annotation value: %s", annotationValue)
return annotationValue == "true"
}, 60*time.Second, 2*time.Second, "Migration annotation was not set by operator")

// Final verification - ensure all pods have the correct label
pods, err = kubernetesClient.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{
LabelSelector: "es-operator-dataset=" + edsName,
})
require.NoError(t, err)
require.Equal(t, len(unlabeledPods), len(pods.Items), "Mismatch in number of labeled pods")

for _, pod := range pods.Items {
labelValue, exists := pod.Labels["es-operator-dataset"]
assert.True(t, exists, "Pod %s missing es-operator-dataset label after migration", pod.Name)
assert.Equal(t, edsName, labelValue, "Pod %s has incorrect label value after migration", pod.Name)
}

t.Logf("Migration test completed successfully - operator automatically restored labels and set annotation")

// Cleanup
err = deleteEDS(edsName)
require.NoError(t, err)
}
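
For orientation, the operator-side startup migration that this test exercises is not part of this diff. Below is a minimal, hypothetical sketch of such a migration, assuming an EDS's pods can be matched by the StatefulSet name prefix `<edsName>-` and using the label key, value, and annotation key asserted by the test above; the actual operator implementation may differ.

```go
import (
	"context"
	"fmt"
	"strings"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes"
)

const (
	datasetLabelKey        = "es-operator-dataset"
	migrationAnnotationKey = "es-operator.zalando.org/pod-label-migration-complete"
)

// migratePodLabels is a hypothetical sketch: it re-labels pods that belong to the
// given EDS but are missing the dataset label, then records completion via patchEDS
// so the migration is not repeated on every operator restart.
func migratePodLabels(ctx context.Context, client kubernetes.Interface, namespace, edsName string,
	patchEDS func(ctx context.Context, name string, patch []byte) error) error {
	pods, err := client.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{})
	if err != nil {
		return err
	}
	for _, pod := range pods.Items {
		// Assumption: an EDS's pods share the StatefulSet name prefix "<edsName>-".
		if !strings.HasPrefix(pod.Name, edsName+"-") || pod.Labels[datasetLabelKey] == edsName {
			continue
		}
		patch := []byte(fmt.Sprintf(`{"metadata":{"labels":{"%s":"%s"}}}`, datasetLabelKey, edsName))
		if _, err := client.CoreV1().Pods(namespace).Patch(ctx, pod.Name,
			types.StrategicMergePatchType, patch, metav1.PatchOptions{}); err != nil {
			return err
		}
	}
	// Mark the EDS as migrated; the test asserts this annotation equals "true".
	annPatch := []byte(fmt.Sprintf(`{"metadata":{"annotations":{"%s":"true"}}}`, migrationAnnotationKey))
	return patchEDS(ctx, edsName, annPatch)
}
```

Under this sketch, the operator would call `migratePodLabels` once per EDS during startup, before beginning normal reconciliation, which is the behavior the test verifies after `restartOperator`.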
77 changes: 72 additions & 5 deletions cmd/e2e/test_utils.go
@@ -45,18 +45,18 @@ var (
},
},
Env: []v1.EnvVar{
{Name: "ES_JAVA_OPTS", Value: "-Xms356m -Xmx356m"},
{Name: "ES_JAVA_OPTS", Value: "-XX:MaxDirectMemorySize=10M -XX:MaxMetaspaceSize=155273K -XX:ReservedCodeCacheSize=240M -Xss1M -Xms684406K -Xmx684406K -XX:ActiveProcessorCount=2 -Xlog:gc"},
{Name: "node.roles", Value: "data"},
{Name: "node.attr.group", Value: nodeGroup},
},
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("1Gi"),
v1.ResourceCPU: resource.MustParse("100m"),
v1.ResourceMemory: resource.MustParse("1120Mi"),
v1.ResourceCPU: resource.MustParse("2000m"),
},
Requests: v1.ResourceList{
v1.ResourceMemory: resource.MustParse("1Gi"),
v1.ResourceCPU: resource.MustParse("100m"),
v1.ResourceMemory: resource.MustParse("1120Mi"),
v1.ResourceCPU: resource.MustParse("50m"),
},
},
ReadinessProbe: &v1.Probe{
@@ -315,3 +315,70 @@ func pint64(i int64) *int64 {
func pint32(i int32) *int32 {
return &i
}

// restartOperator restarts the es-operator deployment using rollout restart
// This triggers the migration logic that runs during operator startup
func restartOperator(t *testing.T) error {
// Find the operator deployment
deployment, err := kubernetesClient.AppsV1().Deployments(namespace).Get(
context.Background(),
"es-operator",
metav1.GetOptions{},
)
if err != nil {
return fmt.Errorf("failed to get operator deployment: %v", err)
}

// Trigger a deployment rollout restart by adding/updating a restart annotation
// This is the equivalent of `kubectl rollout restart deployment/es-operator`
if deployment.Spec.Template.Annotations == nil {
deployment.Spec.Template.Annotations = make(map[string]string)
}
deployment.Spec.Template.Annotations["kubectl.kubernetes.io/restartedAt"] = fmt.Sprintf("%d", time.Now().Unix())

t.Logf("Triggering rollout restart of deployment %s", deployment.Name)
_, err = kubernetesClient.AppsV1().Deployments(namespace).Update(
context.Background(),
deployment,
metav1.UpdateOptions{},
)
if err != nil {
return fmt.Errorf("failed to update deployment for restart: %v", err)
}

// Wait for the deployment to recreate pods and become ready
t.Logf("Waiting for operator deployment to become ready after restart")
err = newAwaiter(t, "operator deployment restart").
withTimeout(2 * time.Minute).
withPoll(func() (bool, error) {
// Check if deployment is ready
currentDeployment, err := kubernetesClient.AppsV1().Deployments(namespace).Get(
context.Background(),
"es-operator",
metav1.GetOptions{},
)
if err != nil {
return true, fmt.Errorf("failed to get deployment status: %v", err)
}

if currentDeployment.Status.ReadyReplicas == *deployment.Spec.Replicas {
t.Logf("Operator deployment is ready with %d/%d replicas",
currentDeployment.Status.ReadyReplicas, *deployment.Spec.Replicas)
return false, nil
}

t.Logf("Operator deployment not ready yet: %d/%d replicas ready",
currentDeployment.Status.ReadyReplicas, *deployment.Spec.Replicas)
return true, fmt.Errorf("deployment not ready: %d/%d replicas",
currentDeployment.Status.ReadyReplicas, *deployment.Spec.Replicas)
}).await()

if err != nil {
return fmt.Errorf("operator failed to restart: %v", err)
}

// Give the operator a moment to start processing after becoming ready
t.Logf("Operator restarted successfully, waiting for migration processing")
time.Sleep(10 * time.Second)
return nil
}
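
For reference, the restart trigger above (stamping an annotation onto the pod template) can also be expressed as a single strategic-merge patch instead of a Get-then-Update round trip. A hedged sketch, assuming the same package-level `kubernetesClient` and `namespace` used elsewhere in this file:

```go
import (
	"context"
	"fmt"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
)

// restartOperatorByPatch is a sketch of the same rollout-restart trigger, sent as a
// single strategic-merge patch against the deployment's pod template annotations.
// It relies on the package-level kubernetesClient and namespace (an assumption here).
func restartOperatorByPatch(ctx context.Context) error {
	patch := []byte(fmt.Sprintf(
		`{"spec":{"template":{"metadata":{"annotations":{"kubectl.kubernetes.io/restartedAt":"%d"}}}}}`,
		time.Now().Unix()))
	_, err := kubernetesClient.AppsV1().Deployments(namespace).Patch(
		ctx, "es-operator", types.StrategicMergePatchType, patch, metav1.PatchOptions{})
	return err
}
```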
14 changes: 14 additions & 0 deletions deploy/e2e/apply/es8-config.yaml
@@ -9,6 +9,20 @@ data:
bootstrap.memory_lock: false
discovery.seed_hosts: [es8-master]
cluster.initial_master_nodes: [es8-master-0]
cluster.no_master_block: "write"
gateway.expected_data_nodes: 0
indices.breaker.total.use_real_memory: false
indices.lifecycle.history_index_enabled: false
slm.history_index_enabled: false

# Disable X-Pack features
xpack.security.enabled: false
xpack.security.transport.ssl.enabled: false
xpack.security.http.ssl.enabled: false
xpack.ml.enabled: false
xpack.watcher.enabled: false
xpack.graph.enabled: false
xpack.ccr.enabled: false

# Disable EQL
xpack.eql.enabled: false
6 changes: 3 additions & 3 deletions deploy/e2e/apply/es8-master.yaml
@@ -25,18 +25,18 @@ spec:
resources:
requests:
memory: 1120Mi
cpu: 100m
cpu: 50m
limits:
memory: 1120Mi
cpu: 100m
cpu: 2000m
image: "docker.elastic.co/elasticsearch/elasticsearch:8.19.5"
env:
- name: "node.name"
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: "ES_JAVA_OPTS"
value: "-Xms560m -Xmx560m"
value: "-XX:MaxDirectMemorySize=10M -XX:MaxMetaspaceSize=155273K -XX:ReservedCodeCacheSize=240M -Xss1M -Xms684406K -Xmx684406K -XX:ActiveProcessorCount=2 -XX:+UseCompactObjectHeaders -Xlog:gc"
- name: node.roles
value: "master,data"
readinessProbe:
14 changes: 14 additions & 0 deletions deploy/e2e/apply/es9-config.yaml
@@ -9,6 +9,20 @@ data:
bootstrap.memory_lock: false
discovery.seed_hosts: [es9-master]
cluster.initial_master_nodes: [es9-master-0]
cluster.no_master_block: "write"
gateway.expected_data_nodes: 0
indices.breaker.total.use_real_memory: false
indices.lifecycle.history_index_enabled: false
slm.history_index_enabled: false

# Disable X-Pack features
xpack.security.enabled: false
xpack.security.transport.ssl.enabled: false
xpack.security.http.ssl.enabled: false
xpack.ml.enabled: false
xpack.watcher.enabled: false
xpack.graph.enabled: false
xpack.ccr.enabled: false

# Disable EQL
xpack.eql.enabled: false
6 changes: 3 additions & 3 deletions deploy/e2e/apply/es9-master.yaml
@@ -23,18 +23,18 @@ spec:
resources:
requests:
memory: 1120Mi
cpu: 100m
cpu: 50m
limits:
memory: 1120Mi
cpu: 100m
cpu: 2000m
image: "docker.elastic.co/elasticsearch/elasticsearch:9.1.5"
env:
- name: "node.name"
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: "ES_JAVA_OPTS"
value: "-Xmx560m -Xms560m"
value: "-XX:MaxDirectMemorySize=10M -XX:MaxMetaspaceSize=155273K -XX:ReservedCodeCacheSize=240M -Xss1M -Xms684406K -Xmx684406K -XX:ActiveProcessorCount=2 -XX:+UseCompactObjectHeaders -Xlog:gc"
- name: node.roles
value: "master,data"
readinessProbe:
1 change: 1 addition & 0 deletions deploy/e2e/apply/zalando.org_elasticsearchdatasets.yaml