Skip to content

Commit ecdb75f

Browse files
committed
fix(#5063): deterministically order env vars in DeployWorkload
DeployWorkload built the StatefulSet pod's env list by iterating a Go map[string]string directly. Map iteration order is randomized by the runtime spec, so the resulting []EnvVar ordering changed between calls even when the contents were identical. Kubernetes hashes env list order into the pod template hash, so each reorder triggered a no-op StatefulSet rollout. In a deployed MCPServer this surfaced as repeated pod recreations after operator pod restarts, with secondary fallout: during the bridge recreation window the proxy runner's /health pinger blocks for its 5s timeout, racing the kubelet liveness probe (also 5s by default) and killing the proxy container. Each restart triggered another reconcile which re-iterated the env map and produced yet another order. Sort the keys before iterating so the env list is stable. Add a regression test that runs DeployWorkload 10+ times and asserts the StatefulSet's mcp container env order matches a fixed sorted slice; without the fix it fails reliably within a few iterations. Same class of bug as #4783 (vMCP discovery) and #3450 (vMCP server ordering), on the MCPServer code path.
1 parent 624c6d0 commit ecdb75f

2 files changed

Lines changed: 101 additions & 4 deletions

File tree

pkg/container/kubernetes/client.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"io"
1313
"log/slog"
1414
"os"
15+
"sort"
1516
"strconv"
1617
"strings"
1718
"time"
@@ -357,10 +358,18 @@ func (c *Client) DeployWorkload(ctx context.Context,
357358

358359
attachStdio := options == nil || options.AttachStdio
359360

360-
// Convert environment variables to Kubernetes format
361-
var envVarList []*corev1apply.EnvVarApplyConfiguration
362-
for k, v := range envVars {
363-
envVarList = append(envVarList, corev1apply.EnvVar().WithName(k).WithValue(v))
361+
// Convert environment variables to Kubernetes format.
362+
// Sort keys so the resulting list is deterministic — Go map iteration
363+
// is randomized, and any ordering shift here changes the pod template
364+
// hash and triggers an unnecessary StatefulSet rollout (#5063).
365+
envKeys := make([]string, 0, len(envVars))
366+
for k := range envVars {
367+
envKeys = append(envKeys, k)
368+
}
369+
sort.Strings(envKeys)
370+
envVarList := make([]*corev1apply.EnvVarApplyConfiguration, 0, len(envKeys))
371+
for _, k := range envKeys {
372+
envVarList = append(envVarList, corev1apply.EnvVar().WithName(k).WithValue(envVars[k]))
364373
}
365374

366375
// Create a pod template spec

pkg/container/kubernetes/client_test.go

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -894,6 +894,94 @@ func TestDeployWorkloadCreatesBackendServices(t *testing.T) {
894894
}
895895
}
896896

897+
// TestDeployWorkloadEnvVarOrderingDeterministic verifies that DeployWorkload
898+
// produces an env list in deterministic (sorted) order regardless of Go map
899+
// iteration randomness. Without this, the StatefulSet pod template hash
900+
// flapped between reconciles, triggering needless pod rollouts (#5063).
901+
func TestDeployWorkloadEnvVarOrderingDeterministic(t *testing.T) {
902+
t.Parallel()
903+
904+
containerName := "test-svc"
905+
envVars := map[string]string{
906+
"ULI_MCP_TOKEN": "tok",
907+
"MCP_PORT": "8080",
908+
"MCP_TRANSPORT": "streamable-http",
909+
"MCP_HOST": "0.0.0.0",
910+
"FASTMCP_PORT": "8080",
911+
}
912+
913+
deploy := func() []corev1.EnvVar {
914+
mockStatefulSet := &appsv1.StatefulSet{
915+
ObjectMeta: metav1.ObjectMeta{
916+
Name: containerName, Namespace: "default", UID: "test-uid",
917+
},
918+
Spec: appsv1.StatefulSetSpec{
919+
Replicas: ptr.To(int32(1)),
920+
Template: corev1.PodTemplateSpec{Spec: corev1.PodSpec{Containers: []corev1.Container{}}},
921+
},
922+
Status: appsv1.StatefulSetStatus{ReadyReplicas: 1},
923+
}
924+
clientset := fake.NewClientset(mockStatefulSet)
925+
fakeConfig := &rest.Config{Host: "https://fake-k8s-api.example.com"}
926+
mockDetector := &mockPlatformDetector{platform: PlatformKubernetes}
927+
928+
client := NewClientWithConfigAndPlatformDetector(clientset, fakeConfig, mockDetector)
929+
client.waitForStatefulSetReadyFunc = mockWaitForStatefulSetReady
930+
client.namespaceFunc = func() string { return "default" }
931+
932+
options := runtime.NewDeployWorkloadOptions()
933+
options.PortBindings = map[string][]runtime.PortBinding{"8080/tcp": {{HostPort: "8080"}}}
934+
935+
_, err := client.DeployWorkload(
936+
t.Context(),
937+
"test-image",
938+
containerName,
939+
[]string{"serve"},
940+
envVars,
941+
map[string]string{"app": containerName},
942+
nil,
943+
"streamable-http",
944+
options,
945+
false,
946+
)
947+
require.NoError(t, err)
948+
949+
sts, err := clientset.AppsV1().StatefulSets("default").Get(t.Context(), containerName, metav1.GetOptions{})
950+
require.NoError(t, err)
951+
var mcp *corev1.Container
952+
for i := range sts.Spec.Template.Spec.Containers {
953+
if sts.Spec.Template.Spec.Containers[i].Name == mcpContainerName {
954+
mcp = &sts.Spec.Template.Spec.Containers[i]
955+
break
956+
}
957+
}
958+
require.NotNil(t, mcp, "mcp container should exist on the StatefulSet")
959+
return mcp.Env
960+
}
961+
962+
// Sorted ordering is the contract; assert against it explicitly so the
963+
// reason for the order is documented in the test, not just stable across runs.
964+
expected := []string{"FASTMCP_PORT", "MCP_HOST", "MCP_PORT", "MCP_TRANSPORT", "ULI_MCP_TOKEN"}
965+
966+
first := deploy()
967+
firstNames := make([]string, len(first))
968+
for i, e := range first {
969+
firstNames[i] = e.Name
970+
}
971+
assert.Equal(t, expected, firstNames, "first DeployWorkload should produce sorted env order")
972+
973+
// Run multiple times to catch any non-determinism that would slip through
974+
// a single-call test (Go map iteration order is randomized per call).
975+
for i := 0; i < 10; i++ {
976+
got := deploy()
977+
gotNames := make([]string, len(got))
978+
for j, e := range got {
979+
gotNames[j] = e.Name
980+
}
981+
assert.Equal(t, expected, gotNames, "iteration %d: env order must be deterministic", i)
982+
}
983+
}
984+
897985
// TestAttachToWorkloadExitFunc tests that the exit function is properly configured
898986
// and can be mocked for testing
899987
func TestAttachToWorkloadExitFunc(t *testing.T) {

0 commit comments

Comments
 (0)