Skip to content

Commit 4cb992b

Browse files
authored
Merge pull request #347 from mrsabath/fix/weather-e2e-progress-deadline
authbridge(weather): fix Kind E2E (Deployment progress deadline)
2 parents 36e4524 + 9bd9506 commit 4cb992b

3 files changed

Lines changed: 20 additions & 8 deletions

File tree

authbridge/demos/weather-agent/deploy_and_verify_advanced.sh

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,10 @@
1515
# SKIP_DEPLOY=1 ./deploy_and_verify_advanced.sh # verify only (resources must exist)
1616
#
1717
# Timeouts (optional, for slow clusters / GitHub Kind: image pull + many sidecars):
18-
# WEATHER_TOOL_ROLLOUT_TIMEOUT kubectl rollout status for the tool (default: 900s)
19-
# WEATHER_AGENT_ROLLOUT_TIMEOUT kubectl rollout status for the agent (default: 600s)
20-
# WEATHER_TOOL_KC_CLIENT_SEC setup_keycloak --tool-client-timeout, seconds (default: 600)
18+
# WEATHER_TOOL_ROLLOUT_TIMEOUT kubectl rollout status for the tool (default: 1800s;
19+
# should be >= spec.progressDeadlineSeconds in the YAML)
20+
# WEATHER_AGENT_ROLLOUT_TIMEOUT kubectl rollout status for the agent (default: 1800s)
21+
# WEATHER_TOOL_KC_CLIENT_SEC setup_keycloak --tool-client-timeout, seconds (default: 900)
2122
#
2223
set -euo pipefail
2324

@@ -37,11 +38,11 @@ KC_REALM="${KC_REALM:-kagenti}"
3738
KC_USER_CLIENT_ID="${KC_USER_CLIENT_ID:-weather-advanced-e2e}"
3839
# For confidential "kagenti" UI client, set KC_USER_CLIENT_SECRET in the environment.
3940
#
40-
# Rollout: defaults are higher than 5m so Kind CI (cold ghcr.io pulls + 4+ containers) does
41-
# not fail on kubectl rollout status. Override when testing on fast clusters.
42-
WEATHER_TOOL_ROLLOUT_TIMEOUT="${WEATHER_TOOL_ROLLOUT_TIMEOUT:-900s}"
43-
WEATHER_AGENT_ROLLOUT_TIMEOUT="${WEATHER_AGENT_ROLLOUT_TIMEOUT:-600s}"
44-
WEATHER_TOOL_KC_CLIENT_SEC="${WEATHER_TOOL_KC_CLIENT_SEC:-600}"
41+
# Rollout: these defaults match spec.progressDeadlineSeconds (1800) on the tool/agent
42+
# Deployments (Kind rollouts can exceed the 600s default) and are used as kubectl --timeout below.
43+
WEATHER_TOOL_ROLLOUT_TIMEOUT="${WEATHER_TOOL_ROLLOUT_TIMEOUT:-1800s}"
44+
WEATHER_AGENT_ROLLOUT_TIMEOUT="${WEATHER_AGENT_ROLLOUT_TIMEOUT:-1800s}"
45+
WEATHER_TOOL_KC_CLIENT_SEC="${WEATHER_TOOL_KC_CLIENT_SEC:-900}"
4546

4647
# Write all arguments as a single line on stdout ("$*" joins them with the
# first character of IFS, a space by default).
log() {
  printf '%s\n' "$*"
}
4748
# Report a fatal error: print "ERROR: <arguments>" on stderr, then terminate
# the current shell with exit status 1.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

authbridge/demos/weather-agent/k8s/weather-service-advanced.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,11 @@ metadata:
2121
labels:
2222
app.kubernetes.io/name: weather-service-advanced
2323
spec:
24+
# Same as the tool Deployment: slow Kind rollouts can exceed the default 600s progress deadline.
25+
progressDeadlineSeconds: 1800
2426
replicas: 1
27+
strategy:
28+
type: Recreate
2529
selector:
2630
matchLabels:
2731
app.kubernetes.io/name: weather-service-advanced

authbridge/demos/weather-agent/k8s/weather-tool-advanced.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,14 @@ metadata:
3131
protocol.kagenti.io/mcp: ""
3232
kagenti.io/transport: streamable_http
3333
spec:
34+
# Default Deployment progress deadline is 600s; cold ghcr.io image pulls + webhook restart +
35+
# many sidecars in Kind can exceed that before the old ReplicaSet pod drains.
36+
progressDeadlineSeconds: 1800
3437
replicas: 1
38+
# Default RollingUpdate can leave the new pod Pending (Insufficient cpu) on a
39+
# single Kind node while the old multi-sidecar pod still runs. Recreate terminates the old pod before creating the new one.
40+
strategy:
41+
type: Recreate
3542
selector:
3643
matchLabels:
3744
app.kubernetes.io/name: weather-tool-advanced

0 commit comments

Comments
 (0)