Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generation rules for the k8s-deployment-rollout-troubleshoot codebundle.
# Each matched Deployment resource produces an SLX, an SLI and a runbook.
apiVersion: runwhen.com/v1
kind: GenerationRules
spec:
  generationRules:
    - resourceTypes:
        - deployment
      matchRules:
        # The ".+" pattern against the `name` property is intended to match
        # every deployment, whatever it is called.
        - type: pattern
          pattern: ".+"
          properties:
            - name
          mode: substring
      slxs:
        - baseName: k8s-dep-rollout
          shortenedBaseName: k8s-dep-rollout
          # Qualifiers presumably keep generated SLX names unique per
          # resource, namespace and cluster -- confirm against RunWhen docs.
          qualifiers:
            - resource
            - namespace
            - cluster
          baseTemplateName: k8s-deployment-rollout-troubleshoot
          levelOfDetail: basic
          outputItems:
            - type: slx
            - type: sli
            # The runbook names its template explicitly; the slx and sli
            # items above carry no templateName of their own.
            - type: runbook
              templateName: k8s-deployment-rollout-troubleshoot-taskset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# SLI template: runs the cron-scheduler-sli codebundle so that the
# k8s-deployment-rollout-troubleshoot runbook is triggered periodically
# for the matched deployment.
apiVersion: runwhen.com/v1
kind: ServiceLevelIndicator
metadata:
  name: {{slx_name}}
  labels:
    {% include "common-labels.yaml" %}
  annotations:
    {% include "common-annotations.yaml" %}
spec:
  displayUnitsLong: Execution Status
  displayUnitsShort: exe
  locations:
    - {{default_location}}
  description: Periodically triggers k8s-deployment-rollout-troubleshoot runbook for deployment {{match_resource.resource.metadata.name}}.
  codeBundle:
    repoUrl: https://github.com/runwhen-contrib/rw-workspace-utils.git
    ref: main
    pathToRobot: codebundles/cron-scheduler-sli/sli.robot
  # The SLI itself is evaluated every 300 seconds; the cron expression
  # below (minute 0 of every sixth hour) is handed to the codebundle.
  intervalStrategy: intermezzo
  intervalSeconds: 300
  configProvided:
    - name: CRON_SCHEDULE
      value: "0 */6 * * *"
    # NOTE(review): TARGET_SLX is intentionally passed as an empty string;
    # presumably the codebundle then targets its own SLX -- confirm.
    - name: TARGET_SLX
      value: ""
    # Kept as a quoted string so it is not parsed as a YAML boolean.
    - name: DRY_RUN
      value: "false"
  secretsProvided: []
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# SLX template describing the rollout-troubleshoot service level for one
# matched Kubernetes deployment.
apiVersion: runwhen.com/v1
kind: ServiceLevelX
metadata:
  name: {{slx_name}}
  labels:
    {% include "common-labels.yaml" %}
  annotations:
    {% include "common-annotations.yaml" %}
spec:
  imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/kubernetes/resources/labeled/deploy.svg
  alias: {{match_resource.resource.metadata.name}} Rollout Troubleshoot
  asMeasuredBy: Periodic execution of rollout troubleshoot tasks when rollout health degrades.
  configProvided:
    # Quoted so that a deployment whose name looks like a number, boolean
    # or null (e.g. 123, true, null) still renders as a YAML string; this
    # matches how the Runbook template passes DEPLOYMENT_NAME.
    - name: DEPLOYMENT_NAME
      value: "{{match_resource.resource.metadata.name}}"
  owners:
    - {{workspace.owner_email}}
  statement: Deployment {{match_resource.resource.metadata.name}} rollouts should complete successfully without stalled or failed revisions.
  additionalContext:
    {% include "kubernetes-hierarchy.yaml" ignore missing %}
    qualified_name: "{{ match_resource.qualified_name }}"
  tags:
    {% include "kubernetes-tags.yaml" ignore missing %}
    - name: access
      value: read-only
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Runbook template: read-only rollout diagnostics for the matched
# deployment, executed from the k8s-deployment-rollout-troubleshoot
# codebundle.
apiVersion: runwhen.com/v1
kind: Runbook
metadata:
  name: {{slx_name}}
  labels:
    {% include "common-labels.yaml" %}
  annotations:
    {% include "common-annotations.yaml" %}
spec:
  location: {{default_location}}
  description: Read-only diagnostics for Kubernetes deployment rollout lifecycle failures.
  codeBundle:
    # repo_url and ref override the upstream repository and branch when
    # they are set in the render context; otherwise the defaults apply.
    {% if repo_url %}
    repoUrl: {{repo_url}}
    {% else %}
    repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git
    {% endif %}
    {% if ref %}
    ref: {{ref}}
    {% else %}
    ref: main
    {% endif %}
    pathToRobot: codebundles/k8s-deployment-rollout-troubleshoot/runbook.robot
  configProvided:
    # Values are quoted so each one reaches the codebundle as a string.
    - name: NAMESPACE
      value: "{{match_resource.resource.metadata.namespace}}"
    - name: CONTEXT
      value: "{{context}}"
    - name: DEPLOYMENT_NAME
      value: "{{match_resource.resource.metadata.name}}"
    # Falls back to kubectl when no custom distribution binary is set.
    - name: KUBERNETES_DISTRIBUTION_BINARY
      value: "{{custom.kubernetes_distribution_binary | default('kubectl')}}"
    - name: EVENT_AGE
      value: "30m"
    # NOTE(review): units of the two thresholds below are not stated in
    # this template; check the codebundle documentation.
    - name: ROLLOUT_STATUS_TIMEOUT
      value: "30"
    - name: STUCK_TERMINATING_THRESHOLD
      value: "5"
  secretsProvided:
  # With wb_version set, the shared kubernetes-auth include supplies the
  # secrets; otherwise a single kubeconfig workspace secret is referenced.
  {% if wb_version %}
    {% include "kubernetes-auth.yaml" ignore missing %}
  {% else %}
    - name: kubeconfig
      workspaceKey: {{custom.kubeconfig_secret_name | default("kubeconfig")}}
  {% endif %}
15 changes: 15 additions & 0 deletions codebundles/k8s-deployment-rollout-troubleshoot/.test/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Test Infrastructure for k8s-deployment-rollout-troubleshoot

Apply manifests with `task build-infra` to create test deployments covering rollout troubleshoot scenarios in the `test-rollout-troubleshoot` namespace.

## Scenarios

| Deployment | Scenario |
|---|---|
| `healthy-rollout` | Complete healthy rollout |
| `progress-deadline-fail` | ProgressDeadlineExceeded via failing readiness probe |
| `pdb-blocked-rollout` | PDB minAvailable blocks eviction during rollout |
| `image-pull-fail` | ImagePullBackOff on bad image tag |
| `stuck-terminating-seed` | Long preStop hook for terminating pod testing |

Run `task default` after committing and pushing changes to validate generation rules via RunWhen Local discovery.
178 changes: 178 additions & 0 deletions codebundles/k8s-deployment-rollout-troubleshoot/.test/Taskfile.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
version: "3"

tasks:
default:
  # Main entry point. Runs, in order: the pushed-changes check, the
  # workspaceInfo.yaml generation, and RunWhen Local discovery.
  desc: "Run/refresh config"
  cmds:
    - task: check-unpushed-commits
    - task: generate-rwl-config
    - task: run-rwl-discovery

clean:
  # Tears down the test infrastructure and local discovery artifacts.
  # The previous `delete-slxs` step was dropped: no task with that name is
  # defined in this Taskfile, so calling it made `task clean` fail before
  # the discovery output could be cleaned.
  desc: "Run cleanup tasks"
  cmds:
    - task: remove-kubernetes-objects
    - task: clean-rwl-discovery

build-infra:
  # Thin wrapper: building the test infrastructure currently consists of
  # applying the Kubernetes manifests only.
  desc: "Build test infrastructure"
  cmds:
    - task: create-kubernetes-objects

create-kubernetes-objects:
  # Applies every manifest in the kubernetes/ directory.
  # The directory itself is passed to -f instead of the shell glob
  # `kubernetes/*`: once the directory holds more than one file the glob
  # expands to extra positional arguments, which kubectl rejects.
  desc: "Apply manifests from kubernetes directory using kubectl"
  cmds:
    - kubectl apply -f kubernetes/
  silent: true

remove-kubernetes-objects:
  # Deletes every object defined by the manifests in kubernetes/.
  # The directory is passed to -f rather than the shell glob
  # `kubernetes/*`, which breaks as soon as it expands to several files.
  # --ignore-not-found keeps the task successful when objects are
  # already gone.
  desc: "Delete kubernetes objects"
  cmds:
    - kubectl delete -f kubernetes/ --ignore-not-found
  silent: true

check-unpushed-commits:
  # Fails when the codebundle has local changes that are not yet on the
  # remote branch.
  #
  # BASE_DIR is a Task template variable, so it is referenced with Go
  # template syntax; it is not exported to the shell as $BASE_DIR. The
  # directory is handed to git as a pathspec (resolved relative to this
  # Taskfile's directory) because `git diff --name-only` prints paths
  # relative to the repository root, which can never start with "../".
  # Files under .test/ are filtered out afterwards.
  desc: Check if outstanding commits or file updates need to be pushed before testing.
  vars:
    BASE_DIR: "../"
  cmds:
    - |
      echo "Checking for uncommitted changes in {{.BASE_DIR}} and {{.BASE_DIR}}.runwhen, excluding '.test'..."
      # `|| true` keeps an empty grep result from failing the command.
      UNCOMMITTED_FILES=$(git diff --name-only HEAD -- "{{.BASE_DIR}}" | grep -v "/\.test/" || true)
      if [ -n "$UNCOMMITTED_FILES" ]; then
        echo "✗"
        echo "Uncommitted changes found:"
        echo "$UNCOMMITTED_FILES"
        echo "Remember to commit & push changes before executing the run-rwl-discovery task."
        echo "------------"
        exit 1
      else
        echo "√"
        echo "No uncommitted changes in specified directories."
        echo "------------"
      fi
    - |
      echo "Checking for unpushed commits in {{.BASE_DIR}} and {{.BASE_DIR}}.runwhen, excluding '.test'..."
      git fetch origin
      BRANCH=$(git rev-parse --abbrev-ref HEAD)
      # A branch that was never pushed has no origin/<branch> ref. The diff
      # below would then error out and be misread as "nothing unpushed".
      if ! git rev-parse --verify --quiet "origin/$BRANCH" > /dev/null; then
        echo "✗"
        echo "Branch '$BRANCH' does not exist on origin."
        echo "Remember to push changes before executing the run-rwl-discovery task."
        echo "------------"
        exit 1
      fi
      UNPUSHED_FILES=$(git diff --name-only "origin/$BRANCH" HEAD -- "{{.BASE_DIR}}" | grep -v "/\.test/" || true)
      if [ -n "$UNPUSHED_FILES" ]; then
        echo "✗"
        echo "Unpushed commits found:"
        echo "$UNPUSHED_FILES"
        echo "Remember to push changes before executing the run-rwl-discovery task."
        echo "------------"
        exit 1
      else
        echo "√"
        echo "No unpushed commits in specified directories."
        echo "------------"
      fi
  silent: true

generate-rwl-config:
  # Writes workspaceInfo.yaml for RunWhen Local, pointing it at the
  # current git remote and branch and at the namespace declared in
  # kubernetes/manifest.yaml.
  desc: "Generate RunWhen Local configuration (workspaceInfo.yaml)"
  env:
    # Workspace name; falls back to "my-workspace" when not supplied.
    RW_WORKSPACE: '{{.RW_WORKSPACE | default "my-workspace"}}'
  cmds:
    - |
      # Namespace name taken from the Namespace object in the test manifest.
      target_ns=$(yq e 'select(.kind == "Namespace") | .metadata.name' kubernetes/manifest.yaml -N)

      # The codebundle name is the directory that contains this .test folder.
      bundle_name=$(basename "$(dirname "$PWD")")
      origin_url=$(git config --get remote.origin.url)
      current_branch=$(git rev-parse --abbrev-ref HEAD)

      cat <<EOF > workspaceInfo.yaml
      workspaceName: "$RW_WORKSPACE"
      workspaceOwnerEmail: authors@runwhen.com
      defaultLocation: location-01
      defaultLOD: none
      cloudConfig:
        kubernetes:
          kubeconfigFile: /shared/kubeconfig
          namespaceLODs:
            $target_ns: detailed
          namespaces:
            - $target_ns
      codeCollections:
      - repoURL: "$origin_url"
        branch: "$current_branch"
        codeBundles: ["$bundle_name"]
      custom:
        kubeconfig_secret_name: "kubeconfig"
        kubernetes_distribution_binary: kubectl
      EOF
  silent: true

run-rwl-discovery:
  # Starts a fresh RunWhen Local container with this directory mounted at
  # /shared and runs the workspace builder inside it.
  #
  # Requires RW_FROM_FILE to be a JSON object whose `kubeconfig` field is
  # the host path of the kubeconfig to mount into the container.
  desc: "Run RunWhen Local Discovery on test infrastructure"
  cmds:
    - |
      CONTAINER_NAME="RunWhenLocal"

      # Resolve the kubeconfig before touching anything. An unset or
      # malformed RW_FROM_FILE would otherwise hand docker an empty path
      # or the literal string "null" as the volume source.
      kubeconfig=$(echo "$RW_FROM_FILE" | jq -r '.kubeconfig // empty')
      if [ -z "$kubeconfig" ]; then
        echo "Error: RW_FROM_FILE must be a JSON object with a non-empty 'kubeconfig' path."
        exit 1
      fi

      if docker ps -q --filter "name=$CONTAINER_NAME" | grep -q .; then
        echo "Stopping and removing existing container $CONTAINER_NAME..."
        docker stop "$CONTAINER_NAME" && docker rm "$CONTAINER_NAME"
      elif docker ps -a -q --filter "name=$CONTAINER_NAME" | grep -q .; then
        echo "Removing existing stopped container $CONTAINER_NAME..."
        docker rm "$CONTAINER_NAME"
      else
        echo "No existing container named $CONTAINER_NAME found."
      fi

      echo "Cleaning up output directory..."
      sudo rm -rf output || { echo "Failed to remove output directory"; exit 1; }
      # World-writable so the container can write into the bind mount.
      mkdir output && chmod 777 output || { echo "Failed to set permissions"; exit 1; }

      echo "Starting new container $CONTAINER_NAME..."

      # Paths are quoted so directories containing spaces mount correctly.
      docker run --name "$CONTAINER_NAME" -p 8081:8081 \
        -v "$(pwd)":/shared \
        -v "$kubeconfig":/shared/kubeconfig \
        -d ghcr.io/runwhen-contrib/runwhen-local:latest || {
          echo "Failed to start container"; exit 1;
        }

      echo "Running workspace builder script in container..."
      docker exec -w /workspace-builder "$CONTAINER_NAME" ./run.sh $1 --verbose || {
        echo "Error executing script in container"; exit 1;
      }

      echo "Review generated config files under output/workspaces/"
  silent: true

validate-generation-rules:
  # Validates each generation-rules YAML file against the JSON schema
  # published in the runwhen-local repository.
  #
  # The task exits non-zero when any file is invalid or the schema cannot
  # be downloaded, so it can gate CI; previously it always succeeded.
  desc: "Validate YAML files in .runwhen/generation-rules"
  cmds:
    - |
      for cmd in curl yq ajv; do
        if ! command -v "$cmd" > /dev/null 2>&1; then
          echo "Error: $cmd is required but not installed."
          exit 1
        fi
      done

      temp_dir=$(mktemp -d)
      schema_file="$temp_dir/generation-rule-schema.json"

      # -f makes curl fail on HTTP errors instead of saving the error page
      # as if it were the schema.
      if ! curl -fsS -o "$schema_file" \
        https://raw.githubusercontent.com/runwhen-contrib/runwhen-local/refs/heads/main/src/generation-rule-schema.json; then
        echo "Error: failed to download generation rule schema."
        rm -rf "$temp_dir"
        exit 1
      fi

      failed=0
      for yaml_file in ../.runwhen/generation-rules/*.yaml; do
        # An unmatched glob is left as the literal pattern; skip it.
        [ -e "$yaml_file" ] || continue

        echo "Validating $yaml_file"
        json_file="$temp_dir/$(basename "${yaml_file%.*}.json")"
        yq -o=json "$yaml_file" > "$json_file"
        if ajv validate -s "$schema_file" -d "$json_file" \
          --spec=draft2020 --strict=false; then
          echo "$yaml_file is valid."
        else
          echo "$yaml_file is invalid."
          failed=1
        fi
      done

      rm -rf "$temp_dir"
      # Report the aggregate result only after every file has been checked.
      exit "$failed"
  silent: true

clean-rwl-discovery:
  # Removes the artifacts left behind by the discovery tasks.
  desc: "Check and clean up RunWhen Local discovery output"
  cmds:
    - |
      # output/ is removed with sudo, presumably because the container
      # writes files there as another user.
      sudo rm -rf output
      # Generated config and any local kubeconfig copy; -f ignores
      # files that do not exist.
      rm -f workspaceInfo.yaml kubeconfig
  silent: true
Loading
Loading