runwhen-contrib · theyashl · Jun 3, 2026 · Jun 3, 2026 · Jun 3, 2026 · Jun 3, 2026
@@ -0,0 +1,33 @@
+# oVirt is not itself a RunWhen-discoverable platform, so this rule anchors on the
+# Kubernetes `cluster` resource purely as a generation trigger: it emits one
+# oVirt engine-health SLX per discovered cluster. All SLX/SLI/runbook content
+# comes from the workspaceInfo `custom.*` values (ovirt_engine_url, ovirt_engine_name)
+# and workspace secrets (ovirt_username, ovirt_password, optional ovirt_ca_cert),
+# not from the matched cluster.
+#
+# Requires the workspace to discover at least one Kubernetes cluster. If multiple
+# clusters are discovered, one oVirt SLX is generated per cluster.
+apiVersion: runwhen.com/v1
+kind: GenerationRules
+spec:
+  platform: kubernetes
+  generationRules:
+    # Single rule, keyed on the Kubernetes cluster resource type.
+    - resourceTypes: [cluster]
+      matchRules:
+        - type: and
+          matches:
+            # The pattern '.+' (one or more characters) is checked against
+            # the resource's name property in substring mode.
+            - type: pattern
+              pattern: '.+'
+              properties:
+                - name
+              mode: substring
+      slxs:
+        - baseName: ovirt-engine-health
+          qualifiers:
+            - cluster
+          baseTemplateName: ovirt-engine-health
+          levelOfDetail: detailed
+          outputItems:
+            - type: slx
+            - type: sli
+            # The runbook item names its template file explicitly.
+            - type: runbook
+              templateName: ovirt-engine-health-taskset.yaml
@@ -0,0 +1,44 @@
+# ServiceLevelIndicator template for the oVirt engine-health SLX.
+# The engine URL and the workspace secret key names are filled in from the
+# workspaceInfo custom values when the template is rendered.
+apiVersion: runwhen.com/v1
+kind: ServiceLevelIndicator
+metadata:
+  name: {{slx_name}}
+  labels:
+    {% include "common-labels.yaml" %}
+  annotations:
+    {% include "common-annotations.yaml" %}
+spec:
+  displayUnitsLong: OK
+  displayUnitsShort: ok
+  locations:
+    - {{default_location}}
+  description: The composite health score of the oVirt engine environment.
+  codeBundle:
+    # Use the supplied repository and ref when present; otherwise fall back to
+    # the rw-cli-codecollection repository on the main branch.
+    {% if repo_url %}
+    repoUrl: {{repo_url}}
+    {% else %}
+    repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git
+    {% endif %}
+    {% if ref %}
+    ref: {{ref}}
+    {% else %}
+    ref: main
+    {% endif %}
+    pathToRobot: codebundles/ovirt-engine-health/sli.robot
+  intervalStrategy: intermezzo
+  # 600 seconds = 10 minutes.
+  intervalSeconds: 600
+  configProvided:
+    - name: OVIRT_ENGINE_URL
+      value: {{custom.ovirt_engine_url}}
+  secretsProvided:
+    # Each workspaceKey is the secret key name configured in the custom values.
+    - name: OVIRT_USERNAME
+      workspaceKey: {{custom.ovirt_username}}
+    - name: OVIRT_PASSWORD
+      workspaceKey: {{custom.ovirt_password}}
+    # The CA certificate secret is optional and only emitted when configured.
+    {% if custom.ovirt_ca_cert %}
+    - name: OVIRT_CA_CERT
+      workspaceKey: {{custom.ovirt_ca_cert}}
+    {% endif %}
+  alertConfig:
+    tasks:
+      persona: eager-edgar
+      sessionTTL: 10m
@@ -0,0 +1,19 @@
+apiVersion: runwhen.com/v1  # ServiceLevelX template for the oVirt engine-health SLX
+kind: ServiceLevelX
+metadata:
+  name: {{slx_name}}
+  labels:
+    {% include "common-labels.yaml" %}
+  annotations:
+    {% include "common-annotations.yaml" %}
+spec:
+  imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/ovirt.svg
+  alias: {{custom.ovirt_engine_name}} oVirt Engine Health  # alias text starts with the custom engine name
+  asMeasuredBy: The composite health score of the oVirt engine (hosts, VMs, storage, clusters, events, snapshots).
+  configProvided:  # engine URL comes from the workspaceInfo custom values
+  - name: OVIRT_ENGINE_URL
+    value: {{custom.ovirt_engine_url}}
+  owners:  # single owner entry taken from the workspace owner email
+  - {{workspace.owner_email}}
+  statement: The oVirt engine and its hosts, VMs, and storage domains should be healthy.
+  additionalContext: []  # explicitly empty list
@@ -0,0 +1,34 @@
+apiVersion: runwhen.com/v1  # Runbook template for the oVirt engine-health SLX
+kind: Runbook
+metadata:
+  name: {{slx_name}}
+  labels:
+    {% include "common-labels.yaml" %}
+  annotations:
+    {% include "common-annotations.yaml" %}
+spec:
+  location: {{default_location}}
+  codeBundle:  # repoUrl/ref fall back to rw-cli-codecollection on main when repo_url/ref are not supplied
+    {% if repo_url %}
+    repoUrl: {{repo_url}}
+    {% else %}
+    repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git
+    {% endif %}
+    {% if ref %}
+    ref: {{ref}}
+    {% else %}
+    ref: main
+    {% endif %}
+    pathToRobot: codebundles/ovirt-engine-health/runbook.robot
+  configProvided:  # engine URL comes from the workspaceInfo custom values
+    - name: OVIRT_ENGINE_URL
+      value: {{custom.ovirt_engine_url}}
+  secretsProvided:  # each workspaceKey is a secret key name from the custom values; the CA cert entry is optional
+    - name: OVIRT_USERNAME
+      workspaceKey: {{custom.ovirt_username}}
+    - name: OVIRT_PASSWORD
+      workspaceKey: {{custom.ovirt_password}}
+    {% if custom.ovirt_ca_cert %}
+    - name: OVIRT_CA_CERT
+      workspaceKey: {{custom.ovirt_ca_cert}}
+    {% endif %}
@@ -0,0 +1,4 @@
+# Local settings file for the test tasks; it can hold the engine password.
+.env
+# Output directories written by the run-sli and run-runbook tasks.
+output-sli/
+output-runbook/
+# PEM files, e.g. a downloaded engine CA certificate.
+*.pem
@@ -0,0 +1,65 @@
+# Testing — ovirt-engine-health
+
+This bundle talks to a live oVirt engine REST API, so testing means pointing it
+at a reachable engine. There is no cloud infrastructure to provision (and no
+Terraform), because oVirt is self-hosted.
+
+## What you need
+
+A reachable oVirt engine. Any of these works:
+- An existing oVirt / RHV / OLVM engine you have read access to.
+- A lab/self-hosted-engine deployment.
+- The upstream `ovirt-engine` appliance for a throwaway environment.
+
+A read-only account is sufficient (e.g. a user in the `@internal` auth profile
+with the `UserRole` or `ReadOnlyAdmin` role).
+
+## Configure
+
+Create a `.test/.env` file (gitignored) or export the variables:
+
+```bash
+OVIRT_ENGINE_URL=https://engine.example.com
+OVIRT_USERNAME=admin@internal
+OVIRT_PASSWORD=changeme
+# Optional:
+OVIRT_CA_CERT_FILE=/path/to/engine-ca.pem   # for TLS verification
+OVIRT_STORAGE_FREE_PCT=10
+OVIRT_EVENT_LOOKBACK=1h
+OVIRT_SNAPSHOT_MAX_AGE=7d
+MAX_PAUSED_VMS=0
+OVIRT_ENGINE_NAME=lab-ovirt
+```
+
+> Fetch the engine CA with (the URL is quoted so the shell does not interpret `&`):
+> `curl -sk 'https://engine.example.com/ovirt-engine/services/pki-resource?resource=ca-certificate&format=X509-PEM-CA' -o engine-ca.pem`
+
+## Run
+
+```bash
+task check-config     # validate required env vars
+task smoke-scripts    # run the raw check scripts and print their JSON
+task run-sli          # run sli.robot (pushes the composite health score)
+task run-runbook      # run runbook.robot (raises issues + writes a report)
+task                  # check-config + run-sli + run-runbook
+task clean            # remove robot output dirs
+```
+
+`smoke-scripts` is the fastest way to confirm connectivity and that the engine's
+JSON shape matches what the scripts expect, without Robot Framework.
+
+## No engine handy? Use the mock
+
+`mock/` contains a dependency-free mock oVirt engine so you can exercise the
+full bundle flow with no real engine and no cloud cost:
+
+```bash
+task test-mock                        # start mock, run all check scripts, tear down
+MOCK_SCENARIO=healthy task test-mock  # nominal data (SLI score == 1, no issues)
+task mock                             # run mock in the foreground on :8080
+task run-sli-mock                     # run sli.robot against the mock (needs RW libs)
+```
+
+See `mock/README.md` for details and the scenarios it ships. The mock validates
+the bundle's wiring and parsing against the documented v4 API shape; it does not
+replace a one-time check against a real engine.
@@ -0,0 +1,152 @@
+version: "3"
+
+# Lightweight test harness for the ovirt-engine-health CodeBundle.
+#
+# Unlike the cloud bundles, there is no infrastructure to provision here: point
+# the tasks at a reachable oVirt engine (a real engine, a lab, or the upstream
+# ovirt-engine appliance) via the environment variables below, then run the
+# robots locally.
+#
+# Required env:
+#   OVIRT_ENGINE_URL   e.g. https://engine.example.com   (no trailing /ovirt-engine)
+#   OVIRT_USERNAME     e.g. admin@internal
+#   OVIRT_PASSWORD
+# Optional env:
+#   OVIRT_CA_CERT_FILE path to a PEM CA bundle for TLS verification
+#   OVIRT_STORAGE_FREE_PCT, OVIRT_EVENT_LOOKBACK, OVIRT_SNAPSHOT_MAX_AGE,
+#   MAX_PAUSED_VMS, OVIRT_ENGINE_NAME
+
+tasks:
+  # Default entry point: validate the configuration, then run both robots in order.
+  default:
+    desc: 'Run both the SLI and the runbook against the configured engine.'
+    cmds:
+      - {task: check-config}
+      - {task: run-sli}
+      - {task: run-runbook}
+
+  # Fails with exit code 1 and a list of names when any required connection
+  # variable (OVIRT_ENGINE_URL, OVIRT_USERNAME, OVIRT_PASSWORD) is empty.
+  check-config:
+    desc: "Verify the required environment variables are set."
+    # Load .env here too: this task is called from other tasks via `task:`, and
+    # a task-level dotenv only applies to the task that declares it. Without
+    # this, values that live only in .env would be reported as missing.
+    dotenv: ['.env']
+    cmds:
+      - |
+        missing=()
+        [ -z "${OVIRT_ENGINE_URL}" ] && missing+=("OVIRT_ENGINE_URL")
+        [ -z "${OVIRT_USERNAME}" ]   && missing+=("OVIRT_USERNAME")
+        [ -z "${OVIRT_PASSWORD}" ]   && missing+=("OVIRT_PASSWORD")
+        if [ ${#missing[@]} -ne 0 ]; then
+          echo "Missing required environment variables: ${missing[*]}"
+          exit 1
+        fi
+        echo "Configuration looks good for ${OVIRT_ENGINE_URL}"
+    silent: true
+
+  smoke-scripts:  # runs each check script once and summarizes its JSON output with jq
+    desc: "Run the raw check scripts directly and print their JSON (no Robot Framework)."
+    dotenv: ['.env']  # load variables from a local .env file
+    cmds:
+      - task: check-config
+      - |  # OVIRT_CA_CERT holds the contents of OVIRT_CA_CERT_FILE, or is empty when that variable is unset
+        export OVIRT_CA_CERT="$( [ -n "${OVIRT_CA_CERT_FILE}" ] && cat "${OVIRT_CA_CERT_FILE}" || true )"
+        echo "== engine_health ==";   ../engine_health.sh   | jq .
+        echo "== host_status ==";     ../host_status.sh     | jq '{total, unhealthy_hosts}'
+        echo "== vm_status ==";       ../vm_status.sh        | jq '{total, problem_vms}'
+        echo "== storage_domains =="; ../storage_domains.sh "${OVIRT_STORAGE_FREE_PCT:-10}" | jq '{problem_domains}'
+        echo "== cluster_health =="; ../cluster_health.sh   | jq '{problem_clusters}'
+        echo "== recent_events ==";   ../recent_events.sh "${OVIRT_EVENT_LOOKBACK:-1h}" | jq '{critical_events: (.critical_events|length)}'
+        echo "== stale_snapshots =="; ../stale_snapshots.sh "${OVIRT_SNAPSHOT_MAX_AGE:-7d}" | jq '{stale: (.stale_snapshots|length)}'
+    silent: true
+
+  run-sli:  # runs ../sli.robot with connection settings passed as robot variables; output goes to output-sli
+    desc: "Run sli.robot against the configured engine."
+    dotenv: ['.env']  # load variables from a local .env file
+    cmds:
+      - task: check-config
+      - |  # unset optional settings fall back to 10, 1h, 7d, 0 and ovirt-engine respectively
+        export OVIRT_CA_CERT="$( [ -n "${OVIRT_CA_CERT_FILE}" ] && cat "${OVIRT_CA_CERT_FILE}" || true )"
+        robot \
+          --variable OVIRT_ENGINE_URL:"${OVIRT_ENGINE_URL}" \
+          --variable OVIRT_USERNAME:"${OVIRT_USERNAME}" \
+          --variable OVIRT_PASSWORD:"${OVIRT_PASSWORD}" \
+          --variable OVIRT_CA_CERT:"${OVIRT_CA_CERT}" \
+          --variable OVIRT_STORAGE_FREE_PCT:"${OVIRT_STORAGE_FREE_PCT:-10}" \
+          --variable OVIRT_EVENT_LOOKBACK:"${OVIRT_EVENT_LOOKBACK:-1h}" \
+          --variable OVIRT_SNAPSHOT_MAX_AGE:"${OVIRT_SNAPSHOT_MAX_AGE:-7d}" \
+          --variable MAX_PAUSED_VMS:"${MAX_PAUSED_VMS:-0}" \
+          --variable OVIRT_ENGINE_NAME:"${OVIRT_ENGINE_NAME:-ovirt-engine}" \
+          --outputdir output-sli \
+          ../sli.robot
+    silent: true
+
+  run-runbook:  # runs ../runbook.robot with connection settings passed as robot variables; output goes to output-runbook
+    desc: "Run runbook.robot against the configured engine."
+    dotenv: ['.env']  # load variables from a local .env file
+    cmds:
+      - task: check-config
+      - |  # unset optional settings fall back to 10, 1h, 7d, 0 and ovirt-engine respectively
+        export OVIRT_CA_CERT="$( [ -n "${OVIRT_CA_CERT_FILE}" ] && cat "${OVIRT_CA_CERT_FILE}" || true )"
+        robot \
+          --variable OVIRT_ENGINE_URL:"${OVIRT_ENGINE_URL}" \
+          --variable OVIRT_USERNAME:"${OVIRT_USERNAME}" \
+          --variable OVIRT_PASSWORD:"${OVIRT_PASSWORD}" \
+          --variable OVIRT_CA_CERT:"${OVIRT_CA_CERT}" \
+          --variable OVIRT_STORAGE_FREE_PCT:"${OVIRT_STORAGE_FREE_PCT:-10}" \
+          --variable OVIRT_EVENT_LOOKBACK:"${OVIRT_EVENT_LOOKBACK:-1h}" \
+          --variable OVIRT_SNAPSHOT_MAX_AGE:"${OVIRT_SNAPSHOT_MAX_AGE:-7d}" \
+          --variable MAX_PAUSED_VMS:"${MAX_PAUSED_VMS:-0}" \
+          --variable OVIRT_ENGINE_NAME:"${OVIRT_ENGINE_NAME:-ovirt-engine}" \
+          --outputdir output-runbook \
+          ../runbook.robot
+    silent: true
+
+  # Foreground mock server; scenario defaults to unhealthy and port to 8080.
+  mock:
+    desc: 'Run the mock oVirt engine in the foreground (Ctrl-C to stop). Set MOCK_SCENARIO=healthy|unhealthy.'
+    cmds:
+      # Folded scalar: the three lines below are joined into one command line.
+      - >-
+        MOCK_SCENARIO="${MOCK_SCENARIO:-unhealthy}"
+        MOCK_PORT="${MOCK_PORT:-8080}"
+        python3 mock/ovirt_mock.py
+
+  test-mock:  # starts the mock in the background, runs every check script against it, kills it on exit
+    desc: "Start the mock, run every check script against it, then tear it down. Set MOCK_SCENARIO=healthy|unhealthy."
+    cmds:
+      - |  # port defaults to 8088 here; the EXIT trap kills the mock; the 1 second sleep gives it time to start
+        SCENARIO="${MOCK_SCENARIO:-unhealthy}"
+        PORT="${MOCK_PORT:-8088}"
+        MOCK_SCENARIO="$SCENARIO" MOCK_PORT="$PORT" python3 mock/ovirt_mock.py &
+        MOCK_PID=$!
+        trap 'kill $MOCK_PID 2>/dev/null' EXIT
+        sleep 1
+        export OVIRT_ENGINE_URL="http://localhost:${PORT}"
+        export OVIRT_USERNAME="admin@internal"
+        export OVIRT_PASSWORD="mock"
+        echo "===== scenario: ${SCENARIO} ====="
+        echo "== engine_health ==";   ../engine_health.sh   | jq .
+        echo "== host_status ==";     ../host_status.sh     | jq '{total, unhealthy_hosts, maintenance_hosts}'
+        echo "== vm_status ==";       ../vm_status.sh        | jq '{total, problem_vms}'
+        echo "== storage_domains =="; ../storage_domains.sh "${OVIRT_STORAGE_FREE_PCT:-10}" | jq '{problem_domains}'
+        echo "== cluster_health =="; ../cluster_health.sh   | jq '{problem_clusters}'
+        echo "== recent_events ==";   ../recent_events.sh "${OVIRT_EVENT_LOOKBACK:-1h}" | jq '{critical_events}'
+        echo "== stale_snapshots =="; ../stale_snapshots.sh "${OVIRT_SNAPSHOT_MAX_AGE:-7d}" | jq '{stale_snapshots}'
+    silent: true
+
+  run-sli-mock:  # starts the mock in the background and runs ../sli.robot against it; output goes to output-sli
+    desc: "Run sli.robot against the mock engine (requires Robot Framework + RW libraries)."
+    cmds:
+      - |  # port defaults to 8088; the EXIT trap kills the mock; credentials are fixed and the CA cert is empty
+        SCENARIO="${MOCK_SCENARIO:-unhealthy}"
+        PORT="${MOCK_PORT:-8088}"
+        MOCK_SCENARIO="$SCENARIO" MOCK_PORT="$PORT" python3 mock/ovirt_mock.py &
+        MOCK_PID=$!
+        trap 'kill $MOCK_PID 2>/dev/null' EXIT
+        sleep 1
+        robot \
+          --variable OVIRT_ENGINE_URL:"http://localhost:${PORT}" \
+          --variable OVIRT_USERNAME:"admin@internal" \
+          --variable OVIRT_PASSWORD:"mock" \
+          --variable OVIRT_CA_CERT:"" \
+          --outputdir output-sli \
+          ../sli.robot
+    silent: true
+
+  # Housekeeping: delete the directories that the robot runs write into.
+  clean:
+    desc: 'Remove robot output directories.'
+    silent: true
+    cmds:
+      - 'rm -rf output-sli output-runbook'
@@ -0,0 +1,7 @@
+# Container image for the mock oVirt engine script.
+FROM python:3.12-slim
+
+WORKDIR /app
+COPY ovirt_mock.py ./
+
+# Default scenario and port; both can be overridden when the container starts.
+ENV MOCK_SCENARIO=unhealthy \
+    MOCK_PORT=8080
+
+# Matches the default MOCK_PORT above.
+EXPOSE 8080
+CMD ["python3", "ovirt_mock.py"]