Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# oVirt is not itself a RunWhen-discoverable platform, so this rule anchors on the
# Kubernetes `cluster` resource purely as a generation trigger: it emits one
# oVirt engine-health SLX per discovered cluster. All SLX/SLI/runbook content
# comes from the workspaceInfo `custom.*` values (ovirt_engine_url, ovirt_engine_name)
# and workspace secrets (ovirt_username, ovirt_password, optional ovirt_ca_cert),
# not from the matched cluster.
#
# Requires the workspace to discover at least one Kubernetes cluster. If multiple
# clusters are discovered, one oVirt SLX is generated per cluster.
apiVersion: runwhen.com/v1
kind: GenerationRules
spec:
  platform: kubernetes
  generationRules:
    # The Kubernetes cluster is only a generation trigger: one oVirt SLX is
    # emitted per discovered cluster, and nothing but the "cluster" qualifier
    # is taken from the matched resource.
    - resourceTypes:
        - cluster
      matchRules:
        - type: and
          matches:
            # ".+" on the name property matches any cluster that has a name.
            - type: pattern
              pattern: ".+"
              properties: [name]
              mode: substring
      slxs:
        - baseName: ovirt-engine-health
          qualifiers: ["cluster"]
          baseTemplateName: ovirt-engine-health
          levelOfDetail: detailed
          outputItems:
            - type: slx
            - type: sli
            - type: runbook
              # The runbook is rendered from the taskset template.
              templateName: ovirt-engine-health-taskset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# SLI manifest template for the oVirt engine health score.
# Rendered by the template engine first, then parsed as YAML.
apiVersion: runwhen.com/v1
kind: ServiceLevelIndicator
metadata:
  name: {{slx_name}}
  labels:
    {% include "common-labels.yaml" %}
  annotations:
    {% include "common-annotations.yaml" %}
spec:
  displayUnitsLong: OK
  displayUnitsShort: ok
  locations:
    - {{default_location}}
  description: The composite health score of the oVirt engine environment.
  codeBundle:
    # Fall back to the public code collection and its main branch when the
    # workspace does not override repo_url / ref.
    {% if repo_url %}
    repoUrl: {{repo_url}}
    {% else %}
    repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git
    {% endif %}
    {% if ref %}
    ref: {{ref}}
    {% else %}
    ref: main
    {% endif %}
    pathToRobot: codebundles/ovirt-engine-health/sli.robot
  intervalStrategy: intermezzo
  intervalSeconds: 600
  configProvided:
    # Workspace-supplied values are quoted so an empty or unusual value stays
    # a string instead of becoming null or breaking the document.
    - name: OVIRT_ENGINE_URL
      value: "{{custom.ovirt_engine_url}}"
  secretsProvided:
    - name: OVIRT_USERNAME
      workspaceKey: "{{custom.ovirt_username}}"
    - name: OVIRT_PASSWORD
      workspaceKey: "{{custom.ovirt_password}}"
    # The CA certificate secret is optional; it is only wired in when the
    # workspace defines custom.ovirt_ca_cert.
    {% if custom.ovirt_ca_cert %}
    - name: OVIRT_CA_CERT
      workspaceKey: "{{custom.ovirt_ca_cert}}"
    {% endif %}
  alertConfig:
    tasks:
      persona: eager-edgar
      sessionTTL: 10m
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# SLX manifest template describing the oVirt engine health objective.
# Rendered by the template engine first, then parsed as YAML.
apiVersion: runwhen.com/v1
kind: ServiceLevelX
metadata:
  name: {{slx_name}}
  labels:
    {% include "common-labels.yaml" %}
  annotations:
    {% include "common-annotations.yaml" %}
spec:
  imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/ovirt.svg
  # Quoted because the engine name is free-form workspace input and could
  # otherwise introduce YAML indicators (": ", " #") into a plain scalar.
  alias: "{{custom.ovirt_engine_name}} oVirt Engine Health"
  asMeasuredBy: The composite health score of the oVirt engine (hosts, VMs, storage, clusters, events, snapshots).
  configProvided:
    - name: OVIRT_ENGINE_URL
      value: "{{custom.ovirt_engine_url}}"
  owners:
    - {{workspace.owner_email}}
  statement: The oVirt engine and its hosts, VMs, and storage domains should be healthy.
  additionalContext: []
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Runbook manifest template for the oVirt engine health taskset.
# Rendered by the template engine first, then parsed as YAML.
apiVersion: runwhen.com/v1
kind: Runbook
metadata:
  name: {{slx_name}}
  labels:
    {% include "common-labels.yaml" %}
  annotations:
    {% include "common-annotations.yaml" %}
spec:
  location: {{default_location}}
  codeBundle:
    # Fall back to the public code collection and its main branch when the
    # workspace does not override repo_url / ref.
    {% if repo_url %}
    repoUrl: {{repo_url}}
    {% else %}
    repoUrl: https://github.com/runwhen-contrib/rw-cli-codecollection.git
    {% endif %}
    {% if ref %}
    ref: {{ref}}
    {% else %}
    ref: main
    {% endif %}
    pathToRobot: codebundles/ovirt-engine-health/runbook.robot
  configProvided:
    # Workspace-supplied values are quoted so an empty or unusual value stays
    # a string instead of becoming null or breaking the document.
    - name: OVIRT_ENGINE_URL
      value: "{{custom.ovirt_engine_url}}"
  secretsProvided:
    - name: OVIRT_USERNAME
      workspaceKey: "{{custom.ovirt_username}}"
    - name: OVIRT_PASSWORD
      workspaceKey: "{{custom.ovirt_password}}"
    # The CA certificate secret is optional; it is only wired in when the
    # workspace defines custom.ovirt_ca_cert.
    {% if custom.ovirt_ca_cert %}
    - name: OVIRT_CA_CERT
      workspaceKey: "{{custom.ovirt_ca_cert}}"
    {% endif %}
4 changes: 4 additions & 0 deletions codebundles/ovirt-engine-health/.test/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Local engine URL and credentials used by the test tasks; never commit.
.env
# Robot Framework output directories written by the run-sli / run-runbook tasks.
output-sli/
output-runbook/
# PEM files such as a downloaded engine CA certificate.
*.pem
65 changes: 65 additions & 0 deletions codebundles/ovirt-engine-health/.test/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Testing — ovirt-engine-health

This bundle talks to a live oVirt engine REST API, so testing means pointing it
at a reachable engine. There is no cloud infrastructure to provision (and no
Terraform), because oVirt is self-hosted.

## What you need

A reachable oVirt engine. Any of these works:
- An existing oVirt / RHV / OLVM engine you have read access to.
- A lab/self-hosted-engine deployment.
- The upstream `ovirt-engine` appliance for a throwaway environment.

A read-only account is sufficient (e.g. a user in the `@internal` auth
profile with the `UserRole` or `ReadOnlyAdmin` role).

## Configure

Create a `.test/.env` file (gitignored) or export the variables:

```bash
OVIRT_ENGINE_URL=https://engine.example.com
OVIRT_USERNAME=admin@internal
OVIRT_PASSWORD=changeme
# Optional:
OVIRT_CA_CERT_FILE=/path/to/engine-ca.pem # for TLS verification
OVIRT_STORAGE_FREE_PCT=10
OVIRT_EVENT_LOOKBACK=1h
OVIRT_SNAPSHOT_MAX_AGE=7d
MAX_PAUSED_VMS=0
OVIRT_ENGINE_NAME=lab-ovirt
```

> Fetch the engine CA with:
> `curl -sk 'https://engine.example.com/ovirt-engine/services/pki-resource?resource=ca-certificate&format=X509-PEM-CA' -o engine-ca.pem`

## Run

```bash
task check-config # validate required env vars
task smoke-scripts # run the raw check scripts and print their JSON
task run-sli # run sli.robot (pushes the composite health score)
task run-runbook # run runbook.robot (raises issues + writes a report)
task # check-config + run-sli + run-runbook
task clean # remove robot output dirs
```

`smoke-scripts` is the fastest way to confirm connectivity and that the engine's
JSON shape matches what the scripts expect, without Robot Framework.

## No engine handy? Use the mock

`mock/` contains a dependency-free mock oVirt engine so you can exercise the
full bundle flow with no real engine and no cloud cost:

```bash
task test-mock # start mock, run all check scripts, tear down
task test-mock MOCK_SCENARIO=healthy # nominal data (SLI score == 1, no issues)
task mock # run mock in the foreground on :8080
task run-sli-mock # run sli.robot against the mock (needs RW libs)
```

See `mock/README.md` for details and the scenarios it ships. The mock validates
the bundle's wiring and parsing against the documented v4 API shape; it does not
replace a one-time check against a real engine.
152 changes: 152 additions & 0 deletions codebundles/ovirt-engine-health/.test/Taskfile.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
version: "3"

# Lightweight test harness for the ovirt-engine-health CodeBundle.
#
# Unlike the cloud bundles, there is no infrastructure to provision here: point
# the tasks at a reachable oVirt engine (a real engine, a lab, or the upstream
# ovirt-engine appliance) via the environment variables below, then run the
# robots locally.
#
# Required env:
# OVIRT_ENGINE_URL e.g. https://engine.example.com (no trailing /ovirt-engine)
# OVIRT_USERNAME e.g. admin@internal
# OVIRT_PASSWORD
# Optional env:
# OVIRT_CA_CERT_FILE path to a PEM CA bundle for TLS verification
# OVIRT_STORAGE_FREE_PCT, OVIRT_EVENT_LOOKBACK, OVIRT_SNAPSHOT_MAX_AGE,
# MAX_PAUSED_VMS, OVIRT_ENGINE_NAME

tasks:
default:
  desc: "Run both the SLI and the runbook against the configured engine."
  # Validate the configuration first, then run the SLI followed by the runbook.
  cmds:
    - task: check-config
    - task: run-sli
    - task: run-runbook

check-config:
  desc: "Verify the required environment variables are set."
  # Load .env here too: task-level dotenv is scoped to the task that declares
  # it, so values loaded by a calling task (run-sli, run-runbook,
  # smoke-scripts) are not visible when this task runs via `task: check-config`.
  dotenv: ['.env']
  cmds:
    - |
      # Collect every missing variable so a single run reports them all.
      missing=()
      [ -z "${OVIRT_ENGINE_URL}" ] && missing+=("OVIRT_ENGINE_URL")
      [ -z "${OVIRT_USERNAME}" ] && missing+=("OVIRT_USERNAME")
      [ -z "${OVIRT_PASSWORD}" ] && missing+=("OVIRT_PASSWORD")
      if [ ${#missing[@]} -ne 0 ]; then
        # Report on stderr and fail so callers stop before contacting the engine.
        echo "Missing required environment variables: ${missing[*]}" >&2
        exit 1
      fi
      echo "Configuration looks good for ${OVIRT_ENGINE_URL}"
  silent: true

smoke-scripts:
  desc: "Run the raw check scripts directly and print their JSON (no Robot Framework)."
  dotenv: ['.env']
  cmds:
    - task: check-config
    - |
      # Export the CA bundle contents when a file is configured; empty otherwise.
      OVIRT_CA_CERT=""
      if [ -n "${OVIRT_CA_CERT_FILE}" ]; then
        OVIRT_CA_CERT="$(cat "${OVIRT_CA_CERT_FILE}" || true)"
      fi
      export OVIRT_CA_CERT

      # Each check prints a banner, then a jq summary of the script's JSON.
      echo "== engine_health =="
      ../engine_health.sh | jq .

      echo "== host_status =="
      ../host_status.sh | jq '{total, unhealthy_hosts}'

      echo "== vm_status =="
      ../vm_status.sh | jq '{total, problem_vms}'

      echo "== storage_domains =="
      ../storage_domains.sh "${OVIRT_STORAGE_FREE_PCT:-10}" | jq '{problem_domains}'

      echo "== cluster_health =="
      ../cluster_health.sh | jq '{problem_clusters}'

      # Events and snapshots are reduced to counts to keep the output short.
      echo "== recent_events =="
      ../recent_events.sh "${OVIRT_EVENT_LOOKBACK:-1h}" | jq '{critical_events: (.critical_events|length)}'

      echo "== stale_snapshots =="
      ../stale_snapshots.sh "${OVIRT_SNAPSHOT_MAX_AGE:-7d}" | jq '{stale: (.stale_snapshots|length)}'
  silent: true

run-sli:
  desc: "Run sli.robot against the configured engine."
  dotenv: ['.env']
  cmds:
    - task: check-config
    - |
      # Export the CA bundle contents when a file is configured; empty otherwise.
      OVIRT_CA_CERT=""
      if [ -n "${OVIRT_CA_CERT_FILE}" ]; then
        OVIRT_CA_CERT="$(cat "${OVIRT_CA_CERT_FILE}" || true)"
      fi
      export OVIRT_CA_CERT

      # Optional settings fall back to the defaults given in the expansions.
      robot \
        --variable OVIRT_ENGINE_URL:"${OVIRT_ENGINE_URL}" \
        --variable OVIRT_USERNAME:"${OVIRT_USERNAME}" \
        --variable OVIRT_PASSWORD:"${OVIRT_PASSWORD}" \
        --variable OVIRT_CA_CERT:"${OVIRT_CA_CERT}" \
        --variable OVIRT_STORAGE_FREE_PCT:"${OVIRT_STORAGE_FREE_PCT:-10}" \
        --variable OVIRT_EVENT_LOOKBACK:"${OVIRT_EVENT_LOOKBACK:-1h}" \
        --variable OVIRT_SNAPSHOT_MAX_AGE:"${OVIRT_SNAPSHOT_MAX_AGE:-7d}" \
        --variable MAX_PAUSED_VMS:"${MAX_PAUSED_VMS:-0}" \
        --variable OVIRT_ENGINE_NAME:"${OVIRT_ENGINE_NAME:-ovirt-engine}" \
        --outputdir output-sli \
        ../sli.robot
  silent: true

run-runbook:
  desc: "Run runbook.robot against the configured engine."
  dotenv: ['.env']
  cmds:
    - task: check-config
    - |
      # Export the CA bundle contents when a file is configured; empty otherwise.
      OVIRT_CA_CERT=""
      if [ -n "${OVIRT_CA_CERT_FILE}" ]; then
        OVIRT_CA_CERT="$(cat "${OVIRT_CA_CERT_FILE}" || true)"
      fi
      export OVIRT_CA_CERT

      # Optional settings fall back to the defaults given in the expansions.
      robot \
        --variable OVIRT_ENGINE_URL:"${OVIRT_ENGINE_URL}" \
        --variable OVIRT_USERNAME:"${OVIRT_USERNAME}" \
        --variable OVIRT_PASSWORD:"${OVIRT_PASSWORD}" \
        --variable OVIRT_CA_CERT:"${OVIRT_CA_CERT}" \
        --variable OVIRT_STORAGE_FREE_PCT:"${OVIRT_STORAGE_FREE_PCT:-10}" \
        --variable OVIRT_EVENT_LOOKBACK:"${OVIRT_EVENT_LOOKBACK:-1h}" \
        --variable OVIRT_SNAPSHOT_MAX_AGE:"${OVIRT_SNAPSHOT_MAX_AGE:-7d}" \
        --variable MAX_PAUSED_VMS:"${MAX_PAUSED_VMS:-0}" \
        --variable OVIRT_ENGINE_NAME:"${OVIRT_ENGINE_NAME:-ovirt-engine}" \
        --outputdir output-runbook \
        ../runbook.robot
  silent: true

mock:
  desc: "Run the mock oVirt engine in the foreground (Ctrl-C to stop). Set MOCK_SCENARIO=healthy|unhealthy."
  cmds:
    # Scenario defaults to "unhealthy" and the port to 8080 unless overridden.
    - |
      MOCK_SCENARIO="${MOCK_SCENARIO:-unhealthy}" MOCK_PORT="${MOCK_PORT:-8080}" python3 mock/ovirt_mock.py

test-mock:
  desc: "Start the mock, run every check script against it, then tear it down. Set MOCK_SCENARIO=healthy|unhealthy."
  cmds:
    - |
      SCENARIO="${MOCK_SCENARIO:-unhealthy}"
      PORT="${MOCK_PORT:-8088}"

      # Launch the mock in the background and make sure it is stopped on exit.
      MOCK_SCENARIO="$SCENARIO" MOCK_PORT="$PORT" python3 mock/ovirt_mock.py &
      MOCK_PID=$!
      trap 'kill $MOCK_PID 2>/dev/null' EXIT
      # Give the mock a moment to start listening.
      sleep 1

      # Point the check scripts at the mock with placeholder credentials.
      export OVIRT_ENGINE_URL="http://localhost:${PORT}"
      export OVIRT_USERNAME="admin@internal"
      export OVIRT_PASSWORD="mock"

      echo "===== scenario: ${SCENARIO} ====="

      echo "== engine_health =="
      ../engine_health.sh | jq .

      echo "== host_status =="
      ../host_status.sh | jq '{total, unhealthy_hosts, maintenance_hosts}'

      echo "== vm_status =="
      ../vm_status.sh | jq '{total, problem_vms}'

      echo "== storage_domains =="
      ../storage_domains.sh "${OVIRT_STORAGE_FREE_PCT:-10}" | jq '{problem_domains}'

      echo "== cluster_health =="
      ../cluster_health.sh | jq '{problem_clusters}'

      echo "== recent_events =="
      ../recent_events.sh "${OVIRT_EVENT_LOOKBACK:-1h}" | jq '{critical_events}'

      echo "== stale_snapshots =="
      ../stale_snapshots.sh "${OVIRT_SNAPSHOT_MAX_AGE:-7d}" | jq '{stale_snapshots}'
  silent: true

run-sli-mock:
  desc: "Run sli.robot against the mock engine (requires Robot Framework + RW libraries)."
  cmds:
    - |
      SCENARIO="${MOCK_SCENARIO:-unhealthy}"
      PORT="${MOCK_PORT:-8088}"

      # Launch the mock in the background and make sure it is stopped on exit.
      MOCK_SCENARIO="$SCENARIO" MOCK_PORT="$PORT" python3 mock/ovirt_mock.py &
      MOCK_PID=$!
      trap 'kill $MOCK_PID 2>/dev/null' EXIT
      # Give the mock a moment to start listening.
      sleep 1

      # The mock is served over plain HTTP, so an empty CA certificate is passed.
      robot \
        --variable OVIRT_ENGINE_URL:"http://localhost:${PORT}" \
        --variable OVIRT_USERNAME:"admin@internal" \
        --variable OVIRT_PASSWORD:"mock" \
        --variable OVIRT_CA_CERT:"" \
        --outputdir output-sli \
        ../sli.robot
  silent: true

clean:
  desc: "Remove robot output directories."
  cmds:
    # Matches the --outputdir values used by the run-sli and run-runbook tasks.
    - rm -rf output-sli output-runbook
  silent: true
7 changes: 7 additions & 0 deletions codebundles/ovirt-engine-health/.test/mock/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Image for the mock oVirt engine used by the ovirt-engine-health tests.
FROM python:3.12-slim
WORKDIR /app
COPY ovirt_mock.py ./
# Defaults for the mock; override at run time with `docker run -e ...`.
ENV MOCK_SCENARIO=unhealthy \
    MOCK_PORT=8080
# Documents the default MOCK_PORT above.
EXPOSE 8080
CMD ["python3", "ovirt_mock.py"]
Loading
Loading