#!/bin/bash
# Shared helpers for Charmed Apache Spark tutorial spread tests.
#
# Source this file at the top of every task execute/prepare block:
#   . "$SPREAD_PATH/python/tests/tutorial/helpers.sh"

# Spread SSHs in as root but does not always set HOME=/root, which causes the
# Juju client to fail looking up its config in $HOME/.local/share/juju.
# The value is forced unconditionally so every sourcing task sees the same HOME.
export HOME=/root

# ---------------------------------------------------------------------------
# wait_idle – poll until every Juju unit in the model is active/idle.
#
# Usage:
#   wait_idle [--timeout SECONDS] [--interval SECONDS]
#             [--allow-blocked APP1,APP2,...]
#
# Defaults:
#   --timeout  600  (10 minutes)
#   --interval 30   (check every 30 seconds)
#
# --timeout must be a non-negative integer and --interval a positive integer
# (leading zeros are accepted and read as decimal). Every flag requires a
# value; a missing or malformed value is reported on stderr and the function
# returns 1 without polling.
#
# --allow-blocked accepts a comma-separated list of application names that
# are expected to be in blocked/idle state (e.g. data-integrator without a
# relation). Units belonging to those apps are treated as settled when they
# are blocked/idle. All other units must still be active/idle.
#
# Only units listed under each application's "units" map in the output of
# "juju status --format=json" are inspected.
#
# Elapsed time is counted as the sum of the sleep intervals; the time spent
# running "juju status" itself is not included.
#
# Progress output (one line per poll interval):
#   "still provisioning"            – juju status returned no units yet
#   "N unit(s) not yet active/idle" – units exist but are still settling
#   "All units active/idle"         – success, final juju status is printed
#   "Timed out after Xs"            – timeout reached, final juju status is printed
#
# Returns 0 when all units are active/idle, 1 on timeout or on a usage error.
# ---------------------------------------------------------------------------
wait_idle() {
  local timeout=600
  local interval=30
  local allow_blocked=""

  # Parse named options, consuming two tokens per flag (name + value).
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --timeout|--interval|--allow-blocked)
        # Without this guard "shift 2" fails (and shifts nothing) when the
        # value is missing, which would spin this loop forever.
        if [[ $# -lt 2 ]]; then
          echo "wait_idle: option $1 requires a value" >&2
          return 1
        fi
        case "$1" in
          --timeout) timeout="$2" ;;
          --interval) interval="$2" ;;
          --allow-blocked) allow_blocked="$2" ;;
        esac
        shift 2
        ;;
      *)
        echo "wait_idle: unknown option: $1" >&2
        return 1
        ;;
    esac
  done

  # Validate the numeric options up front: a non-numeric value would break the
  # loop comparison, and a zero interval would never advance the elapsed time.
  if [[ ! "$timeout" =~ ^[0-9]+$ ]]; then
    echo "wait_idle: --timeout must be a non-negative integer, got: $timeout" >&2
    return 1
  fi
  if [[ ! "$interval" =~ ^[0-9]+$ ]] || (( 10#$interval == 0 )); then
    echo "wait_idle: --interval must be a positive integer, got: $interval" >&2
    return 1
  fi
  # Force base 10 so values such as "08" are not rejected as invalid octal.
  timeout=$(( 10#$timeout ))
  interval=$(( 10#$interval ))

  local elapsed=0
  local not_ready
  echo "Waiting for all Juju units to be active/idle (timeout=${timeout}s, poll=${interval}s)…"

  while (( elapsed < timeout )); do
    # Run the poll pipeline with pipefail disabled so a non-zero exit from
    # "juju status" (common while machines are still provisioning) does not
    # abort a calling script that has set -euo pipefail active.
    # The Python snippet prints either the number of unsettled units or the
    # word "provisioning" (no units yet, or unparsable status output).
    not_ready=$(
      set +o pipefail
      export ALLOW_BLOCKED="$allow_blocked"
      juju status --format=json 2>/dev/null | python3 -c '
import json, sys, os
try:
    data = json.load(sys.stdin)
    allowed = set(os.environ.get("ALLOW_BLOCKED", "").split(",")) - {""}
    not_ready = 0
    total_units = 0
    for app_name, app in data.get("applications", {}).items():
        for unit in app.get("units", {}).values():
            total_units += 1
            ws = unit.get("workload-status", {}).get("current", "")
            js = unit.get("juju-status", {}).get("current", "")
            if ws == "active" and js == "idle":
                continue
            if ws == "blocked" and js == "idle" and app_name in allowed:
                continue
            not_ready += 1
    if total_units == 0:
        print("provisioning")
    else:
        print(not_ready)
except Exception:
    print("provisioning")
'
    ) || not_ready="provisioning"

    # Anything that is not a plain count (e.g. empty output) is treated as
    # "not ready yet" rather than being reported as a unit count.
    if [[ ! "$not_ready" =~ ^[0-9]+$ ]]; then
      not_ready="provisioning"
    fi

    if [[ "$not_ready" == "0" ]]; then
      echo "All units active/idle after ${elapsed}s."
      # Informational only: a failure here must not change the result.
      juju status || true
      return 0
    elif [[ "$not_ready" == "provisioning" ]]; then
      echo "[${elapsed}s elapsed] still provisioning – rechecking in ${interval}s…"
    else
      echo "[${elapsed}s elapsed] ${not_ready} unit(s) not yet active/idle – rechecking in ${interval}s…"
    fi
    sleep "$interval"
    elapsed=$(( elapsed + interval ))
  done

  echo "Timed out after ${timeout}s. Final status:"
  # Informational only: keep the documented return code even if this fails.
  juju status || true
  return 1
}
# 0 commit comments