#!/bin/bash
# comparison_node_isolation/run.sh — Compare II between namespace and node isolation.
#
# Usage: ./run.sh [duration_seconds] [concurrency]
# Example: ./run.sh 60 50
#
# Runs two sequential arms:
#   Arm A — namespace-only isolation (aggressor co-located on same node)
#   Arm B — node-level isolation (aggressor on a separate node via nodeSelector)
#
# Requires a 2-node cluster for Arm B to be meaningful.
# On a 1-node cluster, Arm B produces a WARNING and is skipped.
#
# Answers: RQ5 — Is namespace isolation sufficient vs dedicated node isolation?

set -euo pipefail

# Tunables (positional args) and fixed experiment parameters.
DURATION=${1:-60}       # load-test duration per arm, seconds
CONCURRENCY=${2:-50}    # hey concurrent workers
SEED=42                 # recorded in results.json for provenance
CHART="$(cd "$(dirname "$0")/../.." && pwd)/helm-charts/saas-app"
DIR="$(cd "$(dirname "$0")" && pwd)"
PORT_A=19006            # local port-forward port for Arm A
PORT_B=19007            # local port-forward port for Arm B
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
RESULTS_DIR="$DIR/results/${TIMESTAMP}"
readonly DURATION CONCURRENCY SEED CHART DIR PORT_A PORT_B TIMESTAMP RESULTS_DIR
PF_PID_A=""             # port-forward PIDs, reaped by cleanup(); set by run_arm
PF_PID_B=""

GREEN='\033[0;32m'; YELLOW='\033[1;33m'; RED='\033[0;31m'; NC='\033[0m'
# Log helpers write to STDERR: run_arm's stdout is captured with $(...) by the
# caller, so progress messages must never land on stdout or they corrupt the
# parsed metrics line.
log() { echo -e "${GREEN}[$(date +%H:%M:%S)]${NC} $*" >&2; }
warn() { echo -e "${YELLOW}[WARN]${NC} $*" >&2; }
fail() { echo -e "${RED}[FAIL]${NC} $*" >&2; exit 1; }
| 34 | + |
# Abort early unless every external tool this script shells out to is on PATH.
check_prereqs() {
  local tool
  for tool in kubectl helm hey jq; do
    if ! command -v "$tool" > /dev/null 2>&1; then
      fail "Missing prerequisite: $tool"
    fi
  done
}
| 40 | + |
#######################################
# Count schedulable nodes and decide whether Arm B can run.
# Globals (written): NODE_COUNT, TWO_NODE
# Fails hard when the cluster is unreachable (0 nodes visible), rather than
# misreporting it as a single-node cluster.
#######################################
check_node_count() {
  # '|| true' keeps set -e/pipefail from silently killing the script when
  # kubectl cannot reach the cluster; wc then reports 0 and we fail with a
  # useful message below.
  NODE_COUNT=$(kubectl get nodes --no-headers 2>/dev/null | wc -l | tr -d ' ' || true)
  if [[ "$NODE_COUNT" -eq 0 ]]; then
    fail "No nodes visible — is the cluster reachable? (try: kubectl get nodes)"
  elif [[ "$NODE_COUNT" -lt 2 ]]; then
    warn "Only $NODE_COUNT node found. Arm B (node isolation) requires 2+ nodes."
    warn "Arm B will be SKIPPED. To run it, use a multi-node cluster and label nodes:"
    warn "  kubectl label node <victim-node> isolation-role=victim"
    warn "  kubectl label node <aggressor-node> isolation-role=aggressor"
    TWO_NODE=false
  else
    TWO_NODE=true
    log "Found $NODE_COUNT nodes — Arm B will run."
  fi
}
| 54 | + |
#######################################
# Deploy one experiment arm, drive load through it, and emit metrics.
# Arguments:
#   $1 - arm label ("A" or "B"), used in log lines and output file names
#   $2 - victim namespace
#   $3 - helm release name
#   $4 - helm values file selecting this arm's scheduling policy
#   $5 - aggressor deployment name
#   $6 - aggressor namespace
#   $7 - local port for kubectl port-forward
#   $8 - NAME of the global variable that receives the port-forward PID
# Outputs:
#   stdout: exactly one line "rps p50_ms p95_ms p99_ms" for the caller to
#           capture; ALL other output is deliberately routed to stderr/files.
#######################################
run_arm() {
  local arm="$1"
  local ns="$2"
  local release="$3"
  local values="$4"
  local aggressor_name="$5"
  local aggressor_ns="$6"
  local port="$7"
  local pf_pid_var="$8"

  log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  log "Arm ${arm}: deploying to namespace $ns"
  log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

  # kubectl/helm chatter goes to stderr so the caller's $(run_arm ...) capture
  # sees only the final metrics line.
  kubectl create namespace "$ns" --dry-run=client -o yaml | kubectl apply -f - >&2
  helm upgrade --install "$release" "$CHART" \
    --namespace "$ns" --values "$values" --wait --timeout=120s >&2

  log "Arm ${arm}: deploying aggressor $aggressor_name in $aggressor_ns..."
  kubectl create namespace "$aggressor_ns" --dry-run=client -o yaml \
    | kubectl apply -f - > /dev/null 2>&1 || true
  # Rename/re-namespace the shared stress manifest on the fly via jq.
  kubectl apply -f "$DIR/stress-profile.yaml" \
    --dry-run=client -o json \
    | jq --arg name "$aggressor_name" --arg ns "$aggressor_ns" \
        '.metadata.name = $name | .metadata.namespace = $ns' \
    | kubectl apply -f - >&2

  kubectl rollout status "deployment/$aggressor_name" -n "$aggressor_ns" --timeout=60s >&2
  sleep 10 # let aggressor reach steady state before measuring

  # Locate the API service; fall back to the conventional "<release>-api".
  local svc
  svc=$(kubectl get svc -n "$ns" \
    -l "app.kubernetes.io/component=api-service" \
    -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "${release}-api")

  log "Arm ${arm}: port-forwarding $svc -> localhost:${port}..."
  # Send the forwarder's output to a log file: a backgrounded child must not
  # inherit this function's captured stdout, or it would both pollute the
  # metrics line and hold the caller's command substitution open.
  kubectl port-forward -n "$ns" "svc/${svc}" "${port}:3002" \
    > "$RESULTS_DIR/arm_${arm}_portforward.log" 2>&1 &
  printf -v "$pf_pid_var" '%s' "$!" # indirect assignment without eval
  sleep 3

  log "Arm ${arm}: warm-up 10s..."
  hey -z 10s -c 10 "http://localhost:${port}/health" > /dev/null 2>&1 || true
  sleep 2

  log "Arm ${arm}: load test ${DURATION}s @ concurrency=${CONCURRENCY}..."
  # NOTE(review): hey exposes no seeding flag; SEED is recorded in
  # results.json for provenance only and is not passed to hey.
  hey \
    -z "${DURATION}s" \
    -c "$CONCURRENCY" \
    -m GET \
    "http://localhost:${port}/health" \
    > "$RESULTS_DIR/arm_${arm}_hey.txt" 2>&1

  kill "${!pf_pid_var}" 2>/dev/null || true

  # Parse hey's summary: "Requests/sec: N" plus percentile lines
  # "  50% in 0.0042 secs" — field $3 is seconds, converted to ms here.
  local f="$RESULTS_DIR/arm_${arm}_hey.txt"
  local rps p50 p95 p99
  rps=$(awk '/Requests\/sec:/{printf "%.2f", $2}' "$f")
  p50=$(awk '/50% in/{printf "%.3f", $3 * 1000}' "$f")
  p95=$(awk '/95% in/{printf "%.3f", $3 * 1000}' "$f")
  p99=$(awk '/99% in/{printf "%.3f", $3 * 1000}' "$f")

  echo "$rps $p50 $p95 $p99"
}
| 118 | + |
#######################################
# Best-effort teardown of one arm.
# Arguments: $1 release, $2 namespace, $3 aggressor name, $4 aggressor ns.
# Never fails: every step tolerates missing/already-deleted resources.
#######################################
cleanup_arm() {
  local release="$1"
  local ns="$2"
  local aggressor="$3"
  local aggressor_ns="$4"

  helm uninstall "$release" -n "$ns" 2>/dev/null || true
  kubectl delete deployment "$aggressor" -n "$aggressor_ns" 2>/dev/null || true
  kubectl delete namespace "$ns" --wait=false 2>/dev/null || true
  # Only delete the aggressor namespace when it is distinct from the victim's.
  if [[ "$aggressor_ns" != "$ns" ]]; then
    kubectl delete namespace "$aggressor_ns" --wait=false 2>/dev/null || true
  fi
}
| 127 | + |
# Global teardown, registered on EXIT: reap any live port-forwards, then tear
# down both arms (safe even when an arm never deployed).
cleanup() {
  local pid
  for pid in "$PF_PID_A" "$PF_PID_B"; do
    if [[ -n "$pid" ]]; then
      kill "$pid" 2>/dev/null || true
    fi
  done
  cleanup_arm "bench-ns-iso" "bench-ns-iso" "cpu-aggressor-colocated" "bench-ns-iso"
  cleanup_arm "bench-node-iso" "bench-node-iso" "cpu-aggressor-isolated" "bench-node-iso"
}
| 134 | + |
| 135 | +# ── Main ────────────────────────────────────────────────────────────────────── |
| 136 | +mkdir -p "$RESULTS_DIR" |
| 137 | +trap cleanup EXIT |
| 138 | + |
| 139 | +check_prereqs |
| 140 | +check_node_count |
| 141 | + |
| 142 | +# Arm A — namespace isolation |
| 143 | +read -r RPS_A P50_A P95_A P99_A <<< "$(run_arm A bench-ns-iso bench-ns-iso \ |
| 144 | + "$DIR/workload-namespace.yaml" cpu-aggressor-colocated bench-ns-iso $PORT_A PF_PID_A)" |
| 145 | + |
| 146 | +# Arm B — node isolation (skip on single-node) |
| 147 | +if [[ "$TWO_NODE" == true ]]; then |
| 148 | + read -r RPS_B P50_B P95_B P99_B <<< "$(run_arm B bench-node-iso bench-node-iso \ |
| 149 | + "$DIR/workload-node.yaml" cpu-aggressor-isolated bench-node-iso $PORT_B PF_PID_B)" |
| 150 | +else |
| 151 | + RPS_B="null"; P50_B="null"; P95_B="null"; P99_B="null" |
| 152 | + warn "Arm B skipped (single-node cluster)" |
| 153 | +fi |
| 154 | + |
| 155 | +# Compute II for each arm (using Arm A as the baseline reference) |
| 156 | +II_A=$(awk -v b="$P95_A" -v s="$P95_A" 'BEGIN{print "0.0000"}') # Arm A IS the stressed case |
| 157 | +II_B="null" |
| 158 | +if [[ "$TWO_NODE" == true && "$P95_A" != "null" && "$P95_B" != "null" ]]; then |
| 159 | + II_B=$(awk -v base="$P95_B" -v stress="$P95_A" \ |
| 160 | + 'BEGIN{if(base>0) printf "%.4f",(stress-base)/base; else print "null"}') |
| 161 | +fi |
| 162 | + |
| 163 | +k8s_ver=$(kubectl version -o json 2>/dev/null | jq -r '.serverVersion.gitVersion' 2>/dev/null || echo "unknown") |
| 164 | + |
| 165 | +jq -n \ |
| 166 | + --arg exp "comparison_node_isolation" \ |
| 167 | + --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \ |
| 168 | + --arg k8sv "$k8s_ver" \ |
| 169 | + --arg nodespec "${NODE_SPEC:-unknown}" \ |
| 170 | + --arg cni "${CNI:-unknown}" \ |
| 171 | + --argjson dur "$DURATION" \ |
| 172 | + --argjson conc "$CONCURRENCY" \ |
| 173 | + --argjson seed "$SEED" \ |
| 174 | + --argjson rps_a "${RPS_A:-0}" \ |
| 175 | + --argjson p95_a "${P95_A:-0}" \ |
| 176 | + --argjson p99_a "${P99_A:-0}" \ |
| 177 | + --argjson rps_b "${RPS_B:-null}" \ |
| 178 | + --argjson p95_b "${P95_B:-null}" \ |
| 179 | + --argjson p99_b "${P99_B:-null}" \ |
| 180 | + --argjson ii_b "${II_B:-null}" \ |
| 181 | + --arg two_node "$TWO_NODE" \ |
| 182 | +'{ |
| 183 | + experiment: $exp, |
| 184 | + timestamp: $ts, |
| 185 | + cluster: { k8s_version: $k8sv, node_spec: $nodespec, cni: $cni }, |
| 186 | + workload: { duration_s: $dur, concurrency: $conc, seed: $seed }, |
| 187 | + arm_A_namespace_isolation: { |
| 188 | + description: "Victim and aggressor share the same node; namespace ResourceQuota only", |
| 189 | + throughput_rps: $rps_a, |
| 190 | + p95_ms: $p95_a, |
| 191 | + p99_ms: $p99_a, |
| 192 | + interference_index: "reference (stressed arm)" |
| 193 | + }, |
| 194 | + arm_B_node_isolation: { |
| 195 | + description: "Victim on dedicated node; aggressor on separate node", |
| 196 | + two_node_cluster: ($two_node == "true"), |
| 197 | + throughput_rps: $rps_b, |
| 198 | + p95_ms: $p95_b, |
| 199 | + p99_ms: $p99_b, |
| 200 | + interference_index_vs_arm_A: $ii_b |
| 201 | + }, |
| 202 | + metrics: { |
| 203 | + interference_index: $ii_b, |
| 204 | + resource_fairness_deviation: null, |
| 205 | + autoscaling_stability_score: null |
| 206 | + } |
| 207 | +}' > "$RESULTS_DIR/results.json" |
| 208 | + |
| 209 | +log "Results saved: $RESULTS_DIR/results.json" |
| 210 | +cat "$RESULTS_DIR/results.json" |
| 211 | +log "Comparison experiment complete." |