|
| 1 | +#!/usr/bin/env bash |
| 2 | +# Probe a worker pod's network egress and report which destinations are |
| 3 | +# reachable. Intended to be run before and after a Cilium NetworkPolicy is |
| 4 | +# applied — the table output is identical-shape so a diff highlights exactly |
| 5 | +# which targets the policy newly blocks. |
| 6 | +# |
| 7 | +# Mechanism: each probe runs as its own `kubectl debug` ephemeral container |
| 8 | +# attached to the worker pod's network namespace. Cilium endpoints cover all |
| 9 | +# containers in a pod, so the ephemeral container inherits whatever policy |
| 10 | +# applies to the worker. One probe per debug invocation pays ~3 s startup |
| 11 | +# each (image cached after first call) but keeps the bash quoting trivial — |
| 12 | +# a previous one-shot inline approach silently dropped output past ~3 probes |
| 13 | +# under ephemeral-container streaming buffers. |
| 14 | +# |
| 15 | +# Usage: |
| 16 | +# WORKER_POD=duckgres-…-worker-7664 \ |
| 17 | +# TENANT_RDS_HOST=tenant.cluster-….rds.amazonaws.com \ |
| 18 | +# TENANT_BUCKET=posthog-tenant-bucket \ |
| 19 | +# OTHER_RDS_HOST=posthog-duckgres-config-store-mw-dev.cluster-….rds.amazonaws.com \ |
| 20 | +# OTHER_WORKER_POD=duckgres-…-worker-7665 \ |
| 21 | +# ./scripts/probe_worker_egress.sh |
| 22 | +set -euo pipefail |
| 23 | + |
# --- Inputs -----------------------------------------------------------------
# WORKER_POD is mandatory; everything else is optional and falls back to a
# default (or to empty, which makes the matching probe get skipped later).
: "${WORKER_POD:?WORKER_POD env var required}"
: "${NAMESPACE:=duckgres}"
: "${TENANT_RDS_HOST:=}"
: "${TENANT_BUCKET:=}"
: "${OTHER_RDS_HOST:=}"
: "${OTHER_WORKER_POD:=}"

# pod_jsonpath POD EXPR — print one jsonpath field of a pod in $NAMESPACE.
pod_jsonpath() {
  kubectl -n "$NAMESPACE" get pod "$1" -o jsonpath="$2"
}

# --- Cluster discovery ------------------------------------------------------
NODE_IP="$(pod_jsonpath "$WORKER_POD" '{.status.hostIP}')"
WORKER_NODE="$(pod_jsonpath "$WORKER_POD" '{.spec.nodeName}')"

# The cache proxy runs as a per-node DaemonSet pod (not hostNetwork), so pick
# the replica scheduled on the worker's node: that is the instance the worker
# would actually talk to. A failed lookup is tolerated on purpose and leaves
# the variable empty.
CACHE_PROXY_IP="$(kubectl -n "$NAMESPACE" get pod \
  -l app.kubernetes.io/name=duckgres-cache-proxy \
  --field-selector="spec.nodeName=$WORKER_NODE" \
  -o jsonpath='{.items[0].status.podIP}' 2>/dev/null || true)"
# 8080 is the S3 forward proxy port; 8081 is peer<->peer, 8082 is health.
CACHE_PROXY_PORT=8080

APISERVER_IP="$(kubectl get svc kubernetes -n default -o jsonpath='{.spec.clusterIP}')"
KUBE_DNS_IP="$(kubectl get svc kube-dns -n kube-system -o jsonpath='{.spec.clusterIP}')"

: "${S3_REGION:=us-east-1}"
S3_HOST="s3.${S3_REGION}.amazonaws.com"

# Resolve the second worker's pod IP only when the caller named one.
if [[ -z "$OTHER_WORKER_POD" ]]; then
  OTHER_WORKER_IP=""
else
  OTHER_WORKER_IP="$(pod_jsonpath "$OTHER_WORKER_POD" '{.status.podIP}')"
fi

# --- Banner -----------------------------------------------------------------
printf 'Probing worker: %s (node %s)\n' "$WORKER_POD" "$NODE_IP"
printf 'Cache proxy on node: %s:%s\n' "${CACHE_PROXY_IP:-MISSING}" "$CACHE_PROXY_PORT"
printf '\n'
| 54 | + |
#######################################
# probe runs one command inside an ephemeral container sharing $WORKER_POD's
# netns and prints one fixed-width table row:
#   KIND TARGET EXPECTED RESULT VERDICT DETAIL
#
# Globals:   NAMESPACE, WORKER_POD (read)
# Arguments: $1 - kind label shown in the first column (e.g. TCP, DNS, HTTPS)
#            $2 - human-readable target label
#            $3 - expected outcome: "allow" or "block"
#            $4 - command line executed via `sh -c` in the ephemeral container
# Outputs:   one row on stdout. RESULT is "reachable" (inner exit code 0),
#            "blocked" (inner exit code non-zero) or "error" (the probe
#            container never reported an exit code). VERDICT is PASS only for
#            allow+reachable or block+blocked; everything else is FAIL.
# Returns:   status of the final printf; probe outcomes are reported in the
#            row, never through the exit status.
#######################################
probe() {
  local kind="$1" target="$2" expected="$3" cmd="$4"
  local raw out ec result verdict detail

  # kubectl debug --attach=true does NOT propagate the inner shell's exit
  # code (always exits 0 once the ephemeral container is attached). Embed
  # the inner exit code in the output as a sentinel and parse it back.
  # `|| true` keeps a kubectl failure from tripping `set -e`; that case is
  # detected below by the sentinel being absent.
  raw=$(kubectl -n "$NAMESPACE" debug "$WORKER_POD" \
    --image=nicolaka/netshoot \
    --target=duckdb-worker \
    --image-pull-policy=IfNotPresent \
    --profile=general \
    -q --attach=true \
    -- sh -c "$cmd; echo __PROBE_EXIT=\$?" 2>&1) || true

  # sed (unlike grep) exits 0 when nothing matches, so a missing sentinel
  # leaves ec empty instead of failing the pipeline and aborting the whole
  # script under `set -eo pipefail`.
  ec=$(printf '%s\n' "$raw" \
    | sed -nE 's/.*__PROBE_EXIT=([0-9]+).*/\1/p' \
    | tail -n 1)

  # Drop kubectl warning lines, then strip only the sentinel token rather
  # than its whole line: curl's `-w %{http_code}` prints no trailing newline,
  # so the HTTP status shares a line with the sentinel and must survive.
  out=$(printf '%s\n' "$raw" \
    | sed -E \
      -e '/consider using|deprecated and will be removed/d' \
      -e 's/__PROBE_EXIT=[0-9]+//g' \
      -e '/^[[:space:]]*$/d')
  detail=$(printf '%s' "$out" | tr '\t\n' ' ' | tail -c 100)

  # Tools we use (nc, curl with --max-time, dig +tries=1) all exit non-zero
  # when the network path is denied/unreachable, so the exit code alone is
  # the reachable/blocked signal. No sentinel at all means the probe itself
  # did not run to completion; report that as an error so it can never be
  # mistaken for a policy block (which would score PASS on "block" rows).
  if [[ -z "$ec" ]]; then
    result=error
    detail="${detail:-no exit sentinel from probe container}"
  elif [[ "$ec" == "0" ]]; then
    result=reachable
  else
    result=blocked
  fi

  if [[ "$expected" == "allow" && "$result" == "reachable" ]] \
    || [[ "$expected" == "block" && "$result" == "blocked" ]]; then
    verdict=PASS
  else
    verdict=FAIL
  fi

  printf '%-7s %-32s %-8s %-9s %-8s %s\n' \
    "$kind" "$target" "$expected" "$result" "$verdict" "$detail"
}
| 91 | + |
# print_row emits one row in the same fixed-width layout that probe uses.
print_row() {
  printf '%-7s %-32s %-8s %-9s %-8s %s\n' "$@"
}

# Command prefixes shared by the probes below.
tcp_check='nc -zv -w 3'
http_status='curl -sS -o /dev/null -w %{http_code}'

print_row KIND TARGET EXPECTED RESULT VERDICT DETAIL
print_row ------- -------------------------------- -------- --------- -------- ------------------------------

# Destinations the worker is expected to reach.
[[ -z "$CACHE_PROXY_IP" ]] \
  || probe TCP "cache-proxy (node-local)" allow "$tcp_check $CACHE_PROXY_IP $CACHE_PROXY_PORT"
probe DNS "kube-dns resolution" allow "dig +time=2 +tries=1 +short @${KUBE_DNS_IP} kubernetes.default.svc.cluster.local"
probe HTTPS "S3 region endpoint" allow "$http_status --max-time 5 https://$S3_HOST/"
probe HTTPS "public internet (example.com)" allow "$http_status --max-time 5 https://example.com/"

# Port-scope regression guards. World egress is allowlisted to TCP 443 and
# 5432 only, so every other port on a public host has to remain blocked; a
# flip to "reachable" here means the world rule was widened. Both targets
# really do listen on the probed port before the policy is applied
# (example.com:80 is Cloudflare's HTTP redirector, github.com:22 is GitHub's
# SSH endpoint), so a "blocked" result comes from Cilium and not from the
# remote host refusing the connection.
probe TCP "public HTTP example.com:80" block "$tcp_check example.com 80"
probe TCP "public SSH github.com:22" block "$tcp_check github.com 22"

# Node and control-plane endpoints that must stay unreachable.
probe TCP "EC2 IMDS (169.254.169.254)" block "$tcp_check 169.254.169.254 80"
probe HTTP "EC2 IMDS" block "$http_status --max-time 3 http://169.254.169.254/latest/meta-data/"
probe TCP "kube-apiserver" block "$tcp_check $APISERVER_IP 443"

# Optional probes: each runs only when its env var was supplied.
[[ -z "$TENANT_RDS_HOST" ]] \
  || probe TCP "tenant RDS" allow "$tcp_check $TENANT_RDS_HOST 5432"
[[ -z "$TENANT_BUCKET" ]] \
  || probe HTTPS "tenant bucket" allow "$http_status --max-time 5 https://${TENANT_BUCKET}.s3.${S3_REGION}.amazonaws.com/"
# Known trade-off: the policy does not scope RDS hostnames per tenant, so any
# RDS in the VPC stays reachable at the network layer (AWS-credential layers
# gate actual data access). The expectation is therefore `allow`.
[[ -z "$OTHER_RDS_HOST" ]] \
  || probe TCP "other tenant RDS (world)" allow "$tcp_check $OTHER_RDS_HOST 5432"
[[ -z "$OTHER_WORKER_IP" ]] \
  || probe TCP "other worker (Flight)" block "$tcp_check $OTHER_WORKER_IP 8816"
0 commit comments