Skip to content

Commit 5b8f8a3

Browse files
authored
scripts: probe worker pod egress (#541)
1 parent b774ce6 commit 5b8f8a3

1 file changed

Lines changed: 129 additions & 0 deletions

File tree

scripts/probe_worker_egress.sh

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
#!/usr/bin/env bash
2+
# Probe a worker pod's network egress and report which destinations are
3+
# reachable. Intended to be run before and after a Cilium NetworkPolicy is
4+
# applied — the table output is identical-shape so a diff highlights exactly
5+
# which targets the policy newly blocks.
6+
#
7+
# Mechanism: each probe runs as its own `kubectl debug` ephemeral container
8+
# attached to the worker pod's network namespace. Cilium endpoints cover all
9+
# containers in a pod, so the ephemeral container inherits whatever policy
10+
# applies to the worker. One probe per debug invocation pays ~3 s startup
11+
# each (image cached after first call) but keeps the bash quoting trivial —
12+
# a previous one-shot inline approach silently dropped output past ~3 probes
13+
# under ephemeral-container streaming buffers.
14+
#
15+
# Usage:
16+
# WORKER_POD=duckgres-…-worker-7664 \
17+
# TENANT_RDS_HOST=tenant.cluster-….rds.amazonaws.com \
18+
# TENANT_BUCKET=posthog-tenant-bucket \
19+
# OTHER_RDS_HOST=posthog-duckgres-config-store-mw-dev.cluster-….rds.amazonaws.com \
20+
# OTHER_WORKER_POD=duckgres-…-worker-7665 \
21+
# ./scripts/probe_worker_egress.sh
22+
set -euo pipefail
23+
24+
WORKER_POD="${WORKER_POD:?WORKER_POD env var required}"
25+
NAMESPACE="${NAMESPACE:-duckgres}"
26+
TENANT_RDS_HOST="${TENANT_RDS_HOST:-}"
27+
TENANT_BUCKET="${TENANT_BUCKET:-}"
28+
OTHER_RDS_HOST="${OTHER_RDS_HOST:-}"
29+
OTHER_WORKER_POD="${OTHER_WORKER_POD:-}"
30+
31+
NODE_IP="$(kubectl -n "$NAMESPACE" get pod "$WORKER_POD" -o jsonpath='{.status.hostIP}')"
32+
WORKER_NODE="$(kubectl -n "$NAMESPACE" get pod "$WORKER_POD" -o jsonpath='{.spec.nodeName}')"
33+
# The cache proxy is a per-node DaemonSet pod (not hostNetwork) — find the
34+
# instance running on the same node as the worker so we test the actual
35+
# path the worker would take.
36+
CACHE_PROXY_IP="$(kubectl -n "$NAMESPACE" get pod \
37+
-l app.kubernetes.io/name=duckgres-cache-proxy \
38+
--field-selector="spec.nodeName=$WORKER_NODE" \
39+
-o jsonpath='{.items[0].status.podIP}' 2>/dev/null || true)"
40+
CACHE_PROXY_PORT=8080 # S3 forward proxy port; 8081 is peer↔peer, 8082 is health
41+
APISERVER_IP="$(kubectl get svc kubernetes -n default -o jsonpath='{.spec.clusterIP}')"
42+
KUBE_DNS_IP="$(kubectl get svc kube-dns -n kube-system -o jsonpath='{.spec.clusterIP}')"
43+
S3_REGION="${S3_REGION:-us-east-1}"
44+
S3_HOST="s3.${S3_REGION}.amazonaws.com"
45+
46+
OTHER_WORKER_IP=""
47+
if [[ -n "$OTHER_WORKER_POD" ]]; then
48+
OTHER_WORKER_IP="$(kubectl -n "$NAMESPACE" get pod "$OTHER_WORKER_POD" -o jsonpath='{.status.podIP}')"
49+
fi
50+
51+
echo "Probing worker: $WORKER_POD (node $NODE_IP)"
52+
echo "Cache proxy on node: ${CACHE_PROXY_IP:-MISSING}:$CACHE_PROXY_PORT"
53+
echo
54+
55+
# probe runs one command inside an ephemeral container sharing $WORKER_POD's
# netns and prints one fixed-width (space-padded, NOT tab-separated) table row:
#   KIND TARGET EXPECTED RESULT VERDICT DETAIL
#
# Globals:
#   NAMESPACE, WORKER_POD (read)
# Arguments:
#   $1 - kind label for the row (e.g. TCP, DNS, HTTPS)
#   $2 - human-readable target label
#   $3 - expected outcome: "allow" or "block"
#   $4 - command string, run via `sh -c` inside the ephemeral container
# Outputs:
#   one table row on stdout
# Returns:
#   0 in normal operation; a failed probe is reported in the row (RESULT /
#   VERDICT columns), not through the exit status.
probe() {
  local kind="$1" target="$2" expected="$3" cmd="$4"
  local raw out ec result verdict detail
  # kubectl debug --attach=true does NOT propagate the inner shell's exit
  # code (always exits 0 once the ephemeral container is attached). Embed
  # the inner exit code in the output as a sentinel and parse it back.
  raw=$(kubectl -n "$NAMESPACE" debug "$WORKER_POD" \
    --image=nicolaka/netshoot \
    --target=duckdb-worker \
    --image-pull-policy=IfNotPresent \
    --profile=general \
    -q --attach=true \
    -- sh -c "$cmd; echo __PROBE_EXIT=\$?" 2>&1) || true
  # `|| true` is required: when the sentinel is absent (kubectl itself failed,
  # container never attached) grep exits 1, and under `set -eo pipefail` the
  # failed assignment would abort the whole script before the fallback below
  # could classify the probe as blocked.
  ec=$(printf '%s\n' "$raw" \
    | grep -oE '__PROBE_EXIT=[0-9]+' \
    | tail -1 \
    | cut -d= -f2 || true)
  ec="${ec:-1}"
  # Strip only the sentinel token rather than its whole line: commands such as
  # `curl -w %{http_code}` print no trailing newline, so their output shares a
  # line with the sentinel ("403__PROBE_EXIT=0") and would otherwise be lost.
  # Then drop kubectl's advisory warnings and any lines left empty.
  out=$(printf '%s\n' "$raw" \
    | sed -E 's/__PROBE_EXIT=[0-9]+//g' \
    | grep -v \
        -e "consider using" \
        -e "deprecated and will be removed" \
        -e '^[[:space:]]*$' || true)
  # Flatten to a single line and keep only the tail so the row stays compact.
  detail=$(printf '%s' "$out" | tr '\t\n' ' ' | tail -c 100)
  # Tools we use (nc, curl with --max-time, dig +tries=1) all exit non-zero
  # when the network path is denied/unreachable, so the exit code alone is
  # the reachable/blocked signal.
  if [[ "$ec" == "0" ]]; then
    result=reachable
  else
    result=blocked
  fi
  if [[ "$expected" == "allow" && "$result" == "reachable" ]] || \
     [[ "$expected" == "block" && "$result" == "blocked" ]]; then
    verdict=PASS
  else
    verdict=FAIL
  fi
  printf "%-7s %-32s %-8s %-9s %-8s %s\n" \
    "$kind" "$target" "$expected" "$result" "$verdict" "$detail"
}
91+
92+
printf "%-7s %-32s %-8s %-9s %-8s %s\n" KIND TARGET EXPECTED RESULT VERDICT DETAIL
93+
printf "%-7s %-32s %-8s %-9s %-8s %s\n" ------- -------------------------------- -------- --------- -------- ------------------------------
94+
95+
if [[ -n "$CACHE_PROXY_IP" ]]; then
96+
probe TCP "cache-proxy (node-local)" allow "nc -zv -w 3 $CACHE_PROXY_IP $CACHE_PROXY_PORT"
97+
fi
98+
probe DNS "kube-dns resolution" allow "dig +time=2 +tries=1 +short @${KUBE_DNS_IP} kubernetes.default.svc.cluster.local"
99+
probe HTTPS "S3 region endpoint" allow "curl -sS -o /dev/null -w %{http_code} --max-time 5 https://$S3_HOST/"
100+
probe HTTPS "public internet (example.com)" allow "curl -sS -o /dev/null -w %{http_code} --max-time 5 https://example.com/"
101+
# Port-scope regression checks: world egress is allowlisted to TCP 443 +
102+
# 5432 only, so any other port to a public host must stay blocked. If
103+
# either of these flips to reachable in a future probe run, somebody
104+
# widened the world rule and we want to catch it. Targets are chosen so
105+
# the destination port is genuinely listening pre-policy (otherwise the
106+
# "block" outcome would be a false positive caused by the host refusing
107+
# the connection rather than Cilium): example.com:80 is served by
108+
# Cloudflare's HTTP redirector, github.com:22 is GitHub's SSH endpoint.
109+
probe TCP "public HTTP example.com:80" block "nc -zv -w 3 example.com 80"
110+
probe TCP "public SSH github.com:22" block "nc -zv -w 3 github.com 22"
111+
probe TCP "EC2 IMDS (169.254.169.254)" block "nc -zv -w 3 169.254.169.254 80"
112+
probe HTTP "EC2 IMDS" block "curl -sS -o /dev/null -w %{http_code} --max-time 3 http://169.254.169.254/latest/meta-data/"
113+
probe TCP "kube-apiserver" block "nc -zv -w 3 $APISERVER_IP 443"
114+
115+
if [[ -n "$TENANT_RDS_HOST" ]]; then
116+
probe TCP "tenant RDS" allow "nc -zv -w 3 $TENANT_RDS_HOST 5432"
117+
fi
118+
if [[ -n "$TENANT_BUCKET" ]]; then
119+
probe HTTPS "tenant bucket" allow "curl -sS -o /dev/null -w %{http_code} --max-time 5 https://${TENANT_BUCKET}.s3.${S3_REGION}.amazonaws.com/"
120+
fi
121+
if [[ -n "$OTHER_RDS_HOST" ]]; then
122+
# Documented trade-off: this policy does not scope RDS hostnames per
123+
# tenant, so any RDS in the VPC remains reachable at the network layer
124+
# (AWS-credential layers gate actual data access). Expected `allow`.
125+
probe TCP "other tenant RDS (world)" allow "nc -zv -w 3 $OTHER_RDS_HOST 5432"
126+
fi
127+
if [[ -n "$OTHER_WORKER_IP" ]]; then
128+
probe TCP "other worker (Flight)" block "nc -zv -w 3 $OTHER_WORKER_IP 8816"
129+
fi

0 commit comments

Comments
 (0)