Commit b86879a

add pod monitor support and collect metrics data (#734)
* add pod monitor support and collect metrics data
* replace ev.get() with ev[]
* fix conflict
1 parent b9fcc8c commit b86879a

File tree: 9 files changed (+181, -9 lines)


build/Dockerfile
Lines changed: 3 additions & 0 deletions

@@ -22,6 +22,9 @@ RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.
 
 RUN apt-get update; \
     apt-get install -y google-cloud-sdk-gke-gcloud-auth-plugin
+# Install kubectl for in-pod cluster operations (e.g. vLLM metrics scraping)
+RUN curl -fsSL "https://dl.k8s.io/release/$(curl -fsSL https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" \
+    -o /usr/local/bin/kubectl && chmod +x /usr/local/bin/kubectl
 
 RUN echo "# /etc/rsyncd: configuration file for rsync daemon mode" > /etc/rsyncd.conf; echo -e "\
 \n\

build/llm-d-benchmark.sh
Lines changed: 71 additions & 0 deletions

@@ -96,6 +96,72 @@ fi
 
 env | grep ^LLMDBENCH | grep -v BASE64 | sort
 
+# Scrape vLLM /metrics from all serving pods in the namespace.
+# Usage: scrape_vllm_metrics <phase> (phase = "pre" or "post")
+function scrape_vllm_metrics {
+  local phase=$1
+  local namespace=${LLMDBENCH_VLLM_COMMON_NAMESPACE:-llmdbench}
+  local metrics_port=${LLMDBENCH_VLLM_COMMON_METRICS_PORT:-8200}
+  local inference_port=${LLMDBENCH_VLLM_COMMON_INFERENCE_PORT:-8000}
+  local metrics_path=${LLMDBENCH_VLLM_MONITORING_METRICS_PATH:-/metrics}
+  local metrics_dir="${LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR}/vllm_metrics"
+  local timestamp
+  timestamp=$(date --iso-8601=seconds 2>/dev/null || date -u +"%Y-%m-%dT%H:%M:%S%z")
+
+  mkdir -p "${metrics_dir}"
+  echo "Scraping vLLM ${phase} metrics (namespace=${namespace}, port=${metrics_port}, fallback_port=${inference_port})..."
+
+  # Try modelservice labels first, then standalone
+  local pod_info
+  pod_info=$(kubectl --namespace "$namespace" get pods \
+    -l llm-d.ai/inferenceServing=true \
+    --field-selector=status.phase=Running \
+    -o jsonpath='{range .items[*]}{.metadata.name}{" "}{.status.podIP}{" "}{.metadata.labels.llm-d\.ai/role}{"\n"}{end}' 2>/dev/null || true)
+
+  if [[ -z "$pod_info" ]]; then
+    pod_info=$(kubectl --namespace "$namespace" get pods \
+      -l stood-up-via=standalone \
+      --field-selector=status.phase=Running \
+      -o jsonpath='{range .items[*]}{.metadata.name}{" "}{.status.podIP}{" "}{"standalone"}{"\n"}{end}' 2>/dev/null || true)
+  fi
+
+  if [[ -z "$pod_info" ]]; then
+    echo "WARNING: No vLLM pods found for metrics scraping in namespace ${namespace}"
+    return 0
+  fi
+
+  echo "$pod_info" | while read -r pod_name pod_ip role; do
+    [[ -z "$pod_ip" || -z "$pod_name" ]] && continue
+    local outfile="${metrics_dir}/${phase}_${pod_name}.log"
+    echo " Scraping ${pod_name} (${pod_ip}:${metrics_port}, role=${role})..."
+    curl -s --connect-timeout 5 --max-time 30 \
+      "http://${pod_ip}:${metrics_port}${metrics_path}" > "$outfile" 2>/dev/null
+    # If metrics port fails or returns empty, fall back to inference port (standalone vLLM serves /metrics on --port)
+    if [[ ! -s "$outfile" && "$metrics_port" != "$inference_port" ]]; then
+      echo " Retrying ${pod_name} on inference port (${pod_ip}:${inference_port})..."
+      curl -s --connect-timeout 5 --max-time 30 \
+        "http://${pod_ip}:${inference_port}${metrics_path}" > "$outfile" 2>/dev/null || \
+        echo " WARNING: Failed to scrape metrics from ${pod_name}"
+    fi
+  done
+
+  cat > "${metrics_dir}/${phase}_metadata.json" <<METAEOF
+{
+  "phase": "${phase}",
+  "timestamp": "${timestamp}",
+  "namespace": "${namespace}",
+  "metrics_port": ${metrics_port},
+  "metrics_path": "${metrics_path}"
+}
+METAEOF
+
+  echo "vLLM ${phase} metrics scraping complete. Files saved to ${metrics_dir}/"
+}
+
+# Scrape vLLM /metrics before benchmark run
+if [[ "${LLMDBENCH_VLLM_COMMON_METRICS_SCRAPE_ENABLED:-false}" == "true" ]]; then
+  scrape_vllm_metrics "pre" || echo "WARNING: Pre-benchmark metrics scrape failed"
+fi
 
 echo "Running harness: /usr/local/bin/${LLMDBENCH_RUN_EXPERIMENT_HARNESS}"
 counter=1
@@ -113,6 +179,11 @@ while [[ $LLMDBENCH_RUN_EXPERIMENT_HARNESS_LOADGEN_EC -ne 0 && "${counter}" -le
 done
 echo "Harness completed: /usr/local/bin/${LLMDBENCH_RUN_EXPERIMENT_HARNESS}"
 
+# Scrape vLLM /metrics after benchmark run
+if [[ "${LLMDBENCH_VLLM_COMMON_METRICS_SCRAPE_ENABLED:-false}" == "true" ]]; then
+  scrape_vllm_metrics "post" || echo "WARNING: Post-benchmark metrics scrape failed"
+fi
+
 if [[ -f ~/fixbashrc ]]; then
   mv -f ~/fixbashrc ~/.bashrc
 fi
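
With scraping enabled, each experiment's results directory gains one Prometheus text dump per pod per phase plus a small metadata file; a hypothetical listing (the pod name is illustrative):

    $ ls "$LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR/vllm_metrics"
    post_metadata.json  post_vllm-standalone-llama-0.log
    pre_metadata.json   pre_vllm-standalone-llama-0.log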

scenarios/examples/spyre.sh
Lines changed: 2 additions & 0 deletions

@@ -71,6 +71,8 @@ cat << EOF > $LLMDBENCH_VLLM_COMMON_ENVVARS_TO_YAML
   value: SENTIENT
 - name: FLEX_DEVICE
   value: VF
+- name: VLLM_SPYRE_PERF_METRIC_LOGGING_ENABLED
+  value: '1'
 - name: FLEX_HDMA_P2PSIZE
   value: '268435456'
 - name: FLEX_HDMA_COLLSIZE

setup/env.sh
Lines changed: 7 additions & 0 deletions

@@ -150,6 +150,13 @@ export LLMDBENCH_VLLM_COMMON_FQDN=${LLMDBENCH_VLLM_COMMON_FQDN:-".svc.cluster.lo
 export LLMDBENCH_VLLM_COMMON_TIMEOUT=${LLMDBENCH_VLLM_COMMON_TIMEOUT:-3600}
 export LLMDBENCH_VLLM_COMMON_INFERENCE_PORT=${LLMDBENCH_VLLM_COMMON_INFERENCE_PORT:-"8000"}
 export LLMDBENCH_VLLM_COMMON_METRICS_PORT=${LLMDBENCH_VLLM_COMMON_METRICS_PORT:-"8200"}
+export LLMDBENCH_VLLM_COMMON_METRICS_SCRAPE_ENABLED=${LLMDBENCH_VLLM_COMMON_METRICS_SCRAPE_ENABLED:-false}
+
+# vLLM Prometheus PodMonitor
+export LLMDBENCH_VLLM_MONITORING_PODMONITOR_ENABLED=${LLMDBENCH_VLLM_MONITORING_PODMONITOR_ENABLED:-false}
+export LLMDBENCH_VLLM_MONITORING_SCRAPE_INTERVAL=${LLMDBENCH_VLLM_MONITORING_SCRAPE_INTERVAL:-"30s"}
+export LLMDBENCH_VLLM_MONITORING_METRICS_PATH=${LLMDBENCH_VLLM_MONITORING_METRICS_PATH:-"/metrics"}
+
 export LLMDBENCH_VLLM_COMMON_NIXL_SIDE_CHANNEL_PORT=${LLMDBENCH_VLLM_COMMON_NIXL_SIDE_CHANNEL_PORT:-"5557"}
 export LLMDBENCH_VLLM_COMMON_UCX_TLS=${LLMDBENCH_VLLM_COMMON_UCX_TLS:-"sm,cuda_ipc,cuda_copy,tcp"}
 export LLMDBENCH_VLLM_COMMON_UCX_SOCKADDR_TLS_PRIORITY=${LLMDBENCH_VLLM_COMMON_UCX_SOCKADDR_TLS_PRIORITY:-"tcp"}
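
Like the rest of setup/env.sh these are plain defaults, so the same knobs can be set in the shell before standup instead of via the CLI; a minimal sketch:

    # Opt in to monitoring without the -f flag, and scrape more often
    export LLMDBENCH_VLLM_MONITORING_PODMONITOR_ENABLED=true
    export LLMDBENCH_VLLM_COMMON_METRICS_SCRAPE_ENABLED=true
    export LLMDBENCH_VLLM_MONITORING_SCRAPE_INTERVAL="15s"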

setup/run.sh
Lines changed: 4 additions & 0 deletions

@@ -58,6 +58,7 @@ function show_usage {
 -v/--verbose [print the command being executed, and result (default=$LLMDBENCH_CONTROL_VERBOSE)] \n \
 -x/--dataset [url for dataset to be replayed (default=$LLMDBENCH_RUN_DATASET_URL)] \n \
 -u/--wva [deploy model with Workload Variant Autoscaler (default=$LLMDBENCH_WVA_ENABLED)] \n \
+-f/--monitoring [enable vLLM /metrics scraping before and after each benchmark run (default=$LLMDBENCH_VLLM_COMMON_METRICS_SCRAPE_ENABLED)] \n \
 -j/--parallelism [number of harness pods to be created (default=$LLMDBENCH_HARNESS_LOAD_PARALLELISM)] \n \
 -s/--wait [time to wait until the benchmark run is complete (default=$LLMDBENCH_HARNESS_WAIT_TIMEOUT, value \"0\" means \"do not wait\"] \n \
 -g/--envvarspod [list all environment variables which should be propagated to the harness pods (default=$LLMDBENCH_HARNESS_ENVVARS_TO_YAML)] \n \
@@ -197,6 +198,9 @@ while [[ $# -gt 0 ]]; do
     -u|--wva)
       export LLMDBENCH_WVA_ENABLED=1
       ;;
+    -f|--monitoring)
+      export LLMDBENCH_VLLM_COMMON_METRICS_SCRAPE_ENABLED=true
+      ;;
     -z|--skip)
       export LLMDBENCH_CLIOVERRIDE_HARNESS_SKIP_RUN=1
       ;;
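
Enabling the scrapes from the CLI is then just the new switch on an existing invocation; a sketch (all other arguments elided):

    # Scrape vLLM /metrics before and after the benchmark run
    ./setup/run.sh -f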

setup/standup.sh
Lines changed: 5 additions & 0 deletions

@@ -40,6 +40,7 @@ function show_usage {
 -r/--release [modelservice helm chart release name (default=$LLMDBENCH_VLLM_MODELSERVICE_RELEASE)] \n \
 -x/--dataset [url for dataset to be replayed (default=$LLMDBENCH_RUN_DATASET_URL)] \n \
 -u/--wva [deploy model with Workload Variant Autoscaler (default=$LLMDBENCH_WVA_ENABLED)] \n \
+-f/--monitoring [enable PodMonitor for Prometheus and vLLM /metrics scraping (default=$LLMDBENCH_VLLM_MONITORING_PODMONITOR_ENABLED)] \n \
 -n/--dry-run [just print the command which would have been executed (default=$LLMDBENCH_CONTROL_DRY_RUN) ] \n \
 -v/--verbose [print the command being executed, and result (default=$LLMDBENCH_CONTROL_VERBOSE) ] \n \
 -i/--non-admin [run the setup script as a non-cluster-level admin user] \n \
@@ -152,6 +153,10 @@ while [[ $# -gt 0 ]]; do
     -u|--wva)
       export LLMDBENCH_WVA_ENABLED=1
       ;;
+    -f|--monitoring)
+      export LLMDBENCH_VLLM_MONITORING_PODMONITOR_ENABLED=true
+      export LLMDBENCH_VLLM_COMMON_METRICS_SCRAPE_ENABLED=true
+      ;;
     -n|--dry-run)
       export LLMDBENCH_CLIOVERRIDE_CONTROL_DRY_RUN=1
       ;;
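
Unlike run.sh, the standup flag turns on both variables, so one switch yields the PodMonitor objects at standup time and the pre/post scrapes at run time; a sketch (other arguments elided):

    ./setup/standup.sh -f    # create PodMonitors and enable /metrics scraping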

setup/steps/06_deploy_vllm_standalone_models.py
Lines changed: 40 additions & 5 deletions

@@ -73,7 +73,7 @@ def main():
     yamls_dir.mkdir(parents=True, exist_ok=True)
 
     # Process each model - First pass: Deploy resources
-    model_list = ev.get("deploy_model_list", "").replace(",", " ").split()
+    model_list = ev["deploy_model_list"].replace(",", " ").split()
     for model in model_list:
         # Generate filename-safe model name
         modelfn = model.replace("/", "___")
@@ -107,6 +107,17 @@ def main():
         kubectl_service_cmd = f"{ev['control_kcmd']} apply -f {service_file}"
         llmdbench_execute_cmd(actual_cmd=kubectl_service_cmd, dry_run=ev["control_dry_run"], verbose=ev["control_verbose"], fatal=True)
 
+        # Optional PodMonitor for Prometheus scraping
+        if ev["vllm_monitoring_podmonitor_enabled"] == "true":
+            podmonitor_yaml = generate_podmonitor_yaml(ev, model, model_label)
+            podmonitor_file = yamls_dir / f"{ev['current_step']}_c_podmonitor_{modelfn}.yaml"
+            with open(podmonitor_file, 'w') as f:
+                f.write(podmonitor_yaml)
+
+            kubectl_podmonitor_cmd = f"{ev['control_kcmd']} apply -f {podmonitor_file}"
+            llmdbench_execute_cmd(actual_cmd=kubectl_podmonitor_cmd, dry_run=ev["control_dry_run"], verbose=ev["control_verbose"], fatal=False)
+            announce(f"📊 PodMonitor for \"{model}\" created for Prometheus scraping")
+
         # Optional HTTPRoute for OpenShift
         srl = "deployment,service,pods,secrets"
         if ev["control_deploy_is_openshift"] == "1" :
@@ -169,7 +180,7 @@ def main():
         propagate_standup_parameters(ev, api)
 
     else:
-        deploy_methods = ev.get("deploy_methods", "")
+        deploy_methods = ev["deploy_methods"]
         announce(f"⏭️ Environment types are \"{deploy_methods}\". Skipping this step.")
 
     return 0
@@ -254,11 +265,12 @@ def generate_deployment_yaml(ev, model, model_label):
         - name: HUGGING_FACE_HUB_TOKEN
           valueFrom:
             secretKeyRef:
-              name: {ev.get('vllm_common_hf_token_name', '')}
+              name: {ev['vllm_common_hf_token_name']}
               key: HF_TOKEN
 {additional_env}
         ports:
         - containerPort: {ev['vllm_common_inference_port']}
+          name: metrics
         startupProbe:
           httpGet:
             path: {ev["vllm_standalone_startup_probe_path"]}
@@ -309,7 +321,7 @@ def generate_deployment_yaml(ev, model, model_label):
         - name: HUGGING_FACE_HUB_TOKEN
          valueFrom:
            secretKeyRef:
-              name: {ev.get('vllm_common_hf_token_name', '')}
+              name: {ev['vllm_common_hf_token_name']}
               key: HF_TOKEN
 {additional_env}
         ports:
@@ -382,11 +394,34 @@ def generate_service_yaml(ev, model, model_label):
 """
     return service_yaml
 
+def generate_podmonitor_yaml(ev, model, model_label):
+    """Generate Kubernetes PodMonitor YAML for Prometheus to scrape vLLM standalone model metrics."""
+
+    podmonitor_yaml = f"""apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  name: vllm-standalone-{model_label}
+  namespace: {ev['vllm_common_namespace']}
+  labels:
+    stood-up-by: "{ev['control_username']}"
+    stood-up-from: llm-d-benchmark
+    stood-up-via: "{ev['deploy_methods']}"
+spec:
+  selector:
+    matchLabels:
+      app: vllm-standalone-{model_label}
+  podMetricsEndpoints:
+  - port: metrics
+    path: {ev['vllm_monitoring_metrics_path']}
+    interval: {ev['vllm_monitoring_scrape_interval']}
+"""
+    return podmonitor_yaml
+
 def generate_httproute_yaml(ev, model, model_label):
     """Generate HTTPRoute YAML for vLLM standalone model."""
 
     # Extract cluster URL for hostname
-    cluster_url = ev.get("cluster_url", "").replace("https://api.", "")
+    cluster_url = ev["cluster_url"].replace("https://api.", "")
 
     # Get model attributes for backend reference
     model_parameters = model_attribute(model, "parameters")
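
Note the PodMonitor is applied with fatal=False, so a cluster without the Prometheus Operator CRD degrades to a warning rather than aborting the deploy. Once applied, the rendered object can be inspected the usual way; a sketch with illustrative namespace and model label:

    kubectl get podmonitor vllm-standalone-llama-3-8b -n llmdbench -o yaml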

setup/steps/09_deploy_via_modelservice.py
Lines changed: 48 additions & 4 deletions

@@ -244,6 +244,41 @@ def generate_ms_values_yaml(
 
     return clear_string(yaml_content)
 
+def generate_podmonitor_yaml(ev: dict) -> str:
+    """Generate a PodMonitor CRD for Prometheus to scrape vLLM model serving pods.
+
+    Args:
+        ev: Environment variables dictionary
+
+    Returns:
+        PodMonitor YAML manifest as string
+    """
+    model_id_label = ev["deploy_current_model_id_label"]
+    namespace = ev["vllm_common_namespace"]
+    scrape_interval = ev["vllm_monitoring_scrape_interval"]
+    metrics_path = ev["vllm_monitoring_metrics_path"]
+    metrics_port = ev["vllm_common_metrics_port"]
+
+    return f"""apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  name: vllm-{model_id_label}
+  namespace: {namespace}
+  labels:
+    stood-up-by: "{ev['control_username']}"
+    stood-up-from: llm-d-benchmark
+    stood-up-via: "{ev['deploy_methods']}"
+spec:
+  selector:
+    matchLabels:
+      llm-d.ai/inferenceServing: "true"
+      llm-d.ai/model: {model_id_label}
+  podMetricsEndpoints:
+  - port: "{metrics_port}"
+    path: {metrics_path}
+    interval: {scrape_interval}
+"""
+
 def define_httproute(
     ev: dict,
     single_model: bool = True
@@ -260,9 +295,9 @@
         YAML manifest for HTTPRoute
     """
     release = ev["vllm_modelservice_release"]
-    namespace = ev.get("vllm_common_namespace", "")
-    model_id_label = ev.get("deploy_current_model_id_label", "")
-    service_port = ev.get("vllm_common_inference_port", "8000")
+    namespace = ev["vllm_common_namespace"]
+    model_id_label = ev["deploy_current_model_id_label"]
+    service_port = ev["vllm_common_inference_port"]
 
     manifest=f"""apiVersion: gateway.networking.k8s.io/v1
 kind: HTTPRoute
@@ -395,7 +430,7 @@ def main():
         # Create directory structure (Do not use "llmdbench_execute_cmd" for these commands)
         model_num = f"{model_number:02d}"
         release = ev["vllm_modelservice_release"]
-        work_dir = Path(ev.get("control_work_dir", ""))
+        work_dir = Path(ev["control_work_dir"])
         helm_dir = work_dir / "setup" / "helm" / release / model_num
 
         # Always create directory structure (even in dry-run)
@@ -491,6 +526,15 @@
         if result != 0:
             return result
 
+        # Optional PodMonitor for Prometheus scraping of vLLM pods
+        if ev["vllm_monitoring_podmonitor_enabled"] == "true":
+            podmonitor_yaml = generate_podmonitor_yaml(ev)
+            podmonitor_file = work_dir / "setup" / "yamls" / f"{ev['current_step_nr']}_podmonitor_{ev['deploy_current_model_id_label']}.yaml"
+            podmonitor_file.parent.mkdir(parents=True, exist_ok=True)
+            podmonitor_file.write_text(podmonitor_yaml)
+            kubectl_apply(api=api, manifest_data=podmonitor_yaml, dry_run=ev["control_dry_run"])
+            announce(f"📊 PodMonitor for \"{model}\" created for Prometheus scraping")
+
        # Collect decode logs
         collect_logs(ev, ev["vllm_modelservice_decode_replicas"], "decode")
 
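To confirm Prometheus actually picked up the new targets, one sketch, assuming a kube-prometheus style install where the operator's Prometheus service is prometheus-k8s in a "monitoring" namespace (names vary by cluster):

    kubectl get podmonitor -n "$LLMDBENCH_VLLM_COMMON_NAMESPACE"
    kubectl -n monitoring port-forward svc/prometheus-k8s 9090 &
    curl -s 'http://localhost:9090/api/v1/targets?state=active' | grep -o 'vllm-[a-z0-9-]*' | sort -u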
setup/teardown.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,7 @@ else
246246
hpa
247247
va
248248
servicemonitor
249+
podmonitor
249250
pod
250251
pvc
251252
)
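
Should teardown be skipped, the equivalent manual cleanup is roughly:

    kubectl delete podmonitor --all -n "$LLMDBENCH_VLLM_COMMON_NAMESPACE"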
