Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .devcontainer/docker-compose.override-example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ services:
volumes:
# Uncomment the following lines to enable
# # Mount TRTLLM data volume:
# - /home/scratch.trt_llm_data/:/home/scratch.trt_llm_data/:ro
# - /home/scratch.trt_llm_data_ci/:/home/scratch.trt_llm_data_ci/:ro
2 changes: 1 addition & 1 deletion examples/disaggregated/slurm/benchmark/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ environment:
cuda_architectures: "" # Optional CUDA architectures to build for (e.g. "90-real;100-real"). If empty, builds for all architectures
trtllm_wheel_path: "" # Path to pre-built TensorRT-LLM wheel. If provided, install from this wheel instead
work_dir: "<full_path_to_work_dir>"
worker_env_var: "TLLM_LOG_LEVEL=INFO TRTLLM_SERVER_DISABLE_GC=1 TRTLLM_WORKER_DISABLE_GC=1 TRTLLM_ENABLE_PDL=1 ENROOT_ALLOW_DEV=yes"
worker_env_var: "TLLM_LOG_LEVEL=INFO TRTLLM_SERVER_DISABLE_GC=1 TRTLLM_WORKER_DISABLE_GC=1 TRTLLM_ENABLE_PDL=1 ENROOT_ALLOW_DEV=yes NCCL_GRAPH_MIXING_SUPPORT=0"
server_env_var: "TRTLLM_SERVER_DISABLE_GC=1"

# Profiling Configuration
Expand Down
11 changes: 9 additions & 2 deletions examples/disaggregated/slurm/benchmark/disaggr_torch.slurm
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,16 @@ elif [ -d "${trtllm_repo}" ]; then
echo "TensorRT-LLM installation completed successfully"
else
echo "trtllm_wheel_path and trtllm_repo are not provided, will use the installed TensorRT-LLM from the container"
if [ -v TRT_LLM_GIT_COMMIT ]; then
echo "TRT_LLM_GIT_COMMIT: ${TRT_LLM_GIT_COMMIT}"
# get_env file is in the same directory as this script
get_env_file=${work_dir}/get_env.py
if ! srun --container-name=${container_name} \
--container-mounts=${container_mount} --no-container-mount-home \
--mpi=pmix --overlap -N 1 --ntasks-per-node=1 \
bash -c "python ${get_env_file} -e ${full_logdir}/env_vars.json" \
&> ${full_logdir}/2_get_env.log; then
cleanup_on_failure "Failed to get TensorRT-LLM environment variables. Check ${full_logdir}/2_get_env.log for details"
fi
echo "TensorRT-LLM environment variables saved to ${full_logdir}/env_vars.json"
fi

# Get node lists and replace the placeholder with the actual node names
Expand Down
33 changes: 33 additions & 0 deletions examples/disaggregated/slurm/benchmark/get_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env python3
import argparse
import json
import os


def main():
    """Merge TensorRT-LLM build information into a JSON environment file.

    Command line:
        -e / --env-file  Path of the JSON file to update (required).

    The file is expected to hold a JSON object. Its existing keys are kept and
    the keys ``TRT_LLM_GIT_COMMIT`` and ``TRT_LLM_VERSION`` are added or
    overwritten with the values of the identically named process environment
    variables (empty string when a variable is not set).

    A missing or empty file is treated as an empty object, so the script can
    also be used to create the file. Malformed JSON still raises
    ``json.JSONDecodeError``.
    """
    parser = argparse.ArgumentParser(
        description="Get TensorRT-LLM environment variables and save to JSON"
    )
    parser.add_argument("-e", "--env-file", required=True, help="Environment file path")
    args = parser.parse_args()

    # Read the env file if it already has content; new entries are merged into it.
    env_data = {}
    if os.path.isfile(args.env_file) and os.path.getsize(args.env_file) > 0:
        with open(args.env_file, "r") as f:
            env_data = json.load(f)
    else:
        print(f"Environment file {args.env_file} is missing or empty, starting from scratch")

    # Get environment variables
    new_env_data = {
        "TRT_LLM_GIT_COMMIT": os.environ.get("TRT_LLM_GIT_COMMIT", ""),
        "TRT_LLM_VERSION": os.environ.get("TRT_LLM_VERSION", ""),
    }
    print(f"Environment variables: {new_env_data}")
    env_data.update(new_env_data)

    # Save to environment file
    with open(args.env_file, "w") as f:
        json.dump(env_data, f, indent=2)

    print(f"Environment variables saved to {args.env_file}")


if __name__ == "__main__":
main()
103 changes: 100 additions & 3 deletions examples/disaggregated/slurm/benchmark/run_benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ set -u
trap 'echo "Error occurred at line $LINENO"; exit 1' ERR

# Add parameter validation
if [ "$#" -lt 9 ]; then
echo "Error: Missing required arguments"
echo "Usage: $0 model_name dataset_file multi_round concurrency_list streaming log_path hostname port"
if [ "$#" -lt 10 ]; then
echo "Error: Missing required arguments, got $# arguments, args: $@"
echo "Usage: $0 model_name dataset_file multi_round num_gen_servers concurrency_list streaming log_path hostname port ucx_warmup_requests"
exit 1
fi

Expand All @@ -21,20 +21,115 @@ streaming=$6
log_path=$7
hostname=$8
port=$9
ucx_warmup_requests=${10}

# only process 0 runs the load generator; every other process exits right away
if [[ ${SLURM_PROCID} != "0" ]]; then
echo "Process id is ${SLURM_PROCID} for loadgen, exiting"
exit 0
fi

#######################################
# Copy the lines of one log file that match a mode-specific marker.
# Arguments:
#   $1 - log file to read
#   $2 - file that receives the matching lines (overwritten)
#   $3 - "ctx": keep lines containing 'num_generation_tokens': 0
#        "gen": keep lines containing 'num_ctx_requests': 0, 'num_ctx_tokens': 0
#   $4 - 1-based line of $1 to start reading from; 0, empty or non-numeric
#        values mean "from the first line"
# Outputs:
#   Prints "Invalid mode: <mode>" for an unknown mode.
# Returns:
#   0 on success (also when nothing matched), 1 for an unknown mode.
#######################################
do_get_logs(){
    local input_file=$1
    local output_file=$2
    local mode=$3
    local start_line=${4:-1}
    local pattern

    # sed only accepts line address 0 in the "0,/regex/" form, so "0,$p" would
    # fail and leave the output empty; clamp everything below 1 to 1.
    if [[ "${start_line}" =~ ^[0-9]+$ ]]; then
        start_line=$((10#${start_line}))
    else
        start_line=1
    fi
    if (( start_line < 1 )); then
        start_line=1
    fi

    case "${mode}" in
        ctx)
            pattern="'num_generation_tokens': 0"
            ;;
        gen)
            pattern="'num_ctx_requests': 0, 'num_ctx_tokens': 0"
            ;;
        *)
            echo "Invalid mode: ${mode}"
            return 1
            ;;
    esac

    # grep exits 1 when no line matches; that is not an error here.
    sed -n "${start_line},\$p" "${input_file}" | grep -a -- "${pattern}" > "${output_file}" || true
    return 0
}

#######################################
# Apply one bookkeeping step to every 3_output_CTX_<n>.log and
# 3_output_GEN_<n>.log in a folder (CTX logs first, then GEN logs).
# Arguments:
#   $1 - folder containing the worker logs
#   $2 - folder for marker and result files
#   $3 - step to perform:
#        "line"  - store the current line count of each log in
#                  $2/{ctx,gen}_only_line_<n>.txt
#        "log"   - call do_get_logs for the lines that follow the stored
#                  line count, writing $2/{ctx,gen}_only_<n>.txt, and delete
#                  the marker; without a marker the whole log is used
#        "clean" - delete the worker logs
# Outputs:
#   Prints "Invalid mode: <mode>" and exits the script with status 1 for any
#   other step.
#######################################
do_process_all_logs(){
    local input_folder=$1
    local output_folder=$2
    local mode=$3
    case "${mode}" in
        line|log|clean)
            ;;
        *)
            echo "Invalid mode: ${mode}"
            exit 1
            ;;
    esac

    local kind
    local prefix
    local log_file
    local log_name
    local log_num
    local marker
    local line_count
    local start_line
    for kind in ctx gen; do
        if [[ "${kind}" == "ctx" ]]; then
            prefix="3_output_CTX_"
        else
            prefix="3_output_GEN_"
        fi
        for log_file in "${input_folder}/${prefix}"*.log; do
            # An unmatched glob stays literal; the -f test filters it out.
            [[ -f "${log_file}" ]] || continue
            log_name=${log_file##*/}
            log_num=${log_name#"${prefix}"}
            log_num=${log_num%.log}
            marker="${output_folder}/${kind}_only_line_${log_num}.txt"
            case "${mode}" in
                line)
                    line_count=$(wc -l < "${log_file}")
                    # Arithmetic expansion drops the padding some wc builds emit.
                    echo "$((line_count))" > "${marker}"
                    ;;
                log)
                    # Default: no marker recorded, take the log from its first line.
                    start_line=1
                    if [[ -f "${marker}" ]]; then
                        line_count=$(cat "${marker}")
                        rm -f "${marker}"
                        line_count=${line_count//[[:space:]]/}
                        if [[ "${line_count}" =~ ^[0-9]+$ ]]; then
                            # The first line written after the marker was taken
                            # is line (count + 1), not line count.
                            start_line=$((10#${line_count} + 1))
                        fi
                    fi
                    do_get_logs "${log_file}" "${output_folder}/${kind}_only_${log_num}.txt" "${kind}" "${start_line}"
                    ;;
                clean)
                    rm -f "${log_file}"
                    ;;
            esac
        done
    done
}

# Keep a copy of the worker logs as they are before the warm-up and benchmark
# requests below. Missing CTX/GEN logs are tolerated on purpose, hence the
# discarded stderr and "|| true".
mkdir -p "${log_path}/start_logs"
cp "${log_path}"/3_output_CTX_*.log "${log_path}/start_logs/" 2>/dev/null || true
cp "${log_path}"/3_output_GEN_*.log "${log_path}/start_logs/" 2>/dev/null || true

# warmup requests for ucx connections
# Sends ${ucx_warmup_requests} small random prompts (100 input / 10 output
# tokens, non-streaming) to the server; skipped when the count is 0.
if [ "${ucx_warmup_requests}" -gt 0 ]; then
    echo "warming up ucx connections with small requests... ${ucx_warmup_requests}"
    python -m tensorrt_llm.serve.scripts.benchmark_serving \
        --model "${model_name}" \
        --dataset-name random \
        --random-ids \
        --random-input-len 100 \
        --random-output-len 10 \
        --num-prompts "${ucx_warmup_requests}" \
        --host "${hostname}" \
        --port "${port}" \
        --ignore-eos \
        --non-streaming
    echo "UCX warmup done"
fi

echo "Hostname: ${hostname}, Port: ${port}"
echo "Starting benchmark..."
for concurrency in ${concurrency_list}; do
concurrency=$((concurrency * num_gen_servers))
num_prompts=$((concurrency * multi_round))
echo "Benchmarking with concurrency ${concurrency} ... ${num_prompts} prompts"
mkdir -p ${log_path}/concurrency_${concurrency}
do_process_all_logs ${log_path}/ ${log_path}/concurrency_${concurrency} "line"
python -m tensorrt_llm.serve.scripts.benchmark_serving \
--model ${model_name} \
--backend openai \
Expand All @@ -53,4 +148,6 @@ for concurrency in ${concurrency_list}; do
--percentile-metrics "ttft,tpot,itl,e2el" \
$(if [ "${streaming}" = "false" ]; then echo "--non-streaming"; fi)
echo "Benchmark with concurrency ${concurrency} done"
do_process_all_logs ${log_path}/ ${log_path}/concurrency_${concurrency} "log"
done
# do_process_all_logs ${log_path}/ ${log_path}/concurrency_${concurrency} "clean"
98 changes: 97 additions & 1 deletion examples/disaggregated/slurm/benchmark/run_benchmark_nv_sa.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ streaming=$8
log_path=$9
hostname=${10}
port=${11}
ucx_warmup_requests=${12}

# check process id is not 0
if [[ ${SLURM_PROCID} != "0" ]]; then
Expand All @@ -59,14 +60,107 @@ fi
echo "Cloning benchmark repository..."
git clone "${BENCH_SERVING_REPO}" "${BENCH_SERVING_DIR}"

#######################################
# Copy the lines of one log file that match a mode-specific marker.
# Arguments:
#   $1 - log file to read
#   $2 - file that receives the matching lines (overwritten)
#   $3 - "ctx": keep lines containing 'num_generation_tokens': 0
#        "gen": keep lines containing 'num_ctx_requests': 0, 'num_ctx_tokens': 0
#   $4 - 1-based line of $1 to start reading from; 0, empty or non-numeric
#        values mean "from the first line"
# Outputs:
#   Prints "Invalid mode: <mode>" for an unknown mode.
# Returns:
#   0 on success (also when nothing matched), 1 for an unknown mode.
#######################################
do_get_logs(){
    local input_file=$1
    local output_file=$2
    local mode=$3
    local start_line=${4:-1}
    local pattern

    # sed only accepts line address 0 in the "0,/regex/" form, so "0,$p" would
    # fail and leave the output empty; clamp everything below 1 to 1.
    if [[ "${start_line}" =~ ^[0-9]+$ ]]; then
        start_line=$((10#${start_line}))
    else
        start_line=1
    fi
    if (( start_line < 1 )); then
        start_line=1
    fi

    case "${mode}" in
        ctx)
            pattern="'num_generation_tokens': 0"
            ;;
        gen)
            pattern="'num_ctx_requests': 0, 'num_ctx_tokens': 0"
            ;;
        *)
            echo "Invalid mode: ${mode}"
            return 1
            ;;
    esac

    # grep exits 1 when no line matches; that is not an error here.
    sed -n "${start_line},\$p" "${input_file}" | grep -a -- "${pattern}" > "${output_file}" || true
    return 0
}

#######################################
# Apply one bookkeeping step to every output_ctx_<n>.log and
# output_gen_<n>.log in a folder (ctx logs first, then gen logs).
# Arguments:
#   $1 - folder containing the worker logs
#   $2 - folder for marker and result files
#   $3 - step to perform:
#        "line"  - store the current line count of each log in
#                  $2/{ctx,gen}_only_line_<n>.txt
#        "log"   - call do_get_logs for the lines that follow the stored
#                  line count, writing $2/{ctx,gen}_only_<n>.txt, and delete
#                  the marker; without a marker the whole log is used
#        "clean" - delete the worker logs
# Outputs:
#   Prints "Invalid mode: <mode>" and exits the script with status 1 for any
#   other step.
#######################################
do_process_all_logs(){
    local input_folder=$1
    local output_folder=$2
    local mode=$3
    case "${mode}" in
        line|log|clean)
            ;;
        *)
            echo "Invalid mode: ${mode}"
            exit 1
            ;;
    esac

    local kind
    local prefix
    local log_file
    local log_name
    local log_num
    local marker
    local line_count
    local start_line
    for kind in ctx gen; do
        prefix="output_${kind}_"
        for log_file in "${input_folder}/${prefix}"*.log; do
            # An unmatched glob stays literal; the -f test filters it out.
            [[ -f "${log_file}" ]] || continue
            log_name=${log_file##*/}
            log_num=${log_name#"${prefix}"}
            log_num=${log_num%.log}
            marker="${output_folder}/${kind}_only_line_${log_num}.txt"
            case "${mode}" in
                line)
                    line_count=$(wc -l < "${log_file}")
                    # Arithmetic expansion drops the padding some wc builds emit.
                    echo "$((line_count))" > "${marker}"
                    ;;
                log)
                    # Default: no marker recorded, take the log from its first line.
                    start_line=1
                    if [[ -f "${marker}" ]]; then
                        line_count=$(cat "${marker}")
                        rm -f "${marker}"
                        line_count=${line_count//[[:space:]]/}
                        if [[ "${line_count}" =~ ^[0-9]+$ ]]; then
                            # The first line written after the marker was taken
                            # is line (count + 1), not line count.
                            start_line=$((10#${line_count} + 1))
                        fi
                    fi
                    do_get_logs "${log_file}" "${output_folder}/${kind}_only_${log_num}.txt" "${kind}" "${start_line}"
                    ;;
                clean)
                    rm -f "${log_file}"
                    ;;
            esac
        done
    done
}

# Keep a copy of the worker logs as they are before the warm-up and benchmark
# requests below. Missing ctx/gen logs are tolerated on purpose, hence the
# discarded stderr and "|| true".
mkdir -p "${log_path}/start_logs"
cp "${log_path}"/output_ctx_*.log "${log_path}/start_logs/" 2>/dev/null || true
cp "${log_path}"/output_gen_*.log "${log_path}/start_logs/" 2>/dev/null || true

# warmup requests for ucx connections
# Sends ${ucx_warmup_requests} small random prompts (100 input / 10 output
# tokens, non-streaming) to the server; skipped when the count is 0.
if [ "${ucx_warmup_requests}" -gt 0 ]; then
    echo "warming up ucx connections with small requests... ${ucx_warmup_requests}"
    python -m tensorrt_llm.serve.scripts.benchmark_serving \
        --model "${model_name}" \
        --dataset-name random \
        --random-ids \
        --random-input-len 100 \
        --random-output-len 10 \
        --num-prompts "${ucx_warmup_requests}" \
        --host "${hostname}" \
        --port "${port}" \
        --ignore-eos \
        --non-streaming
    echo "UCX warmup done"
fi

# Run benchmarks
echo "Starting benchmark..."
for concurrency in ${concurrency_list}; do
concurrency=$((concurrency * num_gen_servers))
num_prompts=$((concurrency * multi_round))
output_dir="${log_path}/concurrency_${concurrency}"

echo "Benchmarking with concurrency ${concurrency} ... ${num_prompts} prompts"
do_process_all_logs ${log_path}/ ${log_path}/concurrency_${concurrency} "line"
mkdir -p "${output_dir}"

python "${BENCH_SCRIPT}" \
Expand All @@ -89,4 +183,6 @@ for concurrency in ${concurrency_list}; do
$([ "${streaming}" = "false" ] && echo "--non-streaming")

echo "Benchmark with concurrency ${concurrency} done"
do_process_all_logs ${log_path}/ ${log_path}/concurrency_${concurrency} "log"
done
# do_process_all_logs ${log_path}/ ${log_path}/concurrency_${concurrency} "clean"
1 change: 0 additions & 1 deletion examples/disaggregated/slurm/benchmark/start_worker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ else
fi

if [ "${benchmark_mode}" = "gen_only" ]; then
export TRTLLM_DISABLE_KV_CACHE_TRANSFER_OVERLAP=1
export TLLM_BENCHMARK_REQ_QUEUES_SIZE=${concurrency}
fi

Expand Down
Loading
Loading