Skip to content

Commit 7ddf7ca

Browse files
authored
Assorted bug fixes (llm-d#206)
Signed-off-by: Nick Masluk <nick@randombytes.net>
1 parent e2a185c commit 7ddf7ca

File tree

3 files changed

+18
-45
lines changed

3 files changed

+18
-45
lines changed

build/llm-d-benchmark.sh

Lines changed: 9 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ export LLMDBENCH_RUN_EXPERIMENT_HARNESS_EC=1
44
export LLMDBENCH_HARNESS_NAME=${1}
55
export LLMDBENCH_RUN_EXPERIMENT_HARNESS=$(find /usr/local/bin | grep ${1}.*-llm-d-benchmark | rev | cut -d '/' -f 1 | rev)
66
export LLMDBENCH_RUN_EXPERIMENT_ANALYZER=$(find /usr/local/bin | grep ${1}.*-analyze_results | rev | cut -d '/' -f 1 | rev)
7-
export LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR=/requests/$(echo $LLMDBENCH_RUN_EXPERIMENT_HARNESS | sed "s^-llm-d-benchmark^^g" | cut -d '.' -f 1)_${LLMDBENCH_RUN_EXPERIMENT_ID}_${LLMDBENCH_HARNESS_STACK_NAME}_${date -u +%Y-%m-%d_%H.%M.%S}
7+
export LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR=/requests/$(echo $LLMDBENCH_RUN_EXPERIMENT_HARNESS | sed "s^-llm-d-benchmark^^g" | cut -d '.' -f 1)_${LLMDBENCH_RUN_EXPERIMENT_ID}_${LLMDBENCH_HARNESS_STACK_NAME}
88
export LLMDBENCH_CONTROL_WORK_DIR=$LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR
99
fi
1010
if [[ ! -z $2 ]]; then
@@ -30,66 +30,33 @@ if [[ -f ~/.bashrc ]]; then
3030
mv -f ~/.bashrc ~/fixbashrc
3131
fi
3232

33-
#if [[ -d $LLMDBENCH_RUN_EXPERIMENT_HARNESS_DIR && ! -z $LLMDBENCH_HARNESS_GIT_REPO ]]; then
34-
# pushd /workspace/$LLMDBENCH_RUN_EXPERIMENT_HARNESS_DIR
35-
# current_repo=$(git remote -v | grep \(fetch\) | awk '{ print $2 }')
36-
# if [[ $current_repo == $LLMDBENCH_HARNESS_GIT_REPO ]]; then
37-
# export LLMDBENCH_RUN_EXPERIMENT_HARNESS_CURRENT_COMMIT=$(git rev-parse --short HEAD)
38-
# git fetch
39-
# else
40-
# popd
41-
# rm -rf /workspace/$LLMDBENCH_RUN_EXPERIMENT_HARNESS_DIR
42-
# git clone $LLMDBENCH_HARNESS_GIT_REPO
43-
# pushd /workspace/$LLMDBENCH_RUN_EXPERIMENT_HARNESS_DIR
44-
# fi
45-
# git checkout $LLMDBENCH_HARNESS_GIT_BRANCH
46-
# if [[ $(git rev-parse --short HEAD) != ${LLMDBENCH_RUN_EXPERIMENT_HARNESS_CURRENT_COMMIT} ]]; then
47-
# case ${LLMDBENCH_RUN_EXPERIMENT_HARNESS_DIR} in
48-
# fmperf*)
49-
# pip install --no-cache-dir -r requirements.txt && pip install .
50-
# ;;
51-
# inference-perf*)
52-
# pip install .
53-
# ;;
54-
# vllm-benchmark*)
55-
# VLLM_USE_PRECOMPILED=1 pip install .
56-
# pushd ..
57-
# if [[ ! -d vllm ]]; then
58-
# mv -f vllm vllm-benchmark
59-
# fi
60-
# popd
61-
# ;;
62-
# guidellm*)
63-
# pip install .
64-
# ;;
65-
# esac
66-
# fi
67-
# popd
68-
#fi
69-
7033
env | grep ^LLMDBENCH | grep -v BASE64 | sort
7134

72-
if [[ $LLMDBENCH_RUN_EXPERIMENT_HARNESS_EC -ne 0 ]]; then
35+
# Repeat run until success
36+
while [[ $LLMDBENCH_RUN_EXPERIMENT_HARNESS_EC -ne 0 ]]; do
7337
/usr/local/bin/${LLMDBENCH_RUN_EXPERIMENT_HARNESS}
7438
ec=$?
7539
if [[ $ec -ne 0 ]]; then
76-
echo "execution of /usr/local/bin/${LLMDBENCH_RUN_EXPERIMENT_HARNESS} failed, wating 120 seconds and trying again"
77-
sleep 120
40+
echo "execution of /usr/local/bin/${LLMDBENCH_RUN_EXPERIMENT_HARNESS} failed, wating 30 seconds and trying again"
41+
sleep 30
7842
set -x
7943
else
8044
export LLMDBENCH_RUN_EXPERIMENT_HARNESS_EC=0
8145
fi
82-
fi
46+
done
8347

8448
if [[ -f ~/fixbashrc ]]; then
8549
mv -f ~/fixbashrc ~/.bashrc
8650
fi
8751

52+
# Run the analyzer; if it fails, wait 120 seconds and retry once, then give up
8853
/usr/local/bin/${LLMDBENCH_RUN_EXPERIMENT_ANALYZER}
8954
ec=$?
9055
if [[ $ec -ne 0 ]]; then
9156
echo "execution of /usr/local/bin/${LLMDBENCH_RUN_EXPERIMENT_ANALYZER} failed, wating 120 seconds and trying again"
9257
sleep 120
9358
set -x
59+
/usr/local/bin/${LLMDBENCH_RUN_EXPERIMENT_ANALYZER}
9460
fi
61+
# Exit with the status code of the first analyzer run (the retry's status is not captured)
9562
exit $ec

setup/env.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ export LLMDBENCH_IMAGE_NAME=${LLMDBENCH_IMAGE_NAME:-llm-d-benchmark}
1313
export LLMDBENCH_IMAGE_TAG=${LLMDBENCH_IMAGE_TAG:-auto}
1414
export LLMDBENCH_LLMD_IMAGE_REGISTRY=${LLMDBENCH_LLMD_IMAGE_REGISTRY:-ghcr.io}
1515
export LLMDBENCH_LLMD_IMAGE_REPO=${LLMDBENCH_LLMD_IMAGE_REPO:-llm-d}
16-
export LLMDBENCH_LLMD_IMAGE_NAME=${LLMDBENCH_LLMD_IMAGE_REPO:-llm-d}
16+
export LLMDBENCH_LLMD_IMAGE_NAME=${LLMDBENCH_LLMD_IMAGE_NAME:-llm-d}
1717
export LLMDBENCH_LLMD_IMAGE_TAG=${LLMDBENCH_LLMD_IMAGE_TAG:-auto}
1818
export LLMDBENCH_LLMD_MODELSERVICE_IMAGE_REGISTRY=${LLMDBENCH_LLMD_MODELSERVICE_IMAGE_REGISTRY:-ghcr.io}
1919
export LLMDBENCH_LLMD_MODELSERVICE_IMAGE_REPO=${LLMDBENCH_LLMD_MODELSERVICE_IMAGE_REPO:-llm-d}

workload/harnesses/vllm-benchmark-llm-d-benchmark.sh

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,16 @@ find /workspace/vllm-benchmark -maxdepth 1 -mindepth 1 -name '*.json' -exec mv -
1010

1111
# If benchmark harness returned with an error, exit here
1212
if [[ $LLMDBENCH_RUN_EXPERIMENT_HARNESS_RC -ne 0 ]]; then
13+
echo "Harness returned with error $LLMDBENCH_RUN_EXPERIMENT_HARNESS_RC"
1314
exit $LLMDBENCH_RUN_EXPERIMENT_HARNESS_RC
1415
fi
16+
echo "Harness completed successfully."
1517

1618
# Convert results into universal format
17-
convert.py $LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR -w vllm-benchmark > $LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR/results_${date -u +%Y-%m-%d_%H.%M.%S}.yaml 2> >(tee -a $LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR/stderr.log >&2)
19+
convert.py $LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR -w vllm-benchmark > $LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR/results_$(date -u +%Y-%m-%d_%H.%M.%S).yaml 2> >(tee -a $LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR/stderr.log >&2)
1820
export LLMDBENCH_RUN_EXPERIMENT_CONVERT_RC=$?
19-
exit $LLMDBENCH_RUN_EXPERIMENT_CONVERT_RC
21+
if [[ $LLMDBENCH_RUN_EXPERIMENT_CONVERT_RC -ne 0 ]]; then
22+
echo "convert.py returned with error $LLMDBENCH_RUN_EXPERIMENT_CONVERT_RC"
23+
exit $LLMDBENCH_RUN_EXPERIMENT_CONVERT_RC
24+
fi
25+
echo "Results data conversion completed successfully."

0 commit comments

Comments
 (0)