# SageMaker LLM Benchmark #2
# Workflow: SageMaker LLM Benchmark
# Workflow identity, triggers, and token permissions.
name: SageMaker LLM Benchmark

on:
  # Allows the benchmark to be started manually.
  workflow_dispatch:
  schedule:
    # 17:00 every Friday. GitHub Actions evaluates schedule cron
    # expressions in UTC, so this is 5pm UTC, not local time.
    - cron: '0 17 * * 5'

permissions:
  # Lets jobs request an OIDC token (used with role-to-assume when
  # configuring AWS credentials).
  id-token: write
  contents: read
jobs:
  # Starts two CPU instances through start_instance.sh and publishes their
  # ids as job outputs. The endpoint-tests matrix below has two entries, so
  # one instance is started per matrix entry.
  create-runners:
    runs-on: [self-hosted, scheduler]
    steps:
      - name: Create new CPU instance
        id: create_cpu1
        run: |
          # The default shell is `bash -e` without pipefail; enable it so a
          # failing `curl --fail` aborts the step instead of handing an
          # empty token to start_instance.sh.
          set -eo pipefail
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$(curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
            https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
            --fail \
            | jq -r '.token')
          ./start_instance.sh action_cpu "$token" djl-serving
      - name: Create new CPU instance
        id: create_cpu2
        run: |
          # See the first step: pipefail makes a curl failure fatal.
          set -eo pipefail
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$(curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
            https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
            --fail \
            | jq -r '.token')
          ./start_instance.sh action_cpu "$token" djl-serving
    # NOTE(review): start_instance.sh is presumably what sets the
    # `action_cpu_instance_id` step output read here - confirm.
    outputs:
      cpu_instance_id1: ${{ steps.create_cpu1.outputs.action_cpu_instance_id }}
      cpu_instance_id2: ${{ steps.create_cpu2.outputs.action_cpu_instance_id }}

  # Runs the IR-LLM benchmark once per engine on the self-hosted CPU
  # runners labelled with this run's id, number, and commit SHA.
  endpoint-tests:
    runs-on:
      - self-hosted
      - cpu
      - RUN_ID-${{ github.run_id }}
      - RUN_NUMBER-${{ github.run_number }}
      - SHA-${{ github.sha }}
    # 720 minutes = 12 hours, matching role-duration-seconds (43200 s) below.
    timeout-minutes: 720
    needs: create-runners
    strategy:
      # Let the other engine finish even if one engine's benchmark fails.
      fail-fast: false
      matrix:
        engine: [lmi-dist, trtllm]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python3
        uses: actions/setup-python@v5
        with:
          python-version: '3.10.x'
      - name: Install pip dependencies
        run: pip3 install -U boto3 awscli IPython
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving
          aws-region: us-west-2
          role-duration-seconds: 43200
      - name: Install IR-LLM
        working-directory: tests/integration/benchmark/ir-llm
        run: |
          aws s3 cp s3://sagemaker-python-sdk-ir-llm/sagemaker-2.227.1.dev0-py3-none-any.whl . --source-region us-east-1
          aws s3 cp s3://sagemaker-python-sdk-ir-llm/sagemaker-2017-07-24.normal.json . --source-region us-east-1
          aws s3 cp s3://djl-accounts/hf_token . --source-region us-east-1
          export HF_TOKEN=$(head -n1 hf_token | cut -d '=' -f2)
          # `export` only affects this step's shell. Persist the value via
          # GITHUB_ENV so later steps can see it, and mask it in the logs.
          # NOTE(review): confirm that scripts/cw_metrics.py reads HF_TOKEN
          # from the environment.
          if [ -n "$HF_TOKEN" ]; then
            echo "::add-mask::$HF_TOKEN"
            echo "HF_TOKEN=$HF_TOKEN" >> "$GITHUB_ENV"
          fi
          pip3 install --quiet sagemaker-2.227.1.dev0-py3-none-any.whl
          aws configure add-model --service-model file://sagemaker-2017-07-24.normal.json --service-name sagemaker
      - name: Prepare dir to store metrics
        working-directory: tests/integration/benchmark/ir-llm
        run: |
          METRICS_DIR="metrics_${{ matrix.engine }}"
          if [ ! -d "$METRICS_DIR" ]; then
            mkdir -p "$METRICS_DIR"
            echo "Directory '$METRICS_DIR' created."
          else
            echo "Directory '$METRICS_DIR' already exists."
          fi
      - name: Run IR-LLM
        working-directory: tests/integration/benchmark/ir-llm
        run: |
          METRICS_DIR="metrics_${{ matrix.engine }}"
          CONFIG_DIR="config/${{ matrix.engine }}"
          python3 scripts/cw_metrics.py -j "${CONFIG_DIR}/config.yml" -c "${CONFIG_DIR}/config_ir_job" -m "${METRICS_DIR}"
          echo "sleep 30 seconds to allow endpoint deletion"
          sleep 30

  # Always runs, even when earlier jobs fail, so that SageMaker resources
  # are cleaned up and every instance started by create-runners is stopped.
  stop-runners:
    if: always()
    runs-on: [self-hosted, scheduler]
    needs: [create-runners, endpoint-tests]
    steps:
      - name: Cleanup dangling SageMaker resources
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          ./cleanup_sagemaker_resources.sh sm-integration-test us-west-2
      - name: Stop all instances
        # Run even if the cleanup step above failed; otherwise the
        # instances would be left running.
        if: always()
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          # Stop BOTH instances created by create-runners. An output is
          # empty when its create step did not complete; word splitting
          # drops such entries. Keep going after a failure so one bad
          # instance does not prevent stopping the other, then report it.
          instance_ids="${{ needs.create-runners.outputs.cpu_instance_id1 }} ${{ needs.create-runners.outputs.cpu_instance_id2 }}"
          status=0
          for instance_id in $instance_ids; do
            ./stop_instance.sh "$instance_id" || status=1
          done
          exit "$status"