# CI - Nightly Run Benchmark on OCP #41
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: CI - Nightly Run Benchmark on OCP

# Triggers: a manual dispatch (with two optional directory overrides) and a
# nightly schedule. The push trigger is intentionally left commented out.
on:
  workflow_dispatch:
    inputs:
      # Local directory whose contents are uploaded at the end of the job.
      input_dir:
        description: 'Input directory for benchmark results'
        required: false
        default: '/tmp/cicd/analysis'
      # Used both as the S3 key prefix and as the GitHub artifact name.
      # When left empty the job generates a timestamped name.
      output_dir:
        description: 'Output directory name (S3 prefix and artifact name)'
        required: false
        default: ''
  # push:
  #   branches:
  #     - main
  schedule:
    # Daily at midnight UTC
    - cron: '0 0 * * *'
jobs:
  # Single job: prepares the runner, cleans the target cluster, runs the
  # standalone benchmarks, runs the modelservice end-to-end flows, and finally
  # publishes whatever landed in INPUT_DIR to object storage and as an artifact.
  run-benchmark:
    name: Benchmark Test
    runs-on: [k8s-util]
    timeout-minutes: 240
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - uses: actions/setup-python@v6
        with:
          python-version: '3.11'

      - name: Display OS used
        run: |
          cat /etc/*os-*
        shell: bash

      # Resolves INPUT_DIR and OUTPUT_DIR once and exports them through
      # GITHUB_ENV so later steps can read them as $INPUT_DIR / $OUTPUT_DIR.
      # The dispatch inputs are handed over via `env:` rather than being
      # expanded inside the script, so their content is never parsed as shell.
      # On scheduled runs both inputs are empty and the fallbacks apply.
      - name: Set input and output directory environment variables
        env:
          REQUESTED_INPUT_DIR: ${{ github.event.inputs.input_dir }}
          REQUESTED_OUTPUT_DIR: ${{ github.event.inputs.output_dir }}
        run: |
          DEFAULT_INPUT_DIR=/tmp/cicd/analysis
          INPUT_DIR="${REQUESTED_INPUT_DIR:-$DEFAULT_INPUT_DIR}"
          echo "INPUT_DIR=$INPUT_DIR" >> "$GITHUB_ENV"

          if [ -z "$REQUESTED_OUTPUT_DIR" ]; then
            timestamp=$(date -u +%Y%m%dT%H%M%SZ)
            OUTPUT_DIR="benchmark-results-${timestamp}"
            echo "Using generated output dir: ${OUTPUT_DIR}"
          else
            OUTPUT_DIR="$REQUESTED_OUTPUT_DIR"
            echo "Using provided output dir: ${OUTPUT_DIR}"
          fi
          echo "OUTPUT_DIR=$OUTPUT_DIR" >> "$GITHUB_ENV"
        shell: bash

      # The secret holds a base64-encoded kubeconfig. It is passed through the
      # environment and written under a restrictive umask so the file is never
      # readable by other users, even briefly.
      - name: Set up kubeconfig from secret
        env:
          KUBECONFIG_DATA: ${{ secrets.KUBECONFIG_DATA }}
        run: |
          mkdir -p ~/.kube
          umask 077
          echo "$KUBECONFIG_DATA" | base64 -d > ~/.kube/config
          chmod 600 ~/.kube/config
        shell: bash

      # `bc` is needed by the GPU-availability checks further down.
      # `apt-get install -y` keeps the install non-interactive.
      - name: Run install_deps.sh
        run: |
          sudo apt-get update
          sudo apt-get install -y bc
          ./setup/install_deps.sh
        shell: bash

      - name: Install config explorer dependencies
        run: pip install ./config_explorer
        shell: bash

      # `curl -f` makes an HTTP error fail the step instead of piping an error
      # page into a root shell.
      # NOTE(review): the installer is fetched from the `master` branch and is
      # not pinned to a commit or checksum — consider pinning.
      - name: Install kubectl-view-allocations
        run: |
          cd /
          curl -fsSL https://raw.githubusercontent.com/davidB/kubectl-view-allocations/master/scripts/getLatest.sh | sudo bash
          kubectl-view-allocations -h
        shell: bash

      - name: Cleanup target cloud (modelservice)
        env:
          LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
        run: ./setup/teardown.sh -c ocp_fb -t modelservice -d
        shell: bash

      - name: Cleanup target cloud (standalone)
        env:
          LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
        run: |
          ./setup/teardown.sh -c ocp_fb -t standalone -d
        shell: bash

      # GPU gate: reads field 11 of the `resource,nvidia.com/gpu` CSV row
      # (presumably the free GPU count — TODO confirm against the tool's CSV
      # layout), subtracts the required amount and, when the integer part of
      # the result is negative, prints "LLM-D SIMULATOR" and strips every
      # `####` marker from the scenario file (presumably enabling simulator
      # settings — confirm). `|| true` keeps a failing lookup from aborting
      # the step, matching the original inline form.
      - name: Standup target cloud (standalone)
        env:
          LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
        run: |
          free_gpus="$(kubectl-view-allocations -r gpu -o csv | grep resource,nvidia.com/gpu | cut -d ',' -f 11 || true)"
          if [[ $(echo "${free_gpus} - 10.00" | bc | cut -d '.' -f 1) -lt 0 ]]; then
            echo "LLM-D SIMULATOR"
            sed -i 's^####^^g' scenarios/cicd/ocp_fb.sh
          fi
          ./setup/standup.sh -c ocp_fb -t standalone
        shell: bash

      - name: Run benchmark (standalone, inference-perf)
        env:
          LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
        run: |
          ./setup/run.sh -c ocp_fb -t standalone
        shell: bash

      # NOTE(review): the workload is named `sanity_short-input` here but
      # `sanity_short-input.yaml` in the modelservice fmperf step — confirm
      # which spelling the scripts expect.
      - name: Run benchmark (standalone, fmperf)
        env:
          LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
        run: |
          ./setup/run.sh -c ocp_fb -t standalone -l fmperf -w sanity_short-input
        shell: bash

      - name: Run benchmark (standalone, guidellm)
        env:
          LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
        run: |
          ./setup/run.sh -c ocp_fb -t standalone -l guidellm -w sanity_concurrent
        shell: bash

      - name: Run benchmark (standalone, vllm-benchmark)
        env:
          LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
        run: |
          ./setup/run.sh -c ocp_fb -t standalone -l vllm-benchmark
        shell: bash

      # NOTE(review): this teardown is skipped when an earlier step fails, so
      # a failed benchmark leaves the standalone deployment in place until the
      # cleanup steps of the next run — confirm that is intended.
      - name: Cleanup target cloud (standalone)
        env:
          LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
        run: |
          ./setup/teardown.sh -c ocp_fb -t standalone -d
        shell: bash

      # The modelservice steps repeat the GPU gate with a requirement of 20.
      - name: E2E target cloud (modelservice, inference-perf)
        env:
          LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
        run: |
          free_gpus="$(kubectl-view-allocations -r gpu -o csv | grep resource,nvidia.com/gpu | cut -d ',' -f 11 || true)"
          if [[ $(echo "${free_gpus} - 20.00" | bc | cut -d '.' -f 1) -lt 0 ]]; then
            echo "LLM-D SIMULATOR"
            sed -i 's^####^^g' scenarios/cicd/ocp_fb.sh
          fi
          ./setup/e2e.sh -c ocp_fb -t modelservice --deep
        shell: bash

      - name: E2E target cloud (modelservice, fmperf)
        env:
          LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
        run: |
          free_gpus="$(kubectl-view-allocations -r gpu -o csv | grep resource,nvidia.com/gpu | cut -d ',' -f 11 || true)"
          if [[ $(echo "${free_gpus} - 20.00" | bc | cut -d '.' -f 1) -lt 0 ]]; then
            echo "LLM-D SIMULATOR"
            sed -i 's^####^^g' scenarios/cicd/ocp_fb.sh
          fi
          ./setup/e2e.sh -c ocp_fb -t modelservice --deep -l fmperf -w sanity_short-input.yaml
        shell: bash

      - name: E2E target cloud (modelservice, guidellm)
        env:
          LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
        run: |
          free_gpus="$(kubectl-view-allocations -r gpu -o csv | grep resource,nvidia.com/gpu | cut -d ',' -f 11 || true)"
          if [[ $(echo "${free_gpus} - 20.00" | bc | cut -d '.' -f 1) -lt 0 ]]; then
            echo "LLM-D SIMULATOR"
            sed -i 's^####^^g' scenarios/cicd/ocp_fb.sh
          fi
          ./setup/e2e.sh -c ocp_fb -t modelservice --deep -l guidellm -w sanity_concurrent.yaml
        shell: bash

      - name: E2E target cloud (modelservice, vllm-benchmark)
        env:
          LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
        run: |
          free_gpus="$(kubectl-view-allocations -r gpu -o csv | grep resource,nvidia.com/gpu | cut -d ',' -f 11 || true)"
          if [[ $(echo "${free_gpus} - 20.00" | bc | cut -d '.' -f 1) -lt 0 ]]; then
            echo "LLM-D SIMULATOR"
            sed -i 's^####^^g' scenarios/cicd/ocp_fb.sh
          fi
          ./setup/e2e.sh -c ocp_fb -t modelservice --deep -l vllm-benchmark
        shell: bash

      # `--update` lets the installer succeed whether or not AWS CLI v2 is
      # already present on the runner; `unzip -o` overwrites a leftover
      # extraction instead of prompting.
      - name: Install AWS CLI
        run: |
          curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
          unzip -q -o awscliv2.zip
          sudo ./aws/install --update
          aws --version
        shell: bash

      # Best-effort upload: a failure is reported as a workflow warning but
      # does not fail the job. Bucket and endpoint come in through `env:` so
      # the secrets are never spliced into the script text.
      # NOTE(review): unlike the artifact step below, this step has no
      # `if: success() || failure()`, so nothing is uploaded to COS when an
      # earlier step fails — confirm that is intended.
      - name: Upload results to IBM COS
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          COS_BUCKET_NAME: ${{ secrets.COS_BUCKET_NAME }}
          COS_ENDPOINT_URL: ${{ secrets.COS_ENDPOINT_URL }}
        run: |
          aws configure set default.s3.signature_version s3v4
          aws s3 cp "$INPUT_DIR" "s3://${COS_BUCKET_NAME}/${OUTPUT_DIR}/" \
            --recursive --endpoint-url "$COS_ENDPOINT_URL" \
            || echo "::warning::Upload to IBM COS failed; continuing without it"
        shell: bash

      # Runs after both successful and failed (but not cancelled) jobs so
      # partial results are still kept.
      - name: Archive benchmark results as GitHub artifact
        if: success() || failure()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.OUTPUT_DIR }}
          path: ${{ env.INPUT_DIR }}
          retention-days: 14