# LLama Fine-tuning Benchmark CML
name: LLama Fine-tuning Benchmark CML

on:
  schedule:
    # Monthly run: midnight UTC on the 1st of each month
    - cron: '0 0 1 * *'
  workflow_dispatch:
    inputs:
      git-ref:
        description: Repo reference (branch, tag or SHA)
        default: "main"
        required: true
        type: string
      alternative-cp-wheel-artifact-id:
        description: Alternative Concrete-Python Wheel Artifact-ID (see https://github.com/zama-ai/concrete/actions/workflows/concrete_python_release.yml)
        default: "none"
        required: true
        type: string
      alternative-cp-branch:
        description: Alternative Concrete-Python Branch
        default: "none"
        required: true
        type: string
      mode:
        description: Training mode (torch, 7bit, or 16bit)
        default: "7bit"
        type: choice
        options:
          - "torch"
          - "7bit"
          - "16bit"
      fhe-mode:
        description: FHE execution mode (disable, simulate, or execute)
        default: "execute"
        type: choice
        options:
          - "disable"
          - "simulate"
          - "execute"
      max_length:
        description: Maximum sequence length
        default: "64"
        type: string
        required: true
      batch_size:
        description: Batch size for training
        default: "1"
        type: string
        required: true
      training_steps:
        description: Number of training steps to run
        default: "1"
        type: string
        required: true

permissions:
  contents: read

# Global environment variables
env:
  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  AGENT_TOOLSDIRECTORY: /opt/hostedtoolcache
  RUNNER_TOOL_CACHE: /opt/hostedtoolcache
# Jobs
jobs:
  # Starts one EC2 instance per device type via the SLAB runner broker and
  # exposes the resulting runner labels to downstream jobs.
  setup-instances:
    name: Setup EC2 instances
    runs-on: ubuntu-24.04
    strategy:
      matrix:
        device:
          - type: cpu
            profile: big-cpu
          - type: gpu
            profile: gpu_ciprofile
    outputs:
      cpu-runner: ${{ steps.set-outputs.outputs.cpu-runner }}
      gpu-runner: ${{ steps.set-outputs.outputs.gpu-runner }}
    steps:
      - name: Start ${{ matrix.device.type }} instance
        id: start-instance
        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
        with:
          mode: start
          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
          slab-url: ${{ secrets.SLAB_BASE_URL }}
          job-secret: ${{ secrets.JOB_SECRET }}
          backend: aws
          profile: ${{ matrix.device.profile }}
      # Map the started instance's runner label to a device-specific output.
      - name: Set outputs
        id: set-outputs
        run: |
          if [ "${{ matrix.device.type }}" == "cpu" ]; then
            echo "cpu-runner=${{ steps.start-instance.outputs.label }}" >> "$GITHUB_OUTPUT"
          else
            echo "gpu-runner=${{ steps.start-instance.outputs.label }}" >> "$GITHUB_OUTPUT"
          fi
| run-benchmark: | |
| needs: [setup-instances] | |
| name: Run LLama LoRA Math benchmark on ${{ matrix.device }} | |
| strategy: | |
| fail-fast: false # IMPORTANT: This ensures parallel execution continues even if one fails | |
| matrix: | |
| device: [cpu, gpu] | |
| runs-on: ${{ matrix.device == 'cpu' && needs.setup-instances.outputs.cpu-runner || needs.setup-instances.outputs.gpu-runner }} | |
| outputs: | |
| cpu-status: ${{ steps.set-final-status.outputs.cpu-status }} | |
| gpu-status: ${{ steps.set-final-status.outputs.gpu-status }} | |
| cpu-has-metrics: ${{ steps.set-final-status.outputs.cpu-has-metrics }} | |
| gpu-has-metrics: ${{ steps.set-final-status.outputs.gpu-has-metrics }} | |
| env: | |
| PIP_INDEX_URL: ${{ secrets.PIP_INDEX_URL }} | |
| MAX_LENGTH: ${{ github.event.inputs.max_length || '64' }} | |
| BATCH_SIZE: ${{ github.event.inputs.batch_size || '1' }} | |
| TRAINING_STEPS: ${{ github.event.inputs.training_steps || '1' }} | |
| MODE: ${{ github.event.inputs.mode || '7bit' }} | |
| FHE_MODE: ${{ github.event.inputs.fhe-mode || 'execute' }} | |
| steps: | |
| - name: Install git-lfs | |
| run: | | |
| # Install git-lfs package only - the checkout action will handle initialization | |
| curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash | |
| sudo apt-get install -y git-lfs | |
| - name: Add masks | |
| run: | | |
| echo "::add-mask::${{ secrets.INTERNAL_PYPI_URL_FOR_MASK }}" | |
| echo "::add-mask::${{ secrets.INTERNAL_REPO_URL_FOR_MASK }}" | |
| echo "::add-mask::${{ secrets.INTERNAL_PYPI_URL }}" | |
| echo "::add-mask::${{ secrets.INTERNAL_REPO_URL }}" | |
| - name: Checkout code | |
| uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 | |
| with: | |
| persist-credentials: 'false' | |
| lfs: true | |
| ref: ${{ github.event.inputs.git-ref }} | |
| - name: Set up Python | |
| uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 | |
| with: | |
| python-version: "3.10" | |
| - name: Install dependencies | |
| run: | | |
| sudo apt-get update -y | |
| # Install system dependencies | |
| sudo apt-get install --no-install-recommends -y \ | |
| gnome-keyring \ | |
| graphviz \ | |
| graphviz-dev \ | |
| libgraphviz-dev \ | |
| pkg-config \ | |
| python3-dev \ | |
| python3-pip \ | |
| python3-venv | |
| # Try to install python3.10-venv if available, but don't fail if not | |
| sudo apt-get install -y python3.10-venv || true | |
| sudo apt-mark hold docker.io | |
| ./script/make_utils/setup_os_deps.sh | |
| make setup_env | |
| source .venv/bin/activate | |
| # Verify Python version in venv | |
| echo "Python version in venv:" | |
| python --version | |
| which python | |
| # Upgrade pip and setuptools first | |
| pip install --upgrade pip setuptools wheel | |
| # Install PyTorch with appropriate CUDA support | |
| if [ "${{ matrix.device }}" == "gpu" ]; then | |
| pip install torch>=2.0.0+cu118 --extra-index-url https://download.pytorch.org/whl/cu118 | |
| else | |
| pip install torch>=2.0.0 | |
| fi | |
| # Install accelerate BEFORE other dependencies to ensure correct version | |
| pip install 'accelerate>=1.1.0' | |
| # Install other dependencies | |
| pip install 'transformers>=4.30.0' 'datasets>=2.12.0' 'peft>=0.4.0' 'tqdm>=4.65.0' 'numpy>=1.24.0' 'psutil>=5.9.0' 'py-cpuinfo>=9.0.0' | |
| # Verify accelerate installation and version | |
| python -c "import accelerate; print(f'Accelerate version: {accelerate.__version__}')" | |
| pip show accelerate | |
| # Install any additional requirements from the project | |
| if [ -f "requirements.txt" ]; then | |
| pip install -r requirements.txt | |
| fi | |
| # Install concrete-ml requirements | |
| pip install -e . | |
| - name: Alternative Concrete Python Wheel Download | |
| if: github.event_name == 'workflow_dispatch' && github.event.inputs.alternative-cp-wheel-artifact-id != 'none' | |
| run: | | |
| curl -L \ | |
| -H "Accept: application/vnd.github+json" \ | |
| -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ | |
| -H "X-GitHub-Api-Version: 2022-11-28" \ | |
| -o concrete-python.whl.zip \ | |
| https://api.github.com/repos/zama-ai/concrete/actions/artifacts/${{ github.event.inputs.alternative-cp-wheel-artifact-id }}/zip | |
| - name: Alternative Concrete Python Wheel Install | |
| if: github.event_name == 'workflow_dispatch' && github.event.inputs.alternative-cp-wheel-artifact-id != 'none' | |
| run: | | |
| source .venv/bin/activate | |
| unzip concrete-python.whl.zip | |
| pip install concrete_python-*.whl | |
| - name: Alternative Concrete Python Branch Checkout | |
| if: github.event_name == 'workflow_dispatch' && github.event.inputs.alternative-cp-branch != 'none' | |
| uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 | |
| with: | |
| persist-credentials: 'false' | |
| path: concrete | |
| repository: zama-ai/concrete | |
| ref: ${{ github.event.inputs.alternative-cp-branch }} | |
| - name: Alternative Concrete Python Branch Source Install | |
| if: github.event_name == 'workflow_dispatch' && github.event.inputs.alternative-cp-branch != 'none' | |
| run: | | |
| cp -R concrete/frontends/concrete-python/concrete/* .venv/lib/python3.*/site-packages/concrete/ | |
| - name: Run Benchmark - LLama LoRA Math Word Problems (${{ matrix.device }}) | |
| id: run-benchmark | |
| continue-on-error: true # Allow workflow to continue even if this step fails | |
| run: | | |
| source .venv/bin/activate | |
| # Login to Hugging Face | |
| huggingface-cli login --token ${{ secrets.LLAMA_HF_TOKEN }} | |
| # Run benchmark with appropriate device flag | |
| if [ "${{ matrix.device }}" == "cpu" ]; then | |
| python3 benchmarks/llama_lora_math_benchmark.py --save-model --device-type cpu | |
| else | |
| python3 benchmarks/llama_lora_math_benchmark.py --save-model --device-type gpu | |
| fi | |
| - name: Check if metrics exist | |
| id: check-metrics | |
| if: always() | |
| run: | | |
| if [ -f "to_upload.json" ]; then | |
| echo "metrics-exist=true" >> $GITHUB_OUTPUT | |
| else | |
| echo "metrics-exist=false" >> $GITHUB_OUTPUT | |
| fi | |
| - name: Archive metrics | |
| if: always() && steps.check-metrics.outputs.metrics-exist == 'true' | |
| uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 | |
| with: | |
| name: metrics-${{ matrix.device }}.json | |
| path: to_upload.json | |
| - name: Check if model exists | |
| id: check-model | |
| if: always() | |
| run: | | |
| MODEL_MODE="${{ env.MODE }}" | |
| if [ -d "deployment/llama_lora_finetuned_${MODEL_MODE}" ]; then | |
| echo "model-exist=true" >> $GITHUB_OUTPUT | |
| echo "model-path=deployment/llama_lora_finetuned_${MODEL_MODE}" >> $GITHUB_OUTPUT | |
| else | |
| echo "model-exist=false" >> $GITHUB_OUTPUT | |
| echo "Model directory not found: deployment/llama_lora_finetuned_${MODEL_MODE}" >&2 | |
| fi | |
| - name: Archive fine-tuned model | |
| if: always() && steps.check-model.outputs.model-exist == 'true' | |
| uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 | |
| with: | |
| name: llama_lora_finetuned_${{ env.MODE }}_${{ matrix.device }} | |
| path: ${{ steps.check-model.outputs.model-path }} | |
| - name: Upload results to benchmark database | |
| if: | | |
| always() && | |
| steps.run-benchmark.outcome == 'success' && | |
| steps.check-metrics.outputs.metrics-exist == 'true' && | |
| (github.event_name != 'workflow_dispatch' || (github.event.inputs.alternative-cp-branch == 'none' && github.event.inputs.alternative-cp-wheel-artifact-id == 'none')) | |
| run: | | |
| # Log the json | |
| cat to_upload.json | jq | |
| # Wait to avoid log issues | |
| sleep 1 | |
| # Upload to benchmark database | |
| curl --fail-with-body \ | |
| -H "Authorization: Bearer ${{ secrets.NEW_ML_PROGRESS_TRACKER_TOKEN }}" \ | |
| -H "Content-Type: application/json; charset=UTF-8" \ | |
| -d @to_upload.json \ | |
| -X POST "${{ secrets.NEW_ML_PROGRESS_TRACKER_URL }}experiment" | |
| - name: Set final job status output | |
| id: set-final-status | |
| if: always() | |
| run: | | |
| # Set status for the appropriate device | |
| if [ "${{ matrix.device }}" == "cpu" ]; then | |
| echo "cpu-status=${{ steps.run-benchmark.outcome }}" >> $GITHUB_OUTPUT | |
| echo "cpu-has-metrics=${{ steps.check-metrics.outputs.metrics-exist }}" >> $GITHUB_OUTPUT | |
| else | |
| echo "gpu-status=${{ steps.run-benchmark.outcome }}" >> $GITHUB_OUTPUT | |
| echo "gpu-has-metrics=${{ steps.check-metrics.outputs.metrics-exist }}" >> $GITHUB_OUTPUT | |
| fi | |
| # This step will fail the job if the benchmark failed, making it red in GitHub UI | |
| - name: Check benchmark status | |
| if: always() && steps.run-benchmark.outcome == 'failure' | |
| run: | | |
| echo "Benchmark failed for ${{ matrix.device }}" | |
| echo "Exit code from benchmark: ${{ steps.run-benchmark.conclusion }}" | |
| # Log additional debugging info | |
| echo "Current directory contents:" | |
| ls -la | |
| echo "Deployment directory contents (if exists):" | |
| ls -la deployment/ || echo "Deployment directory not found" | |
| exit 1 | |
| teardown-instances: | |
| name: Teardown EC2 instances | |
| if: ${{ always() }} | |
| needs: [setup-instances, run-benchmark] | |
| runs-on: ubuntu-24.04 | |
| strategy: | |
| matrix: | |
| device: | |
| - type: cpu | |
| runner: ${{ needs.setup-instances.outputs.cpu-runner }} | |
| - type: gpu | |
| runner: ${{ needs.setup-instances.outputs.gpu-runner }} | |
| steps: | |
| - name: Stop ${{ matrix.device.type }} instance | |
| uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac | |
| with: | |
| mode: stop | |
| github-token: ${{ secrets.SLAB_ACTION_TOKEN }} | |
| slab-url: ${{ secrets.SLAB_BASE_URL }} | |
| job-secret: ${{ secrets.JOB_SECRET }} | |
| label: ${{ matrix.device.runner }} | |
| slack-notification: | |
| runs-on: ubuntu-24.04 | |
| needs: [run-benchmark] | |
| if: | | |
| always() && | |
| (github.event_name != 'workflow_dispatch' || (github.event.inputs.alternative-cp-branch == 'none' && github.event.inputs.alternative-cp-wheel-artifact-id == 'none')) | |
| steps: | |
| - name: Determine detailed status | |
| id: detailed-status | |
| run: | | |
| # Initialize variables | |
| OVERALL_STATUS="success" | |
| SLACK_COLOR="good" | |
| STATUS_DETAILS="" | |
| # Check CPU status | |
| CPU_STATUS="${{ needs.run-benchmark.outputs.cpu-status }}" | |
| if [ -z "$CPU_STATUS" ]; then | |
| CPU_STATUS="skipped" | |
| fi | |
| # Check GPU status | |
| GPU_STATUS="${{ needs.run-benchmark.outputs.gpu-status }}" | |
| if [ -z "$GPU_STATUS" ]; then | |
| GPU_STATUS="skipped" | |
| fi | |
| # Build status details | |
| CPU_EMOJI="✅" | |
| GPU_EMOJI="✅" | |
| if [ "$CPU_STATUS" == "failure" ]; then | |
| CPU_EMOJI="❌" | |
| OVERALL_STATUS="partial" | |
| SLACK_COLOR="warning" | |
| elif [ "$CPU_STATUS" == "skipped" ]; then | |
| CPU_EMOJI="⏭️" | |
| fi | |
| if [ "$GPU_STATUS" == "failure" ]; then | |
| GPU_EMOJI="❌" | |
| OVERALL_STATUS="partial" | |
| SLACK_COLOR="warning" | |
| elif [ "$GPU_STATUS" == "skipped" ]; then | |
| GPU_EMOJI="⏭️" | |
| fi | |
| # If both failed, it's a complete failure | |
| if [ "$CPU_STATUS" == "failure" ] && [ "$GPU_STATUS" == "failure" ]; then | |
| OVERALL_STATUS="failure" | |
| SLACK_COLOR="danger" | |
| fi | |
| STATUS_DETAILS="CPU: $CPU_EMOJI ($CPU_STATUS) | GPU: $GPU_EMOJI ($GPU_STATUS)" | |
| # Output results | |
| echo "overall-status=$OVERALL_STATUS" >> $GITHUB_OUTPUT | |
| echo "slack-color=$SLACK_COLOR" >> $GITHUB_OUTPUT | |
| echo "status-details=$STATUS_DETAILS" >> $GITHUB_OUTPUT | |
| # Check if any metrics were produced | |
| HAS_METRICS="false" | |
| if [ "${{ needs.run-benchmark.outputs.cpu-has-metrics }}" == "true" ] || [ "${{ needs.run-benchmark.outputs.gpu-has-metrics }}" == "true" ]; then | |
| HAS_METRICS="true" | |
| fi | |
| echo "has-metrics=$HAS_METRICS" >> $GITHUB_OUTPUT | |
| - name: Slack Notification | |
| continue-on-error: true | |
| uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 | |
| env: | |
| SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }} | |
| SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png | |
| SLACK_COLOR: ${{ steps.detailed-status.outputs.slack-color }} | |
| SLACK_MESSAGE: | | |
| LLama LoRA Math Benchmark | |
| Mode: ${{ github.event.inputs.mode || '7bit' }}, FHE: ${{ github.event.inputs.fhe-mode || 'execute' }} | |
| Status: ${{ steps.detailed-status.outputs.status-details }} | |
| Overall: ${{ steps.detailed-status.outputs.overall-status }} | |
| Metrics uploaded: ${{ steps.detailed-status.outputs.has-metrics }} | |
| Run: ${{ env.ACTION_RUN_URL }} | |
| SLACK_USERNAME: ${{ secrets.BOT_USERNAME }} | |
| SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} |