-
Notifications
You must be signed in to change notification settings - Fork 141
116 lines (107 loc) · 4.23 KB
/
example-batched-matrix.yml
File metadata and controls
116 lines (107 loc) · 4.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
name: "Example: Batched Matrix Workflow"
# This is an example workflow demonstrating how to use the batching feature
# to work around GitHub Actions' 256 job matrix limit
on:
  # Manually triggered; both inputs are forwarded as arguments to the
  # sweep-config generator script invoked by the jobs below.
  workflow_dispatch:
    inputs:
      model-prefix:
        description: "Model prefix to benchmark"
        required: true
        type: string
      seq-lens:
        description: "Sequence length config (e.g., 1k1k)"
        required: true
        type: string
jobs:
  # Step 1: Determine how many batches are needed.
  # Also emits the explicit list of batch indices: GitHub Actions expressions
  # have no range() function, so the index array for the downstream matrix
  # must be built here in shell rather than inline in the matrix definition.
  get-batch-count:
    runs-on: ubuntu-latest
    outputs:
      # Total number of batches reported by the sweep-config generator.
      batch-count: ${{ steps.count.outputs.batch-count }}
      # JSON array [0, 1, ..., batch-count - 1] for use as a job matrix.
      batch-indices: ${{ steps.count.outputs.batch-indices }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - id: count
        run: |
          pip install pydantic
          # Interpolated inputs are double-quoted so values containing spaces
          # or shell metacharacters cannot split into extra arguments.
          BATCH_COUNT=$(python3 "${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py" \
            full-sweep \
            --config-files "${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml" \
            "${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml" \
            --seq-lens "${{ inputs.seq-lens }}" \
            --model-prefix "${{ inputs.model-prefix }}" \
            --get-batch-count)
          echo "batch-count=$BATCH_COUNT" >> "$GITHUB_OUTPUT"
          # Build the JSON index array [0, ..., BATCH_COUNT-1] for the matrix.
          BATCH_INDICES=$(python3 -c 'import json, sys; print(json.dumps(list(range(int(sys.argv[1])))))' "$BATCH_COUNT")
          echo "batch-indices=$BATCH_INDICES" >> "$GITHUB_OUTPUT"
          echo "Total batches needed: $BATCH_COUNT"
  # Step 2: Generate config for each batch
  # This job runs once per batch (up to the batch-count)
  get-batch-configs:
    needs: get-batch-count
    runs-on: ubuntu-latest
    # Create a matrix with one entry per batch
    strategy:
      matrix:
        # [0, 1, 2, ..., batch-count-1], produced by get-batch-count above
        # (range() is not available in workflow expressions).
        batch-index: ${{ fromJson(needs.get-batch-count.outputs.batch-indices) }}
    outputs:
      # Each batch gets its own output.
      # NOTE(review): GitHub Actions does not evaluate ${{ }} expressions in
      # output *names*, so this key is unlikely to expand per batch — confirm,
      # or publish each batch's config as an uploaded artifact instead.
      configs-${{ matrix.batch-index }}: ${{ steps.get-configs.outputs.configs }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - id: get-configs
        run: |
          pip install pydantic
          # Same quoting discipline as get-batch-count above.
          CONFIG_JSON=$(python3 "${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py" \
            full-sweep \
            --config-files "${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml" \
            "${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml" \
            --seq-lens "${{ inputs.seq-lens }}" \
            --model-prefix "${{ inputs.model-prefix }}" \
            --batch-index "${{ matrix.batch-index }}")
          echo "configs=$CONFIG_JSON" >> "$GITHUB_OUTPUT"
          echo "Generated batch ${{ matrix.batch-index }}"
# Step 3: Run benchmarks for batch 0
# You would create similar jobs for batch-1, batch-2, etc. if needed
benchmark-batch-0:
needs: get-batch-configs
# Only run if batch 0 exists
if: ${{ fromJson(needs.get-batch-count.outputs.batch-count) > 0 }}
uses: ./.github/workflows/benchmark-tmpl.yml
name: ${{ inputs.model-prefix }} ${{ inputs.seq-lens }} batch-0 /
strategy:
fail-fast: false
matrix:
config: ${{ fromJson(needs.get-batch-configs.outputs.configs-0) }}
secrets: inherit
with:
exp-name: "${{ inputs.model-prefix }}_${{ inputs.seq-lens }}_batch0"
isl: 1024
osl: 1024
max-model-len: 2048
runner: ${{ matrix.config.runner }}
image: ${{ matrix.config.image }}
model: ${{ matrix.config.model }}
framework: ${{ matrix.config.framework }}
precision: ${{ matrix.config.precision }}
tp: ${{ matrix.config.tp }}
ep: ${{ matrix.config.ep }}
dp-attn: ${{ matrix.config.dp-attn }}
conc: ${{ matrix.config.conc }}
  # Step 4 (optional): Collect results from all batches
  collect-results:
    needs: [get-batch-count, benchmark-batch-0]
    # always() keeps this summary running even when the benchmark job fails
    # or is skipped (e.g. when batch-count is 0).
    if: ${{ always() }}
    runs-on: ubuntu-latest
    steps:
      - name: Summary
        run: |
          echo "Processed ${{ needs.get-batch-count.outputs.batch-count }} batch(es)"
          echo "Benchmark complete"
# Note: For production use with multiple batches, you would either:
# 1. Create multiple benchmark-batch-N jobs (one per possible batch)
# 2. Use a dynamic workflow generation approach
# 3. Use GitHub's reusable workflows with a loop construct (when available)
#
# The current InferenceMAX workflows split by model-prefix instead,
# which naturally keeps each job under the 256 limit.