-
Notifications
You must be signed in to change notification settings - Fork 141
116 lines (107 loc) · 4.23 KB
/
example-batched-matrix.yml
File metadata and controls
116 lines (107 loc) · 4.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
name: "Example: Batched Matrix Workflow"
# This is an example workflow demonstrating how to use the batching feature
# to work around GitHub Actions' 256 job matrix limit
on:
  # Manually triggered; both inputs are forwarded as arguments to the
  # sweep-config generator script invoked by the jobs below.
  workflow_dispatch:
    inputs:
      model-prefix:
        description: "Model prefix to benchmark"
        required: true
        type: string
      seq-lens:
        description: "Sequence length config (e.g., 1k1k)"
        required: true
        type: string
jobs:
  # Step 1: Determine how many batches are needed.
  # Also emits the explicit list of batch indices: GitHub Actions expressions
  # have no range() function, so the index array for the downstream matrix
  # must be built here in shell rather than inline in the matrix definition.
  get-batch-count:
    runs-on: ubuntu-latest
    outputs:
      # Total number of batches reported by the sweep-config generator.
      batch-count: ${{ steps.count.outputs.batch-count }}
      # JSON array [0, 1, ..., batch-count - 1] for use as a job matrix.
      batch-indices: ${{ steps.count.outputs.batch-indices }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - id: count
        run: |
          pip install pydantic
          # Interpolated inputs are double-quoted so values containing spaces
          # or shell metacharacters cannot split into extra arguments.
          BATCH_COUNT=$(python3 "${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py" \
            full-sweep \
            --config-files "${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml" \
            "${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml" \
            --seq-lens "${{ inputs.seq-lens }}" \
            --model-prefix "${{ inputs.model-prefix }}" \
            --get-batch-count)
          echo "batch-count=$BATCH_COUNT" >> "$GITHUB_OUTPUT"
          # Build the JSON index array [0, ..., BATCH_COUNT-1] for the matrix.
          BATCH_INDICES=$(python3 -c 'import json, sys; print(json.dumps(list(range(int(sys.argv[1])))))' "$BATCH_COUNT")
          echo "batch-indices=$BATCH_INDICES" >> "$GITHUB_OUTPUT"
          echo "Total batches needed: $BATCH_COUNT"
  # Step 2: Generate config for each batch
  # This job runs once per batch (up to the batch-count)
  get-batch-configs:
    needs: get-batch-count
    runs-on: ubuntu-latest
    # Create a matrix with one entry per batch
    strategy:
      matrix:
        # [0, 1, 2, ..., batch-count-1], produced by get-batch-count above
        # (range() is not available in workflow expressions).
        batch-index: ${{ fromJson(needs.get-batch-count.outputs.batch-indices) }}
    outputs:
      # Each batch gets its own output.
      # NOTE(review): GitHub Actions does not evaluate ${{ }} expressions in
      # output *names*, so this key is unlikely to expand per batch — confirm,
      # or publish each batch's config as an uploaded artifact instead.
      configs-${{ matrix.batch-index }}: ${{ steps.get-configs.outputs.configs }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - id: get-configs
        run: |
          pip install pydantic
          # Same quoting discipline as get-batch-count above.
          CONFIG_JSON=$(python3 "${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py" \
            full-sweep \
            --config-files "${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml" \
            "${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml" \
            --seq-lens "${{ inputs.seq-lens }}" \
            --model-prefix "${{ inputs.model-prefix }}" \
            --batch-index "${{ matrix.batch-index }}")
          echo "configs=$CONFIG_JSON" >> "$GITHUB_OUTPUT"
          echo "Generated batch ${{ matrix.batch-index }}"
# Step 3: Run benchmarks for batch 0
# You would create similar jobs for batch-1, batch-2, etc. if needed
benchmark-batch-0:
needs: get-batch-configs
# Only run if batch 0 exists
if: ${{ fromJson(needs.get-batch-count.outputs.batch-count) > 0 }}
uses: ./.github/workflows/benchmark-tmpl.yml
name: ${{ inputs.model-prefix }} ${{ inputs.seq-lens }} batch-0 /
strategy:
fail-fast: false
matrix:
config: ${{ fromJson(needs.get-batch-configs.outputs.configs-0) }}
secrets: inherit
with:
exp-name: "${{ inputs.model-prefix }}_${{ inputs.seq-lens }}_batch0"
isl: 1024
osl: 1024
max-model-len: 2048
runner: ${{ matrix.config.runner }}
image: ${{ matrix.config.image }}
model: ${{ matrix.config.model }}
framework: ${{ matrix.config.framework }}
precision: ${{ matrix.config.precision }}
tp: ${{ matrix.config.tp }}
ep: ${{ matrix.config.ep }}
dp-attn: ${{ matrix.config.dp-attn }}
conc: ${{ matrix.config.conc }}
  # Step 4 (optional): Collect results from all batches
  collect-results:
    needs: [get-batch-count, benchmark-batch-0]
    # always() keeps this summary running even when the benchmark job fails
    # or is skipped (e.g. when batch-count is 0).
    if: ${{ always() }}
    runs-on: ubuntu-latest
    steps:
      - name: Summary
        run: |
          echo "Processed ${{ needs.get-batch-count.outputs.batch-count }} batch(es)"
          echo "Benchmark complete"
# Note: For production use with multiple batches, you would either:
# 1. Create multiple benchmark-batch-N jobs (one per possible batch)
# 2. Use a dynamic workflow generation approach
# 3. Use GitHub's reusable workflows with a loop construct (when available)
#
# The current InferenceMAX workflows split by model-prefix instead,
# which naturally keeps each job under the 256 limit.