# Source: .github/workflows/launcher-based-e2e-test.yml at commit bf0f89289915a0d7ae3133b746605fb867a3f9e1 (waltforme/llm-d-fast-model-actuation on GitHub)
# End-to-end test of launcher-based server-providing pods, run in a `kind` cluster.
name: 'Launcher-Based E2E Test'

on:
  # A push (to any branch) triggers the test only when it touches one of these paths.
  push:
    paths:
      - '.github/workflows/launcher-based-e2e-test.yml'
      - 'Makefile'
      - 'cmd/dual-pods-controller/**'
      - 'cmd/test-requester/**'
      - 'cmd/launcher-populator/**'
      - 'inference_server/launcher/**'
      - 'dockerfiles/Dockerfile.launcher.benchmark'
      - 'pkg/**'
      - 'test/e2e/run-launcher-based.sh'
      - 'test/e2e/mkobjs.sh'
  # Every pull request that targets `main` triggers the test; no path filter applies here.
  pull_request:
    branches: [main]

jobs:
  # Prints the GitHub context values that identify what triggered this run, so
  # trigger-related surprises can be diagnosed from the job log.
  debug:
    name: print relevant info
    runs-on: ubuntu-latest
    steps:
      # Context values reach the shell through `env` instead of being expanded
      # inline in the script. An inline expression is substituted into the script
      # text before bash parses it, so a value such as a branch name containing
      # `$(...)` would be executed rather than printed. Environment variables
      # are expanded by bash as plain data.
      # The CTX_ prefix avoids clashing with the runner's reserved GITHUB_* names.
      - env:
          CTX_ACTOR: ${{ github.actor }}
          CTX_ACTION_REF: ${{ github.action_ref }}
          CTX_EVENT_NAME: ${{ github.event_name }}
          CTX_REF: ${{ github.ref }}
          CTX_REF_NAME: ${{ github.ref_name }}
          CTX_REPOSITORY: ${{ github.repository }}
          CTX_REPOSITORY_OWNER: ${{ github.repository_owner }}
          CTX_TRIGGERING_ACTOR: ${{ github.triggering_actor }}
        run: |
          echo "github.actor=$CTX_ACTOR"
          echo "github.action_ref=$CTX_ACTION_REF"
          echo "github.event_name=$CTX_EVENT_NAME"
          echo "github.head_ref=$GITHUB_HEAD_REF"
          echo "github.ref=$CTX_REF"
          echo "github.ref_name=$CTX_REF_NAME"
          echo "github.repository=$CTX_REPOSITORY"
          echo "github.repository_owner=$CTX_REPOSITORY_OWNER"
          echo "github.triggering_actor=$CTX_TRIGGERING_ACTOR"
          echo "GITHUB_ACTION_REF=$GITHUB_ACTION_REF"

  # Installs the toolchain (Go, ko, Docker Buildx), runs
  # test/e2e/run-launcher-based.sh, then prints cluster state for diagnosis.
  run-launcher-test:
    runs-on: ubuntu-22.04-arm
    steps:
      # Checkout comes first so the repository's Go module files are already in
      # the workspace when setup-go runs. setup-go's built-in dependency cache
      # is keyed on those files and is skipped (with a warning) when the
      # workspace is still empty.
      - name: Checkout code
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

      - uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
        with:
          go-version: '1.24.2'

      - name: Install ko
        uses: ko-build/setup-ko@d006021bd0c28d1ce33a07e7943d48b079944c8d # v0.9
        with:
          version: v0.15.2

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0

      - name: Run launcher-based E2E test
        run: test/e2e/run-launcher-based.sh

      # Diagnostics. Each step below has `if: always()` so it still runs when the
      # test step (or an earlier diagnostic step) has failed.
      # NOTE(review): these steps need a reachable cluster and `kubectl`;
      # presumably the test script sets those up. Confirm against the script.
      - name: show all pods
        if: always()
        run: kubectl get pods -A -o wide

      - name: show test pods with labels
        if: always()
        run: kubectl get pods -L dual-pods.llm-d.ai/dual,dual-pods.llm-d.ai/sleeping,dual-pods.llm-d.ai/launcher-config-name

      - name: show ReplicaSets
        if: always()
        run: kubectl get rs -A

      - name: show dual-pods controller log
        if: always()
        run: kubectl logs deploy/fma-dual-pods-controller

      # The fallback message keeps this step green when the Deployment is absent.
      - name: show launcher-populator log
        if: always()
        run: kubectl logs deploy/fma-launcher-populator || echo "launcher-populator not deployed"

      - name: show GPU allocations
        if: always()
        run: kubectl get cm gpu-allocs -o yaml

      - name: show GPU map
        if: always()
        run: kubectl get cm gpu-map -o yaml

      - name: show InferenceServerConfigs
        if: always()
        run: kubectl get inferenceserverconfigs -o yaml

      - name: show LauncherConfigs
        if: always()
        run: kubectl get launcherconfigs -o yaml

      - name: show YAML of test pods
        if: always()
        run: kubectl get pods -o yaml

      # Prints the log of every pod carrying the launcher-config-name label.
      # A failure on one pod is reported and the loop moves on to the next.
      - name: show launcher pod logs
        if: always()
        run: |
          for pod in $(kubectl get pods -l dual-pods.llm-d.ai/launcher-config-name -o name); do
            echo "=== Logs for $pod ==="
            kubectl logs "$pod" || echo "Failed to get logs for $pod"
          done