Skip to content

Commit 9425977

Browse files
committed
Initial source code release for Intel GPU base operator
Enables deployment of GPU Device Plugin or GPU DRA driver, and XPU Manager via a ClusterPolicy CR. The operator can also expose GPU metrics to Prometheus, configure NFD rules for targeted deployment, and deploy Kueue queues for advanced scheduling. GPUFirmwareUpdate CR can be used to update firmware on GPU devices. Signed-off-by: Tuomas Katila <tuomas.katila@intel.com> Signed-off-by: Patrik Flykt <patrik.flykt@intel.com>
1 parent 62893da commit 9425977

192 files changed

Lines changed: 21797 additions & 1 deletion

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Reusable workflow: builds the operator image with `make docker-build`,
# re-tags it for ${REGISTRY}/${PROJECT} and pushes it. Signing steps are
# present but currently disabled (see the TODO near the end).
name: build and push common
on:
  workflow_call:
    inputs:
      # Runner label for the job. The literal value 'self-hosted' also
      # skips the registry login step below.
      runner:
        required: true
        type: string

permissions:
  # NOTE(review): no step in this workflow obviously needs pull-request
  # access; this entry looks copied from the lint workflow - confirm and drop.
  pull-requests: read
  contents: read
  packages: write

env:
  REGISTRY: ${{ vars.REGISTRY }}
  PROJECT: ${{ vars.PROJECT }}

jobs:
  push:
    name: Publish Operator image
    runs-on: ${{ inputs.runner }}
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
        with:
          clean: true
      - uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5
        with:
          go-version-file: go.mod
          check-latest: true
          cache: false
      - name: Generate tag
        id: gentag
        run: |
          # Extract the value of the "TAG ?= <value>" line from the Makefile.
          TAG=$(sed -n 's/^TAG ?= \(.*\)/\1/p' Makefile)
          # sed exits 0 even without a match; fail here instead of letting
          # "docker tag" fail later with an empty tag.
          if [ -z "$TAG" ]; then
            echo "::error::No 'TAG ?= <value>' line found in Makefile"
            exit 1
          fi
          echo "TAG=$TAG" >> "$GITHUB_OUTPUT"
      - run: make docker-build
      - name: Log in to GitHub Container Registry
        if: inputs.runner != 'self-hosted'
        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Push to GitHub Container Registry
        env:
          # Passed through the environment rather than interpolated into the
          # script text, so the value is never parsed as shell code.
          TAG: ${{ steps.gentag.outputs.TAG }}
        run: |
          docker tag "intel/intel-gpu-base-operator:${TAG}" "${REGISTRY}/${PROJECT}/intel-gpu-base-operator:${TAG}"
          docker push "${REGISTRY}/${PROJECT}/intel-gpu-base-operator:${TAG}"
      - name: Get image digest
        if: ${{ steps.gentag.outputs.TAG != 'devel' }}
        id: digest
        env:
          TAG: ${{ steps.gentag.outputs.TAG }}
        run: |
          # Export the first repo digest of the pushed image as "image_sha".
          echo "image_sha=$(docker inspect --format='{{index .RepoDigests 0}}' "${REGISTRY}/${PROJECT}/intel-gpu-base-operator:${TAG}")" >> "$GITHUB_OUTPUT"
      # TODO: remove "false &&" when the repository is made public
      # NOTE(review): keyless signing normally needs "id-token: write", which
      # the permissions block above does not grant - confirm before enabling
      # (callers of this reusable workflow would have to grant it as well).
      - name: Install cosign
        if: ${{ false && steps.gentag.outputs.TAG != 'devel' }}
        uses: sigstore/cosign-installer@cad07c2e89fa2edd6e2d7bab4c1aa38e53f76003 # v4.1.1
      - name: Keyless image sign
        if: ${{ false && steps.gentag.outputs.TAG != 'devel' }}
        env:
          IMAGE_SHA: ${{ steps.digest.outputs.image_sha }}
        run: |
          cosign sign --yes "${IMAGE_SHA}"
61+
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Entry-point workflow: runs the shared build-and-push workflow on a
# GitHub-hosted runner (ubuntu-latest).
name: Build and Push (public)
on:
  # Triggered by pushes to main and by tags starting with "v".
  push:
    branches: [main]
    tags: ["v*"]

  # Also startable manually.
  workflow_dispatch:

permissions:
  # NOTE(review): presumably carried over from the lint workflow; verify
  # whether the called workflow actually needs pull-request access.
  pull-requests: read
  contents: read
  packages: write

jobs:
  push:
    name: Publish Operator image
    uses: ./.github/workflows/build-push-common.yaml
    with:
      runner: ubuntu-latest
    # Forward all of this workflow's secrets to the called workflow.
    secrets: inherit
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Entry-point workflow: runs the shared build-and-push workflow on a
# self-hosted runner.
name: Build and Push (self-hosted)
on:
  # Triggered by pushes to main and by tags starting with "v".
  push:
    branches: [main]
    tags: ["v*"]

  # Also startable manually.
  workflow_dispatch:

permissions:
  # NOTE(review): presumably carried over from the lint workflow; verify
  # whether the called workflow actually needs pull-request access.
  pull-requests: read
  contents: read
  packages: write

jobs:
  push:
    name: Publish Operator image
    uses: ./.github/workflows/build-push-common.yaml
    with:
      # The called workflow skips its registry login step for this value.
      runner: self-hosted
    # Forward all of this workflow's secrets to the called workflow.
    secrets: inherit
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Manually triggered workflow that pushes the packaged Helm charts through
# the "helm-push-chart" and "helm-push-policy-chart" make targets.
name: Package and upload helm

on:
  workflow_dispatch:
    inputs:
      # Runner label for the job; any value other than 'self-hosted'
      # enables the registry login step.
      runner:
        default: "self-hosted"
        required: false
        type: string

permissions:
  contents: read

env:
  REGISTRY: ${{ vars.REGISTRY }}
  PROJECT: ${{ vars.PROJECT }}
  RELEASE_REGISTRY: ${{ vars.REGISTRY }}/${{ vars.PROJECT }}

jobs:
  push-helm-charts:
    runs-on: ${{ inputs.runner }}
    permissions:
      packages: write
      contents: read

    steps:
      - name: Git checkout
        # Pinned to the same commit as the other workflows in this repository.
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
        with:
          fetch-tags: true

      # NOTE(review): still referenced by a mutable tag; pin to the commit
      # SHA of v4.3.0 like the other actions once it has been verified.
      - uses: azure/setup-helm@v4.3.0
      - name: Log in to the Container registry
        # Same pinned revision as the login step in build-push-common.yaml.
        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
        if: inputs.runner != 'self-hosted'
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Push packaged Helm chart
        run: make helm-push-chart

      - name: Push packaged Helm policy chart
        run: make helm-push-policy-chart
Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
# Reusable validation workflow: generated-file check, lint, build plus Trivy
# image/go.mod scans, tests, "go mod verify", and Trivy config scans of the
# Dockerfiles and deployment manifests.
name: validation common
on:
  workflow_call:
    inputs:
      # Runner label every job in this workflow runs on.
      runner:
        required: true
        type: string

permissions:
  pull-requests: read # for golangci/golangci-lint-action to fetch pull requests
  contents: read

env:
  # NOTE(review): not referenced by any job in this workflow file - confirm
  # whether a make target reads it from the environment.
  isubuntu: ${{ startsWith (inputs.runner, 'ubuntu') }}

jobs:
  generated:
    name: Check generated files are in sync
    runs-on: ${{ inputs.runner }}
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
        with:
          clean: true
      - uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5
        with:
          go-version-file: go.mod
          check-latest: true
          cache: false
      - run: |
          make check-generated-files
  golangci:
    name: Run lint
    runs-on: ${{ inputs.runner }}
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
        with:
          clean: true
      - uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5
        with:
          go-version-file: go.mod
          check-latest: true
          cache: false
      - run: |
          make lint
  build:
    name: Build all
    runs-on: ${{ inputs.runner }}
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
        with:
          clean: true
      - uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5
        with:
          go-version-file: go.mod
          check-latest: true
          cache: false
      - run: make build
      - run: make docker-build
      - name: Run Trivy for operator image (json)
        uses: aquasecurity/trivy-action@57a97c7e7821a5776cebc9bb87c984fa69cba8f1 # 0.35.0
        with:
          scan-type: image
          scan-ref: intel/intel-gpu-base-operator:devel
          format: json
          trivy-config: trivy.yaml
          exit-code: 1
          output: operator-image-vulnerabilities.json
      # The remaining steps use always() so that a failing image scan above
      # does not prevent the go.mod scan and the report upload; this matches
      # the other Trivy jobs in this workflow.
      - name: Run Trivy for go.mod (json)
        uses: aquasecurity/trivy-action@57a97c7e7821a5776cebc9bb87c984fa69cba8f1 # 0.35.0
        if: always()
        with:
          scan-type: fs
          scan-ref: go.mod
          format: json
          trivy-config: trivy.yaml
          exit-code: 1
          output: go.mod-vulnerabilities.json
          list-all-pkgs: true
      # Copy the ignore file to a non-dotted name before it is uploaded.
      - if: always()
        run: |
          cp .trivyignore.yaml trivyignore.yaml
      - name: Store image reports as artifacts
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        if: always()
        with:
          name: trivy-vulnerabilities
          path: |
            trivyignore.yaml
            operator-image-vulnerabilities.json
            go.mod-vulnerabilities.json
          retention-days: 14

  tests:
    name: Run tests
    runs-on: ${{ inputs.runner }}
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
        with:
          clean: true
      - uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5
        with:
          go-version-file: go.mod
          check-latest: true
          cache: false
      - name: Run tests
        run: |
          make envtest
          make test
  goverify:
    name: Run golang verify
    runs-on: ${{ inputs.runner }}
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
        with:
          clean: true
      - uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5
        with:
          go-version-file: go.mod
          check-latest: true
          cache: false
      - name: Run golang mod verify
        run: |
          go mod verify

  trivy_dockerfiles:
    name: Run trivy dockerfile
    runs-on: ${{ inputs.runner }}
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
        with:
          clean: true
      # Table-format scan of build/.
      - name: Run Trivy for dockerfiles
        uses: aquasecurity/trivy-action@57a97c7e7821a5776cebc9bb87c984fa69cba8f1 # 0.35.0
        with:
          scan-type: config
          scan-ref: build/
          format: table
          trivy-config: trivy.yaml
          exit-code: 1
          severity: CRITICAL,HIGH,MEDIUM

      # Same scan written to a JSON file; runs even when the table scan failed.
      - name: Run Trivy for dockerfiles (json)
        uses: aquasecurity/trivy-action@57a97c7e7821a5776cebc9bb87c984fa69cba8f1 # 0.35.0
        if: always()
        with:
          scan-type: config
          scan-ref: build/
          format: json
          trivy-config: trivy.yaml
          exit-code: 1
          severity: CRITICAL,HIGH,MEDIUM
          output: trivy-dockerfiles.json
      # always(): otherwise trivyignore.yaml is missing from the artifact
      # whenever one of the scans above has failed.
      - if: always()
        run: |
          cp .trivyignore.yaml trivyignore.yaml
      - name: Store dockerfile analysis report as artifact
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        if: always()
        with:
          name: trivy-dockerfiles-json
          path: |
            trivy-dockerfiles.json
            trivyignore.yaml
          retention-days: 14

  trivy_deployments:
    name: Run trivy deployments
    runs-on: ${{ inputs.runner }}
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
        with:
          clean: true
      # Table-format scan of config/deployments.
      - name: Run Trivy for deployments
        uses: aquasecurity/trivy-action@57a97c7e7821a5776cebc9bb87c984fa69cba8f1 # 0.35.0
        with:
          scan-type: config
          scan-ref: config/deployments
          format: table
          trivy-config: trivy.yaml
          exit-code: 1
          severity: CRITICAL,HIGH,MEDIUM
      # Same scan written to a JSON file; runs even when the table scan failed.
      - name: Run Trivy for deployments (json)
        uses: aquasecurity/trivy-action@57a97c7e7821a5776cebc9bb87c984fa69cba8f1 # 0.35.0
        if: always()
        with:
          scan-type: config
          scan-ref: config/deployments
          format: json
          trivy-config: trivy.yaml
          exit-code: 1
          severity: CRITICAL,HIGH,MEDIUM
          output: trivy-deployments.json
      # always(): otherwise trivyignore.yaml is missing from the artifact
      # whenever one of the scans above has failed.
      - if: always()
        run: |
          cp .trivyignore.yaml trivyignore.yaml
      - name: Store vulnerability report as artifact
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        if: always()
        with:
          name: trivy-deployments-json
          path: |
            trivy-deployments.json
            trivyignore.yaml
          retention-days: 14

0 commit comments

Comments
 (0)