Skip to content

Commit 298704d

Browse files
committed
vfio passthrough support
Signed-off-by: Varun Ramachandra Sekar <[email protected]>
1 parent 3903df7 commit 298704d

File tree

36 files changed

+44008
-56
lines changed

36 files changed

+44008
-56
lines changed

api/nvidia.com/resource/v1beta1/api.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ const (
2929

3030
GpuConfigKind = "GpuConfig"
3131
MigDeviceConfigKind = "MigDeviceConfig"
32+
VfioDeviceConfigKind = "VfioDeviceConfig"
3233
ComputeDomainChannelConfigKind = "ComputeDomainChannelConfig"
3334
ComputeDomainDaemonConfigKind = "ComputeDomainDaemonConfig"
3435
ComputeDomainKind = "ComputeDomain"
@@ -66,6 +67,7 @@ func init() {
6667
scheme.AddKnownTypes(schemeGroupVersion,
6768
&GpuConfig{},
6869
&MigDeviceConfig{},
70+
&VfioDeviceConfig{},
6971
&ComputeDomainChannelConfig{},
7072
&ComputeDomainDaemonConfig{},
7173
&ComputeDomain{},
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package v1beta1
18+
19+
import (
20+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21+
22+
"github.com/NVIDIA/k8s-dra-driver-gpu/pkg/featuregates"
23+
)
24+
25+
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
26+
27+
// VfioDeviceConfig holds the set of parameters for configuring a VFIO device.
28+
type VfioDeviceConfig struct {
29+
metav1.TypeMeta `json:",inline"`
30+
}
31+
32+
// DefaultVfioDeviceConfig provides the default configuration of a VFIO device.
33+
func DefaultVfioDeviceConfig() *VfioDeviceConfig {
34+
if !featuregates.Enabled(featuregates.PassthroughSupport) {
35+
return nil
36+
}
37+
return &VfioDeviceConfig{
38+
TypeMeta: metav1.TypeMeta{
39+
APIVersion: GroupName + "/" + Version,
40+
Kind: VfioDeviceConfigKind,
41+
},
42+
}
43+
}
44+
45+
// Normalize updates a VfioDeviceConfig config with implied default values based on other settings.
46+
func (c *VfioDeviceConfig) Normalize() error {
47+
return nil
48+
}
49+
50+
// Validate ensures that VfioDeviceConfig has a valid set of values.
51+
func (c *VfioDeviceConfig) Validate() error {
52+
return nil
53+
}

api/nvidia.com/resource/v1beta1/zz_generated.deepcopy.go

Lines changed: 24 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cmd/gpu-kubelet-plugin/allocatable.go

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
2+
* Copyright (c) 2022-2025, NVIDIA CORPORATION. All rights reserved.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -24,9 +24,22 @@ import (
2424

2525
type AllocatableDevices map[string]*AllocatableDevice
2626

27+
func (d AllocatableDevices) GetGPUByPCIeBusID(pcieBusID string) *AllocatableDevice {
28+
for _, device := range d {
29+
if device.Type() != GpuDeviceType {
30+
continue
31+
}
32+
if device.Gpu.pcieBusID == pcieBusID {
33+
return device
34+
}
35+
}
36+
return nil
37+
}
38+
2739
type AllocatableDevice struct {
28-
Gpu *GpuInfo
29-
Mig *MigDeviceInfo
40+
Gpu *GpuInfo
41+
Mig *MigDeviceInfo
42+
Vfio *VfioDeviceInfo
3043
}
3144

3245
func (d AllocatableDevice) Type() string {
@@ -36,6 +49,9 @@ func (d AllocatableDevice) Type() string {
3649
if d.Mig != nil {
3750
return MigDeviceType
3851
}
52+
if d.Vfio != nil {
53+
return VfioDeviceType
54+
}
3955
return UnknownDeviceType
4056
}
4157

@@ -45,6 +61,8 @@ func (d *AllocatableDevice) CanonicalName() string {
4561
return d.Gpu.CanonicalName()
4662
case MigDeviceType:
4763
return d.Mig.CanonicalName()
64+
case VfioDeviceType:
65+
return d.Vfio.CanonicalName()
4866
}
4967
panic("unexpected type for AllocatableDevice")
5068
}
@@ -55,6 +73,8 @@ func (d *AllocatableDevice) GetDevice() resourceapi.Device {
5573
return d.Gpu.GetDevice()
5674
case MigDeviceType:
5775
return d.Mig.GetDevice()
76+
case VfioDeviceType:
77+
return d.Vfio.GetDevice()
5878
}
5979
panic("unexpected type for AllocatableDevice")
6080
}
@@ -66,6 +86,9 @@ func (d AllocatableDevice) UUID() string {
6686
if d.Mig != nil {
6787
return d.Mig.UUID
6888
}
89+
if d.Vfio != nil {
90+
return d.Vfio.UUID
91+
}
6992
panic("unexpected type for AllocatableDevice")
7093
}
7194

@@ -91,8 +114,20 @@ func (d AllocatableDevices) MigDeviceUUIDs() []string {
91114
return uuids
92115
}
93116

117+
func (d AllocatableDevices) VfioDeviceUUIDs() []string {
118+
var uuids []string
119+
for _, device := range d {
120+
if device.Type() == VfioDeviceType {
121+
uuids = append(uuids, device.Vfio.UUID)
122+
}
123+
}
124+
slices.Sort(uuids)
125+
return uuids
126+
}
127+
94128
// UUIDs returns the UUIDs of all GPU, MIG and VFIO devices in d as a single
// slice sorted in ascending order.
func (d AllocatableDevices) UUIDs() []string {
	all := d.GpuUUIDs()
	all = append(all, d.MigDeviceUUIDs()...)
	all = append(all, d.VfioDeviceUUIDs()...)
	slices.Sort(all)
	return all
}

cmd/gpu-kubelet-plugin/cdi.go

Lines changed: 69 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ const (
4242
cdiClaimKind = cdiVendor + "/" + cdiClaimClass
4343

4444
cdiBaseSpecIdentifier = "base"
45+
cdiVfioSpecIdentifier = "vfio"
4546

4647
defaultCDIRoot = "/var/run/cdi"
4748
)
@@ -139,7 +140,69 @@ func NewCDIHandler(opts ...cdiOption) (*CDIHandler, error) {
139140
return h, nil
140141
}
141142

143+
func (cdi *CDIHandler) writeSpec(spec spec.Interface, specName string) error {
144+
// Transform the spec to make it aware that it is running inside a container.
145+
err := transformroot.New(
146+
transformroot.WithRoot(cdi.driverRoot),
147+
transformroot.WithTargetRoot(cdi.targetDriverRoot),
148+
transformroot.WithRelativeTo("host"),
149+
).Transform(spec.Raw())
150+
if err != nil {
151+
return fmt.Errorf("failed to transform driver root in CDI spec: %w", err)
152+
}
153+
154+
// Update the spec to include only the minimum version necessary.
155+
minVersion, err := cdispec.MinimumRequiredVersion(spec.Raw())
156+
if err != nil {
157+
return fmt.Errorf("failed to get minimum required CDI spec version: %w", err)
158+
}
159+
spec.Raw().Version = minVersion
160+
161+
// Write the spec out to disk.
162+
return cdi.cache.WriteSpec(spec.Raw(), specName)
163+
164+
}
165+
142166
func (cdi *CDIHandler) CreateStandardDeviceSpecFile(allocatable AllocatableDevices) error {
167+
if err := cdi.createStandardNvidiaDeviceSpecFile(allocatable); err != nil {
168+
return err
169+
}
170+
if err := cdi.createStandardVfioDeviceSpecFile(allocatable); err != nil {
171+
return err
172+
}
173+
return nil
174+
}
175+
176+
func (cdi *CDIHandler) createStandardVfioDeviceSpecFile(allocatable AllocatableDevices) error {
177+
commonEdits := GetVfioCommonCDIContainerEdits()
178+
var deviceSpecs []cdispec.Device
179+
for _, device := range allocatable {
180+
if device.Type() != VfioDeviceType {
181+
continue
182+
}
183+
edits := GetVfioCDIContainerEdits(device.Vfio)
184+
dspec := cdispec.Device{
185+
Name: device.CanonicalName(),
186+
ContainerEdits: *edits.ContainerEdits,
187+
}
188+
deviceSpecs = append(deviceSpecs, dspec)
189+
}
190+
191+
spec, err := spec.New(
192+
spec.WithVendor(cdiVendor),
193+
spec.WithClass(cdiDeviceClass),
194+
spec.WithDeviceSpecs(deviceSpecs),
195+
spec.WithEdits(*commonEdits.ContainerEdits),
196+
)
197+
if err != nil {
198+
return fmt.Errorf("failed to creat CDI spec: %w", err)
199+
}
200+
201+
specName := cdiapi.GenerateTransientSpecName(cdiVendor, cdiDeviceClass, cdiVfioSpecIdentifier)
202+
return cdi.writeSpec(spec, specName)
203+
}
204+
205+
func (cdi *CDIHandler) createStandardNvidiaDeviceSpecFile(allocatable AllocatableDevices) error {
143206
// Initialize NVML in order to get the device edits.
144207
if r := cdi.nvml.Init(); r != nvml.SUCCESS {
145208
return fmt.Errorf("failed to initialize NVML: %v", r)
@@ -166,6 +229,10 @@ func (cdi *CDIHandler) CreateStandardDeviceSpecFile(allocatable AllocatableDevic
166229
// Generate device specs for all full GPUs and MIG devices.
167230
var deviceSpecs []cdispec.Device
168231
for _, device := range allocatable {
232+
if device.Type() == VfioDeviceType {
233+
continue
234+
}
235+
169236
dspecs, err := cdi.nvcdiDevice.GetDeviceSpecsByID(device.UUID())
170237
if err != nil {
171238
return fmt.Errorf("unable to get device spec for %s: %w", device.CanonicalName(), err)
@@ -185,26 +252,8 @@ func (cdi *CDIHandler) CreateStandardDeviceSpecFile(allocatable AllocatableDevic
185252
return fmt.Errorf("failed to creat CDI spec: %w", err)
186253
}
187254

188-
// Transform the spec to make it aware that it is running inside a container.
189-
err = transformroot.New(
190-
transformroot.WithRoot(cdi.driverRoot),
191-
transformroot.WithTargetRoot(cdi.targetDriverRoot),
192-
transformroot.WithRelativeTo("host"),
193-
).Transform(spec.Raw())
194-
if err != nil {
195-
return fmt.Errorf("failed to transform driver root in CDI spec: %w", err)
196-
}
197-
198-
// Update the spec to include only the minimum version necessary.
199-
minVersion, err := cdispec.MinimumRequiredVersion(spec.Raw())
200-
if err != nil {
201-
return fmt.Errorf("failed to get minimum required CDI spec version: %w", err)
202-
}
203-
spec.Raw().Version = minVersion
204-
205-
// Write the spec out to disk.
206-
specName := cdiapi.GenerateTransientSpecName(cdiVendor, cdiDeviceClass, cdiBaseSpecIdentifier)
207-
return cdi.cache.WriteSpec(spec.Raw(), specName)
255+
specName := cdiapi.GenerateTransientSpecName(cdiVendor, cdiDeviceClass, cdiBaseSpecIdentifier)
256+
return cdi.writeSpec(spec, specName)
208257
}
209258

210259
func (cdi *CDIHandler) CreateClaimSpecFile(claimUID string, preparedDevices PreparedDevices) error {

0 commit comments

Comments
 (0)