forked from kubewharf/katalyst-core
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgpu_plugin.go
More file actions
75 lines (63 loc) · 2.66 KB
/
gpu_plugin.go
File metadata and controls
75 lines (63 loc) · 2.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
/*
Copyright 2022 The Katalyst Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package qrm
import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
	cliflag "k8s.io/component-base/cli/flag"

	"github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/options/qrm/gpustrategy"
	qrmconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm"
)
// GPUOptions holds the command-line configurable settings of the GPU
// resource plugin. Its fields are bound to flags by AddFlags and copied into
// a qrmconfig.GPUQRMPluginConfig by ApplyTo.
type GPUOptions struct {
// PolicyName is the policy the GPU resource plugin should use
// (flag: gpu-resource-plugin-policy).
PolicyName string
// GPUDeviceNames lists the GPU resource names (flag: gpu-resource-names).
GPUDeviceNames []string
// GPUMemoryAllocatablePerGPU is the total memory allocatable for each GPU,
// kept as a string here and parsed with resource.ParseQuantity in ApplyTo
// (flag: gpu-memory-allocatable-per-gpu).
GPUMemoryAllocatablePerGPU string
// SkipGPUStateCorruption is described by its flag as "skip gpu state
// corruption, and it will be used after updating state properties"
// (flag: skip-gpu-state-corruption).
SkipGPUStateCorruption bool
// RDMADeviceNames lists the RDMA resource names (flag: rdma-resource-names).
RDMADeviceNames []string
// GPUStrategyOptions carries the nested GPU strategy options; AddFlags and
// ApplyTo delegate to its methods of the same names.
GPUStrategyOptions *gpustrategy.GPUStrategyOptions
}
// NewGPUOptions returns a GPUOptions populated with the plugin defaults:
// the "static" policy, the "nvidia.com/gpu" resource name, an allocatable
// memory value of "100" per GPU, no RDMA resource names, and the default GPU
// strategy options. SkipGPUStateCorruption is left at its zero value (false).
func NewGPUOptions() *GPUOptions {
	opts := new(GPUOptions)

	opts.PolicyName = "static"
	opts.GPUDeviceNames = []string{"nvidia.com/gpu"}
	opts.GPUMemoryAllocatablePerGPU = "100"
	// Deliberately an empty, non-nil slice, matching the original default.
	opts.RDMADeviceNames = []string{}
	opts.GPUStrategyOptions = gpustrategy.NewGPUStrategyOptions()

	return opts
}
// AddFlags registers the GPU resource plugin flags in the
// "gpu_resource_plugin" flag set of fss, using the current field values of o
// as the flag defaults, and then lets the nested GPU strategy options
// register their own flags on fss.
func (o *GPUOptions) AddFlags(fss *cliflag.NamedFlagSets) {
	flags := fss.FlagSet("gpu_resource_plugin")

	flags.StringVar(
		&o.PolicyName,
		"gpu-resource-plugin-policy",
		o.PolicyName,
		"The policy gpu resource plugin should use",
	)
	flags.StringSliceVar(
		&o.GPUDeviceNames,
		"gpu-resource-names",
		o.GPUDeviceNames,
		"The name of the GPU resource",
	)
	flags.StringVar(
		&o.GPUMemoryAllocatablePerGPU,
		"gpu-memory-allocatable-per-gpu",
		o.GPUMemoryAllocatablePerGPU,
		"The total memory allocatable for each GPU, e.g. 100",
	)
	flags.BoolVar(
		&o.SkipGPUStateCorruption,
		"skip-gpu-state-corruption",
		o.SkipGPUStateCorruption,
		"skip gpu state corruption, and it will be used after updating state properties",
	)
	flags.StringSliceVar(
		&o.RDMADeviceNames,
		"rdma-resource-names",
		o.RDMADeviceNames,
		"The name of the RDMA resource",
	)

	// Strategy flags are registered by the nested options on the shared set.
	o.GPUStrategyOptions.AddFlags(fss)
}
// ApplyTo copies the GPU plugin options held by o into conf.
//
// GPUMemoryAllocatablePerGPU is parsed with resource.ParseQuantity before any
// field of conf is written, so a malformed quantity is reported without
// leaving conf partially updated by this method's own assignments.
//
// It returns a non-nil error when the memory quantity cannot be parsed or
// when applying the nested GPU strategy options fails. Both errors are
// wrapped with %w, so the underlying cause stays reachable through
// errors.Is / errors.As.
func (o *GPUOptions) ApplyTo(conf *qrmconfig.GPUQRMPluginConfig) error {
	// Validate the only fallible input of this method up front.
	gpuMemory, err := resource.ParseQuantity(o.GPUMemoryAllocatablePerGPU)
	if err != nil {
		return fmt.Errorf("parsing gpu-memory-allocatable-per-gpu %q: %w",
			o.GPUMemoryAllocatablePerGPU, err)
	}

	conf.PolicyName = o.PolicyName
	conf.GPUDeviceNames = o.GPUDeviceNames
	conf.GPUMemoryAllocatablePerGPU = gpuMemory
	conf.SkipGPUStateCorruption = o.SkipGPUStateCorruption
	conf.RDMADeviceNames = o.RDMADeviceNames

	if err := o.GPUStrategyOptions.ApplyTo(conf.GPUStrategyConfig); err != nil {
		return fmt.Errorf("applying gpu strategy options: %w", err)
	}
	return nil
}