Skip to content

fix: kubelet --root-dir can be used #111

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cmd/k8s-rdma-shared-dp/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ func main() {
log.Fatalf("Exiting.. one or more invalid configuration(s) given: %v", err)
}

if err := rm.SetWatchMode(); err != nil {
log.Fatalln(err.Error())
}

if err := rm.ValidateRdmaSystemMode(); err != nil {
log.Fatalf("Exiting.. can not change : %v", err)
}
Expand Down
1 change: 1 addition & 0 deletions deployment/k8s/base/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ metadata:
data:
config.json: |
{
"kubeletRootDir": "/var/lib/kubelet",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lets have this one as a cli flag i dont think users should be concerned about it. (--kubelet-root-dir)

thats a deployment decision.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

normally, i also dont think users should be concerned about it, its default value is /var/lib/kubelet,
if necessary, users can set kubeletRootDir=xxx

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cli and configmap are two different configurations. If necessary, It can be changed to cli mode.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It can be changed to cli mode.

that is my preference.

"periodicUpdateInterval": 300,
"configList": [{
"resourceName": "hca_shared_devices_a",
Expand Down
33 changes: 25 additions & 8 deletions pkg/resources/resources_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"fmt"
"log"
"os"
"path"
"regexp"
"strconv"
"time"
Expand Down Expand Up @@ -52,6 +53,9 @@ const (

// RDMA subsystem network namespace mode
rdmaExclusive = "exclusive"

// Default kubelet root dir
defaultKubeletRootDir = "/var/lib/kubelet"
)

var (
Expand All @@ -72,20 +76,14 @@ type resourceManager struct {
rds types.RdmaDeviceSpec
PeriodicUpdateInterval time.Duration
useCdi bool
kubeletRootDir string
}

func NewResourceManager(configFile string, useCdi bool) types.ResourceManager {
watcherMode := detectPluginWatchMode(activeSockDir)
if watcherMode {
fmt.Println("Using Kubelet Plugin Registry Mode")
} else {
fmt.Println("Using Deprecated Devie Plugin Registry Path")
}
return &resourceManager{
configFile: configFile,
defaultResourcePrefix: rdmaHcaResourcePrefix,
socketSuffix: socketSuffix,
watchMode: watcherMode,
netlinkManager: &netlinkManager{},
rds: NewRdmaDeviceSpec(requiredRdmaDevices),
useCdi: useCdi,
Expand All @@ -107,6 +105,12 @@ func (rm *resourceManager) ReadConfig() error {

log.Printf("loaded config: %+v \n", config.ConfigList)

// check kubelet root dir config
rm.kubeletRootDir = defaultKubeletRootDir
if config.KubeletRootDir != "" {
rm.kubeletRootDir = config.KubeletRootDir
}

// if periodic update is not set then use the default value
if config.PeriodicUpdateInterval == nil {
log.Println("no periodic update interval is set, use default interval 60 seconds")
Expand Down Expand Up @@ -228,7 +232,7 @@ func (rm *resourceManager) InitServers() error {
return err
}
}
rs, err := newResourceServer(config, filteredDevices, rm.watchMode, rm.socketSuffix, rm.useCdi)
rs, err := newResourceServer(config, filteredDevices, rm.watchMode, rm.socketSuffix, rm.useCdi, rm.kubeletRootDir)
if err != nil {
return err
}
Expand Down Expand Up @@ -418,3 +422,16 @@ func (rm *resourceManager) PeriodicUpdate() func() {
}
}
}

func (rm *resourceManager) SetWatchMode() error {
sockPathDir := path.Join(rm.kubeletRootDir, "plugins_registry")
watcherMode := detectPluginWatchMode(sockPathDir)
if watcherMode {
fmt.Println("Using Kubelet Plugin Registry Mode")
} else {
fmt.Println("Using Deprecated Devie Plugin Registry Path")
}
rm.watchMode = watcherMode

return nil
}
13 changes: 8 additions & 5 deletions pkg/resources/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"log"
"net"
"os"
"path"
"path/filepath"
"strconv"
"sync"
Expand Down Expand Up @@ -68,6 +69,7 @@ type resourceServer struct {
useCdi bool
cdi cdi.CDI
cdiResourceName string
kubeletRootDir string
}

func (rsc *resourcesServerPort) GetServer() *grpc.Server {
Expand Down Expand Up @@ -123,10 +125,10 @@ func (rsc *resourcesServerPort) Dial(unixSocketPath string, timeout time.Duratio

// newResourceServer returns an initialized server
func newResourceServer(config *types.UserConfig, devices []types.PciNetDevice, watcherMode bool,
socketSuffix string, useCdi bool) (types.ResourceServer, error) {
socketSuffix string, useCdi bool, kubeletRootDir string) (types.ResourceServer, error) {
var devs []*pluginapi.Device

sockDir := activeSockDir
sockDir := path.Join(kubeletRootDir, "plugins_registry")

if config.RdmaHcaMax < 0 {
return nil, fmt.Errorf("error: Invalid value for rdmaHcaMax < 0: %d", config.RdmaHcaMax)
Expand All @@ -151,7 +153,7 @@ func newResourceServer(config *types.UserConfig, devices []types.PciNetDevice, w
}

if !watcherMode {
sockDir = deprecatedSockDir
sockDir = path.Join(kubeletRootDir, "device-plugins")
}

socketName := fmt.Sprintf("%s.%s", config.ResourceName, socketSuffix)
Expand All @@ -172,6 +174,7 @@ func newResourceServer(config *types.UserConfig, devices []types.PciNetDevice, w
useCdi: useCdi,
cdi: cdi.New(),
cdiResourceName: config.ResourceName,
kubeletRootDir: kubeletRootDir,
}, nil
}

Expand Down Expand Up @@ -277,7 +280,7 @@ func (rs *resourceServer) Watch() {

// Register registers the device plugin for the given resourceName with Kubelet.
func (rs *resourceServer) register() error {
kubeletEndpoint := filepath.Join(deprecatedSockDir, kubeEndPoint)
kubeletEndpoint := filepath.Join(rs.kubeletRootDir, "device-plugins", kubeEndPoint)
conn, err := rs.rsConnector.Dial(kubeletEndpoint, cDialTimeout)
if err != nil {
return err
Expand Down Expand Up @@ -424,7 +427,7 @@ func (rs *resourceServer) GetInfo(ctx context.Context, rqt *registerapi.InfoRequ
pluginInfoResponse := &registerapi.PluginInfo{
Type: registerapi.DevicePlugin,
Name: rs.resourceName,
Endpoint: filepath.Join(activeSockDir, rs.socketName),
Endpoint: filepath.Join(rs.kubeletRootDir, "plugins_registry", rs.socketName),
SupportedVersions: []string{"v1alpha1", "v1beta1"},
}
return pluginInfoResponse, nil
Expand Down
2 changes: 2 additions & 0 deletions pkg/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ type UserConfig struct {

// UserConfigList config list for servers
type UserConfigList struct {
KubeletRootDir string `json:"kubeletRootDir"`
PeriodicUpdateInterval *int `json:"periodicUpdateInterval"`
ConfigList []UserConfig `json:"configList"`
}
Expand Down Expand Up @@ -75,6 +76,7 @@ type ResourceManager interface {
RestartAllServers() error
GetFilteredDevices(devices []PciNetDevice, selector *Selectors) []PciNetDevice
PeriodicUpdate() func()
SetWatchMode() error
}

// ResourceServerPort to connect the resources server to k8s
Expand Down