diff --git a/cmd/compute-domain-kubelet-plugin/driver.go b/cmd/compute-domain-kubelet-plugin/driver.go index 72e5cbda6..13d900f7b 100644 --- a/cmd/compute-domain-kubelet-plugin/driver.go +++ b/cmd/compute-domain-kubelet-plugin/driver.go @@ -122,9 +122,9 @@ func NewDriver(ctx context.Context, config *Config) (*driver, error) { return nil, err } - healthcheck, err := startHealthcheck(ctx, config) + healthcheck, err := setupHealthcheckPrimitives(ctx, config) if err != nil { - return nil, fmt.Errorf("start healthcheck: %w", err) + return nil, fmt.Errorf("error setting up healtcheck primitives: %w", err) } driver.healthcheck = healthcheck diff --git a/cmd/compute-domain-kubelet-plugin/health.go b/cmd/compute-domain-kubelet-plugin/health.go index 0aed25a10..b8de61c13 100644 --- a/cmd/compute-domain-kubelet-plugin/health.go +++ b/cmd/compute-domain-kubelet-plugin/health.go @@ -46,16 +46,17 @@ type healthcheck struct { draClient drapb.DRAPluginClient } -func startHealthcheck(ctx context.Context, config *Config) (*healthcheck, error) { +func setupHealthcheckPrimitives(ctx context.Context, config *Config) (*healthcheck, error) { port := config.flags.healthcheckPort if port < 0 { return nil, nil } + // Bind on all available interfaces. addr := net.JoinHostPort("", strconv.Itoa(port)) lis, err := net.Listen("tcp", addr) if err != nil { - return nil, fmt.Errorf("failed to listen for healthcheck service at %s: %w", addr, err) + return nil, fmt.Errorf("failed to listen on %s: %w", addr, err) } regSockPath := (&url.URL{ @@ -64,26 +65,28 @@ func startHealthcheck(ctx context.Context, config *Config) (*healthcheck, error) // are enabled and the filename includes a uid. Path: path.Join(config.flags.kubeletRegistrarDirectoryPath, DriverName+"-reg.sock"), }).String() - klog.V(6).Infof("connecting to registration socket path=%s", regSockPath) + + klog.V(6).Infof("Connect to registration socket at %s", regSockPath) regConn, err := grpc.NewClient( regSockPath, grpc.WithTransportCredentials(insecure.NewCredentials()), ) if err != nil { - return nil, fmt.Errorf("connect to registration socket: %w", err) + return nil, fmt.Errorf("error connecting to registration socket: %w", err) } draSockPath := (&url.URL{ Scheme: "unix", Path: path.Join(config.DriverPluginPath(), "dra.sock"), }).String() - klog.V(6).Infof("connecting to DRA socket path=%s", draSockPath) + + klog.V(6).Infof("Connect to plugin socket at %s", draSockPath) draConn, err := grpc.NewClient( draSockPath, grpc.WithTransportCredentials(insecure.NewCredentials()), ) if err != nil { - return nil, fmt.Errorf("connect to DRA socket: %w", err) + return nil, fmt.Errorf("error connecting to plugin socket: %w", err) } server := grpc.NewServer() @@ -97,9 +100,10 @@ func startHealthcheck(ctx context.Context, config *Config) (*healthcheck, error) healthcheck.wg.Add(1) go func() { defer healthcheck.wg.Done() - klog.Infof("starting healthcheck service at %s", lis.Addr().String()) + klog.Infof("Starting healthcheck server on %s", lis.Addr().String()) if err := server.Serve(lis); err != nil { - klog.Errorf("failed to serve healthcheck service on %s: %v", addr, err) + // Note(JP): let's review if this should be fatal + klog.Errorf("failed to start healthcheck server: %v", err) } }() @@ -108,13 +112,13 @@ func startHealthcheck(ctx context.Context, config *Config) (*healthcheck, error) func (h *healthcheck) Stop() { if h.server != nil { - klog.Info("Stopping healthcheck service") + klog.Info("Stopping healthcheck server") h.server.GracefulStop() } h.wg.Wait() } -// Check implements [grpc_health_v1.HealthServer]. +// Check implements [grpc_health_v1.HealthServer.Check]. func (h *healthcheck) Check(ctx context.Context, req *grpc_health_v1.HealthCheckRequest) (*grpc_health_v1.HealthCheckResponse, error) { knownServices := map[string]struct{}{"": {}, "liveness": {}} if _, known := knownServices[req.GetService()]; !known { @@ -125,16 +129,19 @@ func (h *healthcheck) Check(ctx context.Context, req *grpc_health_v1.HealthCheck Status: grpc_health_v1.HealthCheckResponse_NOT_SERVING, } + // This simulates the kubelet reaching out to the plugin for discovery + // (towards registering it). info, err := h.regClient.GetInfo(ctx, ®isterapi.InfoRequest{}) if err != nil { - klog.ErrorS(err, "failed to call GetInfo") + klog.ErrorS(err, "failed to call GetInfo on registration socket") return status, nil } klog.V(6).Infof("Successfully invoked GetInfo: %v", info) + // This simulates the kubelet reaching out to the plugin _, err = h.draClient.NodePrepareResources(ctx, &drapb.NodePrepareResourcesRequest{}) if err != nil { - klog.ErrorS(err, "failed to call NodePrepareResources") + klog.ErrorS(err, "failed to call NodePrepareResources on plugin socket") return status, nil } klog.V(6).Info("Successfully invoked NodePrepareResources")