[release-2.12] MTV-5829 | Add retry backoff and auth detection to Hyper-V collector (#7232)

github-actions[bot] · Hazanel · web-flow · commit d24caf06c730 · 2026-06-23T09:22:31.000+03:00
**Backport:** #7230

The Hyper-V collector retried failed runs with no delay, spamming the host with WinRM requests when credentials were incorrect or basic auth was not enabled.

- Add RetryInterval (5s) sleep on run() failure in Start() loop, matching the pattern used by oVirt, OpenStack, and OVA collectors.
- Introduce ErrUnauthorized sentinel error in the driver package, wrapping WinRM HTTP 401/403 responses so callers use errors.Is() instead of fragile string matching.
- Update Test() to execute a real WinRM command (IsAlive) and return http.StatusUnauthorized on auth failure, triggering ConnectionAuthFailed in the provider controller.
- Propagate auth errors from SMB prefix discovery in Connect().

Ref: https://redhat.atlassian.net/browse/MTV-5829
Resolves: MTV-5829

Signed-off-by: Elad Hazan <ehazan@redhat.com>
Co-authored-by: Elad Hazan <ehazan@redhat.com>
diff --git a/pkg/controller/provider/container/hyperv/client.go b/pkg/controller/provider/container/hyperv/client.go
@@ -3,6 +3,7 @@ package hyperv
 import (
 	"bytes"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"net"
 	"net/http"
@@ -80,7 +81,10 @@ func (r *Client) Connect(provider *api.Provider) (err error) {
 
 	if r.smbUrl != "" {
 		if pErr := r.discoverSMBWindowsPrefix(); pErr != nil {
-			r.Log.Error(pErr, "Failed to discover SMB Windows prefix, will retry")
+			if errors.Is(pErr, driver.ErrUnauthorized) {
+				return fmt.Errorf("SMB discovery auth failed: %w", pErr)
+			}
+			r.Log.Info("SMB Windows prefix not yet discovered, will attempt on next reconnect")
 		}
 	}
 
diff --git a/pkg/controller/provider/container/hyperv/collector.go b/pkg/controller/provider/container/hyperv/collector.go
@@ -2,7 +2,9 @@ package hyperv
 
 import (
 	"context"
+	"errors"
 	"fmt"
+	"net/http"
 	liburl "net/url"
 	"os"
 	libpath "path"
@@ -12,6 +14,7 @@ import (
 	api "github.com/kubev2v/forklift/pkg/apis/forklift/v1beta1"
 	hvutil "github.com/kubev2v/forklift/pkg/controller/hyperv"
 	model "github.com/kubev2v/forklift/pkg/controller/provider/model/hyperv"
+	"github.com/kubev2v/forklift/pkg/lib/hyperv/driver"
 	libmodel "github.com/kubev2v/forklift/pkg/lib/inventory/model"
 	"github.com/kubev2v/forklift/pkg/lib/logging"
 	core "k8s.io/api/core/v1"
@@ -161,9 +164,21 @@ func (r *Collector) HasParity() bool {
 	return r.parity
 }
 
-// Test connect/logout.
-func (r *Collector) Test() (_ int, err error) {
-	err = r.client.Connect(r.provider)
+// Test validates connectivity and credentials against the Hyper-V host.
+func (r *Collector) Test() (status int, err error) {
+	if err = r.client.Connect(r.provider); err != nil {
+		if errors.Is(err, driver.ErrUnauthorized) {
+			status = http.StatusUnauthorized
+		}
+		return
+	}
+	if _, err = r.client.driver.IsAlive(); err != nil {
+		if errors.Is(err, driver.ErrUnauthorized) {
+			status = http.StatusUnauthorized
+		}
+		return
+	}
+	r.log.Info("Connected to Hyper-V host via WinRM/HTTPS.")
 	return
 }
 
@@ -191,11 +206,17 @@ func (r *Collector) Start() error {
 			r.log.Info("Stopped.")
 		}()
 		for {
-			if !ctx.canceled() {
-				_ = r.run(&ctx)
-			} else {
+			if ctx.canceled() {
 				return
 			}
+			if err := r.run(&ctx); err != nil {
+				r.log.Error(err, "Run failed.", "retry", RetryInterval)
+				select {
+				case <-ctx.ctx.Done():
+					return
+				case <-time.After(RetryInterval):
+				}
+			}
 		}
 	}
 
@@ -218,12 +239,6 @@ func (r *Collector) run(ctx *Context) (err error) {
 	r.startTime = time.Now()
 	r.phase = Started
 
-	defer func() {
-		if err != nil {
-			r.log.Error(err, "Run failed.")
-		}
-	}()
-
 	// Connect directly to HyperV host via WinRM using Secret credentials
 	err = r.client.Connect(r.provider)
 	if err != nil {
diff --git a/pkg/lib/hyperv/driver/errors.go b/pkg/lib/hyperv/driver/errors.go
@@ -0,0 +1,42 @@
+package driver
+
+import (
+	"errors"
+	"fmt"
+	"net/http"
+	"regexp"
+	"strconv"
+)
+
+// ErrUnauthorized indicates a WinRM authentication / authorization failure.
+var ErrUnauthorized = errors.New("hyperv: unauthorized")
+
+// winrmHTTPStatus extracts the numeric HTTP status from the winrm library's
+// known error formats: "http response error: <code> - ..." and "http error <code>: ...".
+var winrmHTTPStatus = regexp.MustCompile(`http (?:response )?error[:\s]+(\d{3})`)
+
+// httpStatus extracts the HTTP status code from a WinRM error message; the bool reports whether a code was found.
+func httpStatus(err error) (int, bool) {
+	if err == nil {
+		return 0, false
+	}
+	if m := winrmHTTPStatus.FindStringSubmatch(err.Error()); len(m) == 2 {
+		if code, convErr := strconv.Atoi(m[1]); convErr == nil {
+			return code, true
+		}
+	}
+	return 0, false
+}
+
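+// WrapCommandError wraps err with ErrUnauthorized when its message carries an HTTP 401 or 403 status; nil and all other errors are returned unchanged.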
+func WrapCommandError(err error) error {
+	if err == nil {
+		return nil
+	}
+	if code, ok := httpStatus(err); ok {
+		if code == http.StatusUnauthorized || code == http.StatusForbidden {
+			return fmt.Errorf("%w: %w", ErrUnauthorized, err)
+		}
+	}
+	return err
+}
diff --git a/pkg/lib/hyperv/driver/errors_test.go b/pkg/lib/hyperv/driver/errors_test.go
@@ -0,0 +1,135 @@
+package driver
+
+import (
+	"errors"
+	"fmt"
+	"net/http"
+	"testing"
+)
+
+func Test_httpStatus(t *testing.T) {
+	tests := []struct {
+		name     string
+		err      error
+		wantCode int
+		wantOK   bool
+	}{
+		{
+			name:   "nil error",
+			err:    nil,
+			wantOK: false,
+		},
+		{
+			name:   "unrelated error",
+			err:    errors.New("connection refused"),
+			wantOK: false,
+		},
+		{
+			name:     "winrm basic auth 401",
+			err:      fmt.Errorf("http response error: 401 - invalid content type"),
+			wantCode: http.StatusUnauthorized,
+			wantOK:   true,
+		},
+		{
+			name:     "winrm cert auth 401",
+			err:      fmt.Errorf("http error 401: Unauthorized"),
+			wantCode: http.StatusUnauthorized,
+			wantOK:   true,
+		},
+		{
+			name:     "403 forbidden",
+			err:      fmt.Errorf("http response error: 403 - access denied"),
+			wantCode: http.StatusForbidden,
+			wantOK:   true,
+		},
+		{
+			name:     "500 server error",
+			err:      fmt.Errorf("http response error: 500 - internal"),
+			wantCode: http.StatusInternalServerError,
+			wantOK:   true,
+		},
+		{
+			name:     "wrapped winrm error",
+			err:      fmt.Errorf("WinRM command failed: %w", fmt.Errorf("http response error: 401 - invalid content type")),
+			wantCode: http.StatusUnauthorized,
+			wantOK:   true,
+		},
+		{
+			name:     "double-wrapped",
+			err:      fmt.Errorf("outer: %w", fmt.Errorf("WinRM command failed: %w", fmt.Errorf("http response error: 401 - x"))),
+			wantCode: http.StatusUnauthorized,
+			wantOK:   true,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			code, ok := httpStatus(tc.err)
+			if ok != tc.wantOK {
+				t.Errorf("httpStatus() ok = %v, want %v", ok, tc.wantOK)
+			}
+			if code != tc.wantCode {
+				t.Errorf("httpStatus() code = %d, want %d", code, tc.wantCode)
+			}
+		})
+	}
+}
+
+func TestWrapCommandError(t *testing.T) {
+	tests := []struct {
+		name       string
+		err        error
+		wantNil    bool
+		wantIsAuth bool
+	}{
+		{
+			name:    "nil",
+			err:     nil,
+			wantNil: true,
+		},
+		{
+			name:       "unrelated error passes through",
+			err:        errors.New("timeout"),
+			wantIsAuth: false,
+		},
+		{
+			name:       "401 becomes ErrUnauthorized",
+			err:        fmt.Errorf("http response error: 401 - invalid content type"),
+			wantIsAuth: true,
+		},
+		{
+			name:       "403 becomes ErrUnauthorized",
+			err:        fmt.Errorf("http response error: 403 - forbidden"),
+			wantIsAuth: true,
+		},
+		{
+			name:       "500 does not become ErrUnauthorized",
+			err:        fmt.Errorf("http response error: 500 - internal"),
+			wantIsAuth: false,
+		},
+		{
+			name:       "wrapped 401",
+			err:        fmt.Errorf("WinRM command failed: %w", fmt.Errorf("http response error: 401 - x")),
+			wantIsAuth: true,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			result := WrapCommandError(tc.err)
+			if tc.wantNil {
+				if result != nil {
+					t.Fatalf("WrapCommandError(nil) = %v, want nil", result)
+				}
+				return
+			}
+			if result == nil {
+				t.Fatal("WrapCommandError() returned nil for non-nil input")
+			}
+			isAuth := errors.Is(result, ErrUnauthorized)
+			if isAuth != tc.wantIsAuth {
+				t.Errorf("errors.Is(result, ErrUnauthorized) = %v, want %v (err=%v)", isAuth, tc.wantIsAuth, result)
+			}
+		})
+	}
+}
diff --git a/pkg/lib/hyperv/driver/winrm.go b/pkg/lib/hyperv/driver/winrm.go
@@ -78,7 +78,7 @@ func (d *WinRMDriver) Connect() error {
 		return fmt.Errorf("failed to create WinRM client: %w", err)
 	}
 	d.client = client
-	log.Info("Connected to Hyper-V host via WinRM/HTTPS", "host", d.host, "port", d.port, "insecureSkipVerify", d.insecureSkipVerify)
+	log.Info("WinRM client initialized.", "host", d.host, "port", d.port, "insecureSkipVerify", d.insecureSkipVerify)
 	return nil
 }
 
@@ -123,7 +123,7 @@ func (d *WinRMDriver) ExecuteCommandWithTimeout(command string, timeout time.Dur
 
 	stdout, stderr, exitCode, err := d.client.RunWithContextWithString(ctx, command, "")
 	if err != nil {
-		return "", fmt.Errorf("WinRM command failed: %w", err)
+		return "", WrapCommandError(fmt.Errorf("WinRM command failed: %w", err))
 	}
 
 	if exitCode != 0 {

Original file line number	Diff line number	Diff line change
`@@ -78,7 +78,7 @@ func (d *WinRMDriver) Connect() error {`
`78`	`78`	`return fmt.Errorf("failed to create WinRM client: %w", err)`
`79`	`79`	`}`
`80`	`80`	`d.client = client`
`81`		`- log.Info("Connected to Hyper-V host via WinRM/HTTPS", "host", d.host, "port", d.port, "insecureSkipVerify", d.insecureSkipVerify)`
	`81`	`+ log.Info("WinRM client initialized.", "host", d.host, "port", d.port, "insecureSkipVerify", d.insecureSkipVerify)`
`82`	`82`	`return nil`
`83`	`83`	`}`
`84`	`84`
`@@ -123,7 +123,7 @@ func (d *WinRMDriver) ExecuteCommandWithTimeout(command string, timeout time.Dur`
`123`	`123`
`124`	`124`	`stdout, stderr, exitCode, err := d.client.RunWithContextWithString(ctx, command, "")`
`125`	`125`	`if err != nil {`
`126`		`- return "", fmt.Errorf("WinRM command failed: %w", err)`
	`126`	`+ return "", WrapCommandError(fmt.Errorf("WinRM command failed: %w", err))`
`127`	`127`	`}`
`128`	`128`
`129`	`129`	`if exitCode != 0 {`