diff --git a/go.mod b/go.mod index 44bc26747..6e4994e7b 100644 --- a/go.mod +++ b/go.mod @@ -1,12 +1,12 @@ module github.com/longhorn/longhorn-instance-manager -go 1.25.0 +go 1.25.3 require ( github.com/google/uuid v1.6.0 github.com/longhorn/backupstore v0.0.0-20260414054550-8570535ce7ad - github.com/longhorn/go-common-libs v0.0.0-20260328134226-cafa38fc4ce8 - github.com/longhorn/go-spdk-helper v0.0.3-0.20250712161648-42d38592f838 + github.com/longhorn/go-common-libs v0.0.0-20260502161928-1e84fa75a8f1 + github.com/longhorn/go-spdk-helper v0.6.1 github.com/longhorn/longhorn-engine v1.9.2 github.com/longhorn/longhorn-spdk-engine v0.0.0-20250805013325-da3d062c9555 github.com/longhorn/types v0.0.0-20260327130848-66f6de8a2fb3 @@ -26,6 +26,7 @@ require ( github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.5.0 // indirect github.com/RoaringBitmap/roaring v1.9.4 // indirect + github.com/avast/retry-go/v4 v4.7.0 // indirect github.com/aws/aws-sdk-go-v2 v1.41.5 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.8 // indirect github.com/aws/aws-sdk-go-v2/config v1.32.13 // indirect @@ -102,7 +103,7 @@ require ( github.com/x448/float16 v0.8.4 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90 // indirect + golang.org/x/exp v0.0.0-20260410095643-746e56fc9e2f // indirect golang.org/x/net v0.49.0 // indirect golang.org/x/oauth2 v0.34.0 // indirect golang.org/x/sys v0.40.0 // indirect diff --git a/go.sum b/go.sum index 267c66235..d8960f5ef 100644 --- a/go.sum +++ b/go.sum @@ -15,6 +15,8 @@ github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2/go.mod h1:wP83 github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= github.com/RoaringBitmap/roaring v1.9.4 h1:yhEIoH4YezLYT04s1nHehNO64EKFTop/wBhxv2QzDdQ= github.com/RoaringBitmap/roaring v1.9.4/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90= +github.com/avast/retry-go/v4 v4.7.0 h1:yjDs35SlGvKwRNSykujfjdMxMhMQQM0TnIjJaHB+Zio= +github.com/avast/retry-go/v4 v4.7.0/go.mod h1:ZMPDa3sY2bKgpLtap9JRUgk2yTAba7cgiFhqxY2Sg6Q= github.com/aws/aws-sdk-go-v2 v1.41.5 h1:dj5kopbwUsVUVFgO4Fi5BIT3t4WyqIDjGKCangnV/yY= github.com/aws/aws-sdk-go-v2 v1.41.5/go.mod h1:mwsPRE8ceUUpiTgF7QmQIJ7lgsKUPQOUl3o72QBrE1o= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.8 h1:eBMB84YGghSocM7PsjmmPffTa+1FBUeNvGvFou6V/4o= @@ -156,12 +158,12 @@ github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0 github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/longhorn/backupstore v0.0.0-20260414054550-8570535ce7ad h1:B1nZo9xIBUueGBHlpcHKb+c08473MSj0EkXBuN3pzBQ= github.com/longhorn/backupstore v0.0.0-20260414054550-8570535ce7ad/go.mod h1:20Yvj7Gz9wkp1iHD1u77+CWG281L6SVe8/SZDnwQMAI= -github.com/longhorn/go-common-libs v0.0.0-20260328134226-cafa38fc4ce8 h1:HxGyRXTDRYF7vwtaT2ESlV3ognMHolL4lBCStHXvf7o= -github.com/longhorn/go-common-libs v0.0.0-20260328134226-cafa38fc4ce8/go.mod h1:Vw9cRchaffWmZMjHOjqhSG37tFN9DdQ/isKs4ViPXQE= +github.com/longhorn/go-common-libs v0.0.0-20260502161928-1e84fa75a8f1 h1:sjDswhcTrEqeXSjcHjaeccYT1v1WW9wNHB3fWFArM38= +github.com/longhorn/go-common-libs v0.0.0-20260502161928-1e84fa75a8f1/go.mod h1:+aXFI8DmHTX4Az9pZF16/XKfrVKWIESJjw+hcxdMslw= github.com/longhorn/go-iscsi-helper v0.0.0-20250810143507-5c5f9a0060b4 h1:i6Wac1SO2YwXkqZetnc1KZc+2PnFZ9xSUs99nFAhHiU= github.com/longhorn/go-iscsi-helper v0.0.0-20250810143507-5c5f9a0060b4/go.mod h1:EIkghFAyqv+8ktznS4a+leJ6KKQLPt9zrcdS2Zsfk+M= -github.com/longhorn/go-spdk-helper v0.0.3-0.20250712161648-42d38592f838 h1:j3PgIRQihtn7R94mnVyTvc8YStYKH7JumiEI2yP4YQI= -github.com/longhorn/go-spdk-helper v0.0.3-0.20250712161648-42d38592f838/go.mod h1:QOzb0AX/CUcVtf5h6DMyr0W/7EkQrjKn2N593t35UWs= +github.com/longhorn/go-spdk-helper v0.6.1 h1:eFfQzH+E7jP08K1YjySnfoNLZTED6asuXnkx6MuA8w0= +github.com/longhorn/go-spdk-helper v0.6.1/go.mod h1:ZxbMkGsbC2TpzuGLjn9yXXExolt8/F3dl/C8O9/Qa7s= github.com/longhorn/longhorn-engine v1.9.2 h1:2WB+5QENqO2xtVOJfxa0nX1D40/Qpxu8YLzwaxgiOBg= github.com/longhorn/longhorn-engine v1.9.2/go.mod h1:JwIMErlXODX8Ee4P4CrShkX0Q82+0gUIA5bzgC10s0c= github.com/longhorn/longhorn-spdk-engine v0.0.0-20250805013325-da3d062c9555 h1:KGL+tVjp1lDZkIPrEAuJZmL8Px2aSP0T/HyK/6zApnc= @@ -279,8 +281,8 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.47.0 h1:V6e3FRj+n4dbpw86FJ8Fv7XVOql7TEwpHapKoMJ/GO8= golang.org/x/crypto v0.47.0/go.mod h1:ff3Y9VzzKbwSSEzWqJsJVBnWmRwRSHt/6Op5n9bQc4A= -golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90 h1:jiDhWWeC7jfWqR9c/uplMOqJ0sbNlNWv0UkzE0vX1MA= -golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90/go.mod h1:xE1HEv6b+1SCZ5/uscMRjUBKtIxworgEcEi+/n9NQDQ= +golang.org/x/exp v0.0.0-20260410095643-746e56fc9e2f h1:W3F4c+6OLc6H2lb//N1q4WpJkhzJCK5J6kUi1NTVXfM= +golang.org/x/exp v0.0.0-20260410095643-746e56fc9e2f/go.mod h1:J1xhfL/vlindoeF/aINzNzt2Bket5bjo9sdOYzOsU80= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -318,8 +320,8 @@ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGm golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.43.0 h1:12BdW9CeB3Z+J/I/wj34VMl8X+fEXBxVR90JeMX5E7s= -golang.org/x/tools v0.43.0/go.mod h1:uHkMso649BX2cZK6+RpuIPXS3ho2hZo4FVwfoy1vIk0= +golang.org/x/tools v0.44.0 h1:UP4ajHPIcuMjT1GqzDWRlalUEoY+uzoZKnhOjbIPD2c= +golang.org/x/tools v0.44.0/go.mod h1:KA0AfVErSdxRZIsOVipbv3rQhVXTnlU6UhKxHd1seDI= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/vendor/github.com/avast/retry-go/v4/.gitignore b/vendor/github.com/avast/retry-go/v4/.gitignore new file mode 100644 index 000000000..c40eb23f9 --- /dev/null +++ b/vendor/github.com/avast/retry-go/v4/.gitignore @@ -0,0 +1,21 @@ +# Binaries for programs and plugins +*.exe +*.dll +*.so +*.dylib + +# Test binary, build with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 +.glide/ + +# dep +vendor/ +Gopkg.lock + +# cover +coverage.txt diff --git a/vendor/github.com/avast/retry-go/v4/.godocdown.tmpl b/vendor/github.com/avast/retry-go/v4/.godocdown.tmpl new file mode 100644 index 000000000..32b80df81 --- /dev/null +++ b/vendor/github.com/avast/retry-go/v4/.godocdown.tmpl @@ -0,0 +1,38 @@ +# {{ .Name }} + +[![Release](https://img.shields.io/github/release/avast/retry-go.svg?style=flat-square)](https://github.com/avast/retry-go/releases/latest) +[![Software License](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat-square)](LICENSE.md) +![GitHub Actions](https://github.com/avast/retry-go/actions/workflows/workflow.yaml/badge.svg) +[![Go Report Card](https://goreportcard.com/badge/github.com/avast/retry-go?style=flat-square)](https://goreportcard.com/report/github.com/avast/retry-go) +[![Go Reference](https://pkg.go.dev/badge/github.com/avast/retry-go/v4.svg)](https://pkg.go.dev/github.com/avast/retry-go/v4) +[![codecov.io](https://codecov.io/github/avast/retry-go/coverage.svg?branch=master)](https://codecov.io/github/avast/retry-go?branch=master) +[![Sourcegraph](https://sourcegraph.com/github.com/avast/retry-go/-/badge.svg)](https://sourcegraph.com/github.com/avast/retry-go?badge) + +{{ .EmitSynopsis }} + +{{ .EmitUsage }} + +## Contributing + +Contributions are very much welcome. + +### Makefile + +Makefile provides several handy rules, like README.md `generator` , `setup` for prepare build/dev environment, `test`, `cover`, etc... + +Try `make help` for more information. + +### Before pull request + +> maybe you need `make setup` in order to setup environment + +please try: +* run tests (`make test`) +* run linter (`make lint`) +* if your IDE don't automaticaly do `go fmt`, run `go fmt` (`make fmt`) + +### README + +README.md are generate from template [.godocdown.tmpl](.godocdown.tmpl) and code documentation via [godocdown](https://github.com/robertkrimen/godocdown). + +Never edit README.md direct, because your change will be lost. diff --git a/vendor/github.com/avast/retry-go/v4/LICENSE b/vendor/github.com/avast/retry-go/v4/LICENSE new file mode 100644 index 000000000..f63fca814 --- /dev/null +++ b/vendor/github.com/avast/retry-go/v4/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 Avast + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/github.com/avast/retry-go/v4/Makefile b/vendor/github.com/avast/retry-go/v4/Makefile new file mode 100644 index 000000000..86544d239 --- /dev/null +++ b/vendor/github.com/avast/retry-go/v4/Makefile @@ -0,0 +1,59 @@ +SOURCE_FILES?=$$(go list ./... | grep -v /vendor/) +TEST_PATTERN?=. +TEST_OPTIONS?= +VERSION?=$$(cat VERSION) +LINTER?=$$(which golangci-lint) +LINTER_VERSION=1.50.0 + +ifeq ($(OS),Windows_NT) + LINTER_FILE=golangci-lint-$(LINTER_VERSION)-windows-amd64.zip + LINTER_UNPACK= >| app.zip; unzip -j app.zip -d $$GOPATH/bin; rm app.zip +else ifeq ($(OS), Darwin) + LINTER_FILE=golangci-lint-$(LINTER_VERSION)-darwin-amd64.tar.gz + LINTER_UNPACK= | tar xzf - -C $$GOPATH/bin --wildcards --strip 1 "**/golangci-lint" +else + LINTER_FILE=golangci-lint-$(LINTER_VERSION)-linux-amd64.tar.gz + LINTER_UNPACK= | tar xzf - -C $$GOPATH/bin --wildcards --strip 1 "**/golangci-lint" +endif + +setup: + go install github.com/pierrre/gotestcover@latest + go install golang.org/x/tools/cmd/cover@latest + go install github.com/robertkrimen/godocdown/godocdown@latest + go mod download + +generate: ## Generate README.md + godocdown >| README.md + +test: generate test_and_cover_report lint + +test_and_cover_report: + gotestcover $(TEST_OPTIONS) -covermode=atomic -coverprofile=coverage.txt $(SOURCE_FILES) -run $(TEST_PATTERN) -timeout=2m + +cover: test ## Run all the tests and opens the coverage report + go tool cover -html=coverage.txt + +fmt: ## gofmt and goimports all go files + find . -name '*.go' -not -wholename './vendor/*' | while read -r file; do gofmt -w -s "$$file"; goimports -w "$$file"; done + +lint: ## Run all the linters + @if [ "$(LINTER)" = "" ]; then\ + curl -L https://github.com/golangci/golangci-lint/releases/download/v$(LINTER_VERSION)/$(LINTER_FILE) $(LINTER_UNPACK) ;\ + chmod +x $$GOPATH/bin/golangci-lint;\ + fi + + golangci-lint run + +ci: test_and_cover_report ## Run all the tests but no linters - use https://golangci.com integration instead + +build: + go build + +release: ## Release new version + git tag | grep -q $(VERSION) && echo This version was released! Increase VERSION! || git tag $(VERSION) && git push origin $(VERSION) && git tag v$(VERSION) && git push origin v$(VERSION) + +# Absolutely awesome: http://marmelab.com/blog/2016/02/29/auto-documented-makefile.html +help: + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' + +.DEFAULT_GOAL := build diff --git a/vendor/github.com/avast/retry-go/v4/README.md b/vendor/github.com/avast/retry-go/v4/README.md new file mode 100644 index 000000000..b73034339 --- /dev/null +++ b/vendor/github.com/avast/retry-go/v4/README.md @@ -0,0 +1,494 @@ +# retry + +[![Release](https://img.shields.io/github/release/avast/retry-go.svg?style=flat-square)](https://github.com/avast/retry-go/releases/latest) +[![Software License](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat-square)](LICENSE.md) +![GitHub Actions](https://github.com/avast/retry-go/actions/workflows/workflow.yaml/badge.svg) +[![Go Report Card](https://goreportcard.com/badge/github.com/avast/retry-go?style=flat-square)](https://goreportcard.com/report/github.com/avast/retry-go) +[![Go Reference](https://pkg.go.dev/badge/github.com/avast/retry-go/v4.svg)](https://pkg.go.dev/github.com/avast/retry-go/v4) +[![codecov.io](https://codecov.io/github/avast/retry-go/coverage.svg?branch=master)](https://codecov.io/github/avast/retry-go?branch=master) +[![Sourcegraph](https://sourcegraph.com/github.com/avast/retry-go/-/badge.svg)](https://sourcegraph.com/github.com/avast/retry-go?badge) + +Simple library for retry mechanism + +Slightly inspired by +[Try::Tiny::Retry](https://metacpan.org/pod/Try::Tiny::Retry) + +# SYNOPSIS + +HTTP GET with retry: + + url := "http://example.com" + var body []byte + + err := retry.Do( + func() error { + resp, err := http.Get(url) + if err != nil { + return err + } + defer resp.Body.Close() + body, err = ioutil.ReadAll(resp.Body) + if err != nil { + return err + } + return nil + }, + ) + + if err != nil { + // handle error + } + + fmt.Println(string(body)) + +HTTP GET with retry with data: + + url := "http://example.com" + + body, err := retry.DoWithData( + func() ([]byte, error) { + resp, err := http.Get(url) + if err != nil { + return nil, err + } + defer resp.Body.Close() + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + return body, nil + }, + ) + + if err != nil { + // handle error + } + + fmt.Println(string(body)) + +[More examples](https://github.com/avast/retry-go/tree/master/examples) + +# SEE ALSO + +* [giantswarm/retry-go](https://github.com/giantswarm/retry-go) - slightly +complicated interface. + +* [sethgrid/pester](https://github.com/sethgrid/pester) - only http retry for +http calls with retries and backoff + +* [cenkalti/backoff](https://github.com/cenkalti/backoff) - Go port of the +exponential backoff algorithm from Google's HTTP Client Library for Java. Really +complicated interface. + +* [rafaeljesus/retry-go](https://github.com/rafaeljesus/retry-go) - looks good, +slightly similar as this package, don't have 'simple' `Retry` method + +* [matryer/try](https://github.com/matryer/try) - very popular package, +nonintuitive interface (for me) + +# BREAKING CHANGES + +* 4.0.0 + + - infinity retry is possible by set `Attempts(0)` by PR [#49](https://github.com/avast/retry-go/pull/49) + +* 3.0.0 + + - `DelayTypeFunc` accepts a new parameter `err` - this breaking change affects only your custom Delay Functions. This change allow [make delay functions based on error](examples/delay_based_on_error_test.go). + +* 1.0.2 -> 2.0.0 + + - argument of `retry.Delay` is final delay (no multiplication by `retry.Units` anymore) + - function `retry.Units` are removed + - [more about this breaking change](https://github.com/avast/retry-go/issues/7) + +* 0.3.0 -> 1.0.0 + + - `retry.Retry` function are changed to `retry.Do` function + - `retry.RetryCustom` (OnRetry) and `retry.RetryCustomWithOpts` functions are now implement via functions produces Options (aka `retry.OnRetry`) + +## Usage + +#### func BackOffDelay + +```go +func BackOffDelay(n uint, _ error, config *Config) time.Duration +``` +BackOffDelay is a DelayType which increases delay between consecutive retries + +#### func Do + +```go +func Do(retryableFunc RetryableFunc, opts ...Option) error +``` + +#### func DoWithData + +```go +func DoWithData[T any](retryableFunc RetryableFuncWithData[T], opts ...Option) (T, error) +``` + +#### func FixedDelay + +```go +func FixedDelay(_ uint, _ error, config *Config) time.Duration +``` +FixedDelay is a DelayType which keeps delay the same through all iterations + +#### func IsRecoverable + +```go +func IsRecoverable(err error) bool +``` +IsRecoverable checks if error is an instance of `unrecoverableError` + +#### func RandomDelay + +```go +func RandomDelay(_ uint, _ error, config *Config) time.Duration +``` +RandomDelay is a DelayType which picks a random delay up to config.maxJitter + +#### func Unrecoverable + +```go +func Unrecoverable(err error) error +``` +Unrecoverable wraps an error in `unrecoverableError` struct + +#### type Config + +```go +type Config struct { +} +``` + + +#### type DelayTypeFunc + +```go +type DelayTypeFunc func(n uint, err error, config *Config) time.Duration +``` + +DelayTypeFunc is called to return the next delay to wait after the retriable +function fails on `err` after `n` attempts. + +#### func CombineDelay + +```go +func CombineDelay(delays ...DelayTypeFunc) DelayTypeFunc +``` +CombineDelay is a DelayType the combines all of the specified delays into a new +DelayTypeFunc + +#### type Error + +```go +type Error []error +``` + +Error type represents list of errors in retry + +#### func (Error) As + +```go +func (e Error) As(target interface{}) bool +``` + +#### func (Error) Error + +```go +func (e Error) Error() string +``` +Error method return string representation of Error It is an implementation of +error interface + +#### func (Error) Is + +```go +func (e Error) Is(target error) bool +``` + +#### func (Error) Unwrap + +```go +func (e Error) Unwrap() error +``` +Unwrap the last error for compatibility with `errors.Unwrap()`. When you need to +unwrap all errors, you should use `WrappedErrors()` instead. + + err := Do( + func() error { + return errors.New("original error") + }, + Attempts(1), + ) + + fmt.Println(errors.Unwrap(err)) # "original error" is printed + +Added in version 4.2.0. + +#### func (Error) WrappedErrors + +```go +func (e Error) WrappedErrors() []error +``` +WrappedErrors returns the list of errors that this Error is wrapping. It is an +implementation of the `errwrap.Wrapper` interface in package +[errwrap](https://github.com/hashicorp/errwrap) so that `retry.Error` can be +used with that library. + +#### type OnRetryFunc + +```go +type OnRetryFunc func(attempt uint, err error) +``` + +Function signature of OnRetry function + +#### type Option + +```go +type Option func(*Config) +``` + +Option represents an option for retry. + +#### func Attempts + +```go +func Attempts(attempts uint) Option +``` +Attempts set count of retry. Setting to 0 will retry until the retried function +succeeds. default is 10 + +#### func AttemptsForError + +```go +func AttemptsForError(attempts uint, err error) Option +``` +AttemptsForError sets count of retry in case execution results in given `err` +Retries for the given `err` are also counted against total retries. The retry +will stop if any of given retries is exhausted. + +added in 4.3.0 + +#### func Context + +```go +func Context(ctx context.Context) Option +``` +Context allow to set context of retry default are Background context + +example of immediately cancellation (maybe it isn't the best example, but it +describes behavior enough; I hope) + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + retry.Do( + func() error { + ... + }, + retry.Context(ctx), + ) + +#### func Delay + +```go +func Delay(delay time.Duration) Option +``` +Delay set delay between retry default is 100ms + +#### func DelayType + +```go +func DelayType(delayType DelayTypeFunc) Option +``` +DelayType set type of the delay between retries default is BackOff + +#### func LastErrorOnly + +```go +func LastErrorOnly(lastErrorOnly bool) Option +``` +return the direct last error that came from the retried function default is +false (return wrapped errors with everything) + +#### func MaxDelay + +```go +func MaxDelay(maxDelay time.Duration) Option +``` +MaxDelay set maximum delay between retry does not apply by default + +#### func MaxJitter + +```go +func MaxJitter(maxJitter time.Duration) Option +``` +MaxJitter sets the maximum random Jitter between retries for RandomDelay + +#### func OnRetry + +```go +func OnRetry(onRetry OnRetryFunc) Option +``` +OnRetry function callback are called each retry + +log each retry example: + + retry.Do( + func() error { + return errors.New("some error") + }, + retry.OnRetry(func(n uint, err error) { + log.Printf("#%d: %s\n", n, err) + }), + ) + +#### func RetryIf + +```go +func RetryIf(retryIf RetryIfFunc) Option +``` +RetryIf controls whether a retry should be attempted after an error (assuming +there are any retry attempts remaining) + +skip retry if special error example: + + retry.Do( + func() error { + return errors.New("special error") + }, + retry.RetryIf(func(err error) bool { + if err.Error() == "special error" { + return false + } + return true + }) + ) + +By default RetryIf stops execution if the error is wrapped using +`retry.Unrecoverable`, so above example may also be shortened to: + + retry.Do( + func() error { + return retry.Unrecoverable(errors.New("special error")) + } + ) + +#### func UntilSucceeded + +```go +func UntilSucceeded() Option +``` +UntilSucceeded will retry until the retried function succeeds. Equivalent to +setting Attempts(0). + +#### func WithTimer + +```go +func WithTimer(t Timer) Option +``` +WithTimer provides a way to swap out timer module implementations. This +primarily is useful for mocking/testing, where you may not want to explicitly +wait for a set duration for retries. + +example of augmenting time.After with a print statement + + type struct MyTimer {} + + func (t *MyTimer) After(d time.Duration) <- chan time.Time { + fmt.Print("Timer called!") + return time.After(d) + } + + retry.Do( + func() error { ... }, + retry.WithTimer(&MyTimer{}) + ) + +#### func WrapContextErrorWithLastError + +```go +func WrapContextErrorWithLastError(wrapContextErrorWithLastError bool) Option +``` +WrapContextErrorWithLastError allows the context error to be returned wrapped +with the last error that the retried function returned. This is only applicable +when Attempts is set to 0 to retry indefinitly and when using a context to +cancel / timeout + +default is false + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + retry.Do( + func() error { + ... + }, + retry.Context(ctx), + retry.Attempts(0), + retry.WrapContextErrorWithLastError(true), + ) + +#### type RetryIfFunc + +```go +type RetryIfFunc func(error) bool +``` + +Function signature of retry if function + +#### type RetryableFunc + +```go +type RetryableFunc func() error +``` + +Function signature of retryable function + +#### type RetryableFuncWithData + +```go +type RetryableFuncWithData[T any] func() (T, error) +``` + +Function signature of retryable function with data + +#### type Timer + +```go +type Timer interface { + After(time.Duration) <-chan time.Time +} +``` + +Timer represents the timer used to track time for a retry. + +## Contributing + +Contributions are very much welcome. + +### Makefile + +Makefile provides several handy rules, like README.md `generator` , `setup` for prepare build/dev environment, `test`, `cover`, etc... + +Try `make help` for more information. + +### Before pull request + +> maybe you need `make setup` in order to setup environment + +please try: +* run tests (`make test`) +* run linter (`make lint`) +* if your IDE don't automaticaly do `go fmt`, run `go fmt` (`make fmt`) + +### README + +README.md are generate from template [.godocdown.tmpl](.godocdown.tmpl) and code documentation via [godocdown](https://github.com/robertkrimen/godocdown). + +Never edit README.md direct, because your change will be lost. diff --git a/vendor/github.com/avast/retry-go/v4/VERSION b/vendor/github.com/avast/retry-go/v4/VERSION new file mode 100644 index 000000000..f6cdf4098 --- /dev/null +++ b/vendor/github.com/avast/retry-go/v4/VERSION @@ -0,0 +1 @@ +4.7.0 diff --git a/vendor/github.com/avast/retry-go/v4/current.txt b/vendor/github.com/avast/retry-go/v4/current.txt new file mode 100644 index 000000000..406b14fe8 --- /dev/null +++ b/vendor/github.com/avast/retry-go/v4/current.txt @@ -0,0 +1,26 @@ +goos: darwin +goarch: amd64 +pkg: github.com/avast/retry-go/v4 +cpu: Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz +BenchmarkDo-16 3 474128987 ns/op 2730 B/op 48 allocs/op +BenchmarkDo-16 3 441499631 ns/op 2725 B/op 47 allocs/op +BenchmarkDo-16 3 449390845 ns/op 2693 B/op 47 allocs/op +BenchmarkDo-16 3 488695333 ns/op 2725 B/op 47 allocs/op +BenchmarkDo-16 2 601685067 ns/op 2704 B/op 48 allocs/op +BenchmarkDo-16 3 336872997 ns/op 2693 B/op 47 allocs/op +BenchmarkDo-16 3 384347911 ns/op 2725 B/op 47 allocs/op +BenchmarkDo-16 3 480906307 ns/op 2693 B/op 47 allocs/op +BenchmarkDo-16 3 455362447 ns/op 2693 B/op 47 allocs/op +BenchmarkDo-16 3 443170384 ns/op 2693 B/op 47 allocs/op +BenchmarkDoNoErrors-16 6872852 159.4 ns/op 208 B/op 4 allocs/op +BenchmarkDoNoErrors-16 7650360 161.3 ns/op 208 B/op 4 allocs/op +BenchmarkDoNoErrors-16 7235683 159.3 ns/op 208 B/op 4 allocs/op +BenchmarkDoNoErrors-16 7465636 160.2 ns/op 208 B/op 4 allocs/op +BenchmarkDoNoErrors-16 7549692 160.7 ns/op 208 B/op 4 allocs/op +BenchmarkDoNoErrors-16 7510610 159.8 ns/op 208 B/op 4 allocs/op +BenchmarkDoNoErrors-16 7438124 160.3 ns/op 208 B/op 4 allocs/op +BenchmarkDoNoErrors-16 7416504 160.2 ns/op 208 B/op 4 allocs/op +BenchmarkDoNoErrors-16 7356183 160.4 ns/op 208 B/op 4 allocs/op +BenchmarkDoNoErrors-16 7393480 160.1 ns/op 208 B/op 4 allocs/op +PASS +ok github.com/avast/retry-go/v4 35.971s diff --git a/vendor/github.com/avast/retry-go/v4/generic.txt b/vendor/github.com/avast/retry-go/v4/generic.txt new file mode 100644 index 000000000..116a09645 --- /dev/null +++ b/vendor/github.com/avast/retry-go/v4/generic.txt @@ -0,0 +1,46 @@ +goos: darwin +goarch: amd64 +pkg: github.com/avast/retry-go/v4 +cpu: Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz +BenchmarkDo-16 3 406306609 ns/op 2701 B/op 48 allocs/op +BenchmarkDo-16 3 419470846 ns/op 2693 B/op 47 allocs/op +BenchmarkDo-16 2 567716303 ns/op 2696 B/op 47 allocs/op +BenchmarkDo-16 2 562713288 ns/op 2696 B/op 47 allocs/op +BenchmarkDo-16 3 418301987 ns/op 2693 B/op 47 allocs/op +BenchmarkDo-16 2 541207332 ns/op 2696 B/op 47 allocs/op +BenchmarkDo-16 2 526211617 ns/op 2696 B/op 47 allocs/op +BenchmarkDo-16 2 517419526 ns/op 2696 B/op 47 allocs/op +BenchmarkDo-16 3 478391497 ns/op 2693 B/op 47 allocs/op +BenchmarkDo-16 3 452548175 ns/op 2725 B/op 47 allocs/op +BenchmarkDoWithData-16 3 463040866 ns/op 2693 B/op 47 allocs/op +BenchmarkDoWithData-16 3 496158943 ns/op 2693 B/op 47 allocs/op +BenchmarkDoWithData-16 3 488367012 ns/op 2725 B/op 47 allocs/op +BenchmarkDoWithData-16 3 454618897 ns/op 2693 B/op 47 allocs/op +BenchmarkDoWithData-16 3 435430056 ns/op 2693 B/op 47 allocs/op +BenchmarkDoWithData-16 2 552289967 ns/op 2744 B/op 48 allocs/op +BenchmarkDoWithData-16 3 569748815 ns/op 2693 B/op 47 allocs/op +BenchmarkDoWithData-16 3 416597207 ns/op 2725 B/op 47 allocs/op +BenchmarkDoWithData-16 3 358455415 ns/op 2725 B/op 47 allocs/op +BenchmarkDoWithData-16 3 455297803 ns/op 2725 B/op 47 allocs/op +BenchmarkDoNoErrors-16 7035135 161.9 ns/op 208 B/op 4 allocs/op +BenchmarkDoNoErrors-16 7389806 161.3 ns/op 208 B/op 4 allocs/op +BenchmarkDoNoErrors-16 7394016 161.5 ns/op 208 B/op 4 allocs/op +BenchmarkDoNoErrors-16 7380039 162.2 ns/op 208 B/op 4 allocs/op +BenchmarkDoNoErrors-16 7424865 162.2 ns/op 208 B/op 4 allocs/op +BenchmarkDoNoErrors-16 7111860 160.5 ns/op 208 B/op 4 allocs/op +BenchmarkDoNoErrors-16 7285305 162.6 ns/op 208 B/op 4 allocs/op +BenchmarkDoNoErrors-16 7410627 160.7 ns/op 208 B/op 4 allocs/op +BenchmarkDoNoErrors-16 7340961 161.6 ns/op 208 B/op 4 allocs/op +BenchmarkDoNoErrors-16 7295727 164.1 ns/op 208 B/op 4 allocs/op +BenchmarkDoWithDataNoErrors-16 7357304 159.9 ns/op 208 B/op 4 allocs/op +BenchmarkDoWithDataNoErrors-16 6649852 166.9 ns/op 208 B/op 4 allocs/op +BenchmarkDoWithDataNoErrors-16 6938404 176.3 ns/op 208 B/op 4 allocs/op +BenchmarkDoWithDataNoErrors-16 7181965 160.4 ns/op 208 B/op 4 allocs/op +BenchmarkDoWithDataNoErrors-16 7311484 166.2 ns/op 208 B/op 4 allocs/op +BenchmarkDoWithDataNoErrors-16 6939157 169.7 ns/op 208 B/op 4 allocs/op +BenchmarkDoWithDataNoErrors-16 6648344 179.0 ns/op 208 B/op 4 allocs/op +BenchmarkDoWithDataNoErrors-16 6794847 177.0 ns/op 208 B/op 4 allocs/op +BenchmarkDoWithDataNoErrors-16 6782588 171.4 ns/op 208 B/op 4 allocs/op +BenchmarkDoWithDataNoErrors-16 7279119 166.9 ns/op 208 B/op 4 allocs/op +PASS +ok github.com/avast/retry-go/v4 73.128s diff --git a/vendor/github.com/avast/retry-go/v4/options.go b/vendor/github.com/avast/retry-go/v4/options.go new file mode 100644 index 000000000..3ae28bb15 --- /dev/null +++ b/vendor/github.com/avast/retry-go/v4/options.go @@ -0,0 +1,311 @@ +package retry + +import ( + "context" + "math" + "math/rand" + "time" +) + +// Function signature of retry if function +type RetryIfFunc func(error) bool + +// Function signature of OnRetry function +type OnRetryFunc func(attempt uint, err error) + +// DelayTypeFunc is called to return the next delay to wait after the retriable function fails on `err` after `n` attempts. +type DelayTypeFunc func(n uint, err error, config *Config) time.Duration + +// Timer represents the timer used to track time for a retry. +type Timer interface { + After(time.Duration) <-chan time.Time +} + +type Config struct { + attempts uint + attemptsForError map[error]uint + delay time.Duration + maxDelay time.Duration + maxJitter time.Duration + onRetry OnRetryFunc + retryIf RetryIfFunc + delayType DelayTypeFunc + lastErrorOnly bool + context context.Context + timer Timer + wrapContextErrorWithLastError bool + + maxBackOffN uint +} + +// Option represents an option for retry. +type Option func(*Config) + +func emptyOption(c *Config) {} + +// return the direct last error that came from the retried function +// default is false (return wrapped errors with everything) +func LastErrorOnly(lastErrorOnly bool) Option { + return func(c *Config) { + c.lastErrorOnly = lastErrorOnly + } +} + +// Attempts set count of retry. Setting to 0 will retry until the retried function succeeds. +// default is 10 +func Attempts(attempts uint) Option { + return func(c *Config) { + c.attempts = attempts + } +} + +// UntilSucceeded will retry until the retried function succeeds. Equivalent to setting Attempts(0). +func UntilSucceeded() Option { + return func(c *Config) { + c.attempts = 0 + } +} + +// AttemptsForError sets count of retry in case execution results in given `err` +// Retries for the given `err` are also counted against total retries. +// The retry will stop if any of given retries is exhausted. +// +// added in 4.3.0 +func AttemptsForError(attempts uint, err error) Option { + return func(c *Config) { + c.attemptsForError[err] = attempts + } +} + +// Delay set delay between retry +// default is 100ms +func Delay(delay time.Duration) Option { + return func(c *Config) { + c.delay = delay + } +} + +// MaxDelay set maximum delay between retry +// does not apply by default +func MaxDelay(maxDelay time.Duration) Option { + return func(c *Config) { + c.maxDelay = maxDelay + } +} + +// MaxJitter sets the maximum random Jitter between retries for RandomDelay +func MaxJitter(maxJitter time.Duration) Option { + return func(c *Config) { + c.maxJitter = maxJitter + } +} + +// DelayType set type of the delay between retries +// default is BackOff +func DelayType(delayType DelayTypeFunc) Option { + if delayType == nil { + return emptyOption + } + return func(c *Config) { + c.delayType = delayType + } +} + +// BackOffDelay is a DelayType which increases delay between consecutive retries +func BackOffDelay(n uint, _ error, config *Config) time.Duration { + // 1 << 63 would overflow signed int64 (time.Duration), thus 62. + const max uint = 62 + + if config.maxBackOffN == 0 { + if config.delay <= 0 { + config.delay = 1 + } + + config.maxBackOffN = max - uint(math.Floor(math.Log2(float64(config.delay)))) + } + + n-- + + if n > config.maxBackOffN { + n = config.maxBackOffN + } + + return config.delay << n +} + +// FixedDelay is a DelayType which keeps delay the same through all iterations +func FixedDelay(_ uint, _ error, config *Config) time.Duration { + return config.delay +} + +// RandomDelay is a DelayType which picks a random delay up to config.maxJitter +func RandomDelay(_ uint, _ error, config *Config) time.Duration { + return time.Duration(rand.Int63n(int64(config.maxJitter))) +} + +// CombineDelay is a DelayType the combines all of the specified delays into a new DelayTypeFunc +func CombineDelay(delays ...DelayTypeFunc) DelayTypeFunc { + const maxInt64 = uint64(math.MaxInt64) + + return func(n uint, err error, config *Config) time.Duration { + var total uint64 + for _, delay := range delays { + total += uint64(delay(n, err, config)) + if total > maxInt64 { + total = maxInt64 + } + } + + return time.Duration(total) + } +} + +// FullJitterBackoffDelay is a DelayTypeFunc that calculates delay using exponential backoff +// with full jitter. The delay is a random value between 0 and the current backoff ceiling. +// Formula: sleep = random_between(0, min(cap, base * 2^attempt)) +// It uses config.Delay as the base delay and config.MaxDelay as the cap. +func FullJitterBackoffDelay(n uint, err error, config *Config) time.Duration { + // Calculate the exponential backoff ceiling for the current attempt + backoffCeiling := float64(config.delay) * math.Pow(2, float64(n)) + currentCap := float64(config.maxDelay) + + // If MaxDelay is set and backoffCeiling exceeds it, cap at MaxDelay + if currentCap > 0 && backoffCeiling > currentCap { + backoffCeiling = currentCap + } + + // Ensure backoffCeiling is at least 0 + if backoffCeiling < 0 { + backoffCeiling = 0 + } + + // Add jitter: random value between 0 and backoffCeiling + // rand.Int63n panics if argument is <= 0 + if backoffCeiling <= 0 { + return 0 // No delay if ceiling is zero or negative + } + + jitter := rand.Int63n(int64(backoffCeiling)) // #nosec G404 -- Using math/rand is acceptable for non-security critical jitter. + return time.Duration(jitter) +} + +// OnRetry function callback are called each retry +// +// log each retry example: +// +// retry.Do( +// func() error { +// return errors.New("some error") +// }, +// retry.OnRetry(func(n uint, err error) { +// log.Printf("#%d: %s\n", n, err) +// }), +// ) +func OnRetry(onRetry OnRetryFunc) Option { + if onRetry == nil { + return emptyOption + } + return func(c *Config) { + c.onRetry = onRetry + } +} + +// RetryIf controls whether a retry should be attempted after an error +// (assuming there are any retry attempts remaining) +// +// skip retry if special error example: +// +// retry.Do( +// func() error { +// return errors.New("special error") +// }, +// retry.RetryIf(func(err error) bool { +// if err.Error() == "special error" { +// return false +// } +// return true +// }) +// ) +// +// By default RetryIf stops execution if the error is wrapped using `retry.Unrecoverable`, +// so above example may also be shortened to: +// +// retry.Do( +// func() error { +// return retry.Unrecoverable(errors.New("special error")) +// } +// ) +func RetryIf(retryIf RetryIfFunc) Option { + if retryIf == nil { + return emptyOption + } + return func(c *Config) { + c.retryIf = retryIf + } +} + +// Context allow to set context of retry +// default are Background context +// +// example of immediately cancellation (maybe it isn't the best example, but it describes behavior enough; I hope) +// +// ctx, cancel := context.WithCancel(context.Background()) +// cancel() +// +// retry.Do( +// func() error { +// ... +// }, +// retry.Context(ctx), +// ) +func Context(ctx context.Context) Option { + return func(c *Config) { + c.context = ctx + } +} + +// WithTimer provides a way to swap out timer module implementations. +// This primarily is useful for mocking/testing, where you may not want to explicitly wait for a set duration +// for retries. +// +// example of augmenting time.After with a print statement +// +// type struct MyTimer {} +// +// func (t *MyTimer) After(d time.Duration) <- chan time.Time { +// fmt.Print("Timer called!") +// return time.After(d) +// } +// +// retry.Do( +// func() error { ... }, +// retry.WithTimer(&MyTimer{}) +// ) +func WithTimer(t Timer) Option { + return func(c *Config) { + c.timer = t + } +} + +// WrapContextErrorWithLastError allows the context error to be returned wrapped with the last error that the +// retried function returned. This is only applicable when Attempts is set to 0 to retry indefinitly and when +// using a context to cancel / timeout +// +// default is false +// +// ctx, cancel := context.WithCancel(context.Background()) +// defer cancel() +// +// retry.Do( +// func() error { +// ... +// }, +// retry.Context(ctx), +// retry.Attempts(0), +// retry.WrapContextErrorWithLastError(true), +// ) +func WrapContextErrorWithLastError(wrapContextErrorWithLastError bool) Option { + return func(c *Config) { + c.wrapContextErrorWithLastError = wrapContextErrorWithLastError + } +} diff --git a/vendor/github.com/avast/retry-go/v4/retry.go b/vendor/github.com/avast/retry-go/v4/retry.go new file mode 100644 index 000000000..62d6392e2 --- /dev/null +++ b/vendor/github.com/avast/retry-go/v4/retry.go @@ -0,0 +1,347 @@ +/* +Simple library for retry mechanism + +Slightly inspired by [Try::Tiny::Retry](https://metacpan.org/pod/Try::Tiny::Retry) + +# SYNOPSIS + +HTTP GET with retry: + + url := "http://example.com" + var body []byte + + err := retry.Do( + func() error { + resp, err := http.Get(url) + if err != nil { + return err + } + defer resp.Body.Close() + body, err = ioutil.ReadAll(resp.Body) + if err != nil { + return err + } + return nil + }, + ) + + if err != nil { + // handle error + } + + fmt.Println(string(body)) + +HTTP GET with retry with data: + + url := "http://example.com" + + body, err := retry.DoWithData( + func() ([]byte, error) { + resp, err := http.Get(url) + if err != nil { + return nil, err + } + defer resp.Body.Close() + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + return body, nil + }, + ) + + if err != nil { + // handle error + } + + fmt.Println(string(body)) + +[More examples](https://github.com/avast/retry-go/tree/master/examples) + +# SEE ALSO + +* [giantswarm/retry-go](https://github.com/giantswarm/retry-go) - slightly complicated interface. + +* [sethgrid/pester](https://github.com/sethgrid/pester) - only http retry for http calls with retries and backoff + +* [cenkalti/backoff](https://github.com/cenkalti/backoff) - Go port of the exponential backoff algorithm from Google's HTTP Client Library for Java. Really complicated interface. + +* [rafaeljesus/retry-go](https://github.com/rafaeljesus/retry-go) - looks good, slightly similar as this package, don't have 'simple' `Retry` method + +* [matryer/try](https://github.com/matryer/try) - very popular package, nonintuitive interface (for me) + +# BREAKING CHANGES + +* 4.0.0 + - infinity retry is possible by set `Attempts(0)` by PR [#49](https://github.com/avast/retry-go/pull/49) + +* 3.0.0 + - `DelayTypeFunc` accepts a new parameter `err` - this breaking change affects only your custom Delay Functions. This change allow [make delay functions based on error](examples/delay_based_on_error_test.go). + +* 1.0.2 -> 2.0.0 + - argument of `retry.Delay` is final delay (no multiplication by `retry.Units` anymore) + - function `retry.Units` are removed + - [more about this breaking change](https://github.com/avast/retry-go/issues/7) + +* 0.3.0 -> 1.0.0 + - `retry.Retry` function are changed to `retry.Do` function + - `retry.RetryCustom` (OnRetry) and `retry.RetryCustomWithOpts` functions are now implement via functions produces Options (aka `retry.OnRetry`) +*/ +package retry + +import ( + "context" + "errors" + "fmt" + "strings" + "time" +) + +// Function signature of retryable function +type RetryableFunc func() error + +// Function signature of retryable function with data +type RetryableFuncWithData[T any] func() (T, error) + +// Default timer is a wrapper around time.After +type timerImpl struct{} + +func (t *timerImpl) After(d time.Duration) <-chan time.Time { + return time.After(d) +} + +func Do(retryableFunc RetryableFunc, opts ...Option) error { + retryableFuncWithData := func() (any, error) { + return nil, retryableFunc() + } + + _, err := DoWithData(retryableFuncWithData, opts...) + return err +} + +func DoWithData[T any](retryableFunc RetryableFuncWithData[T], opts ...Option) (T, error) { + var n uint + var emptyT T + + // default + config := newDefaultRetryConfig() + + // apply opts + for _, opt := range opts { + opt(config) + } + + if err := context.Cause(config.context); err != nil { + return emptyT, err + } + + // Setting attempts to 0 means we'll retry until we succeed + var lastErr error + if config.attempts == 0 { + for { + t, err := retryableFunc() + if err == nil { + return t, nil + } + + if !IsRecoverable(err) { + return emptyT, err + } + + if !config.retryIf(err) { + return emptyT, err + } + + lastErr = err + + config.onRetry(n, err) + n++ + select { + case <-config.timer.After(delay(config, n, err)): + case <-config.context.Done(): + if config.wrapContextErrorWithLastError { + return emptyT, Error{context.Cause(config.context), lastErr} + } + return emptyT, context.Cause(config.context) + } + } + } + + errorLog := Error{} + + attemptsForError := make(map[error]uint, len(config.attemptsForError)) + for err, attempts := range config.attemptsForError { + attemptsForError[err] = attempts + } + +shouldRetry: + for { + t, err := retryableFunc() + if err == nil { + return t, nil + } + + errorLog = append(errorLog, unpackUnrecoverable(err)) + + if !config.retryIf(err) { + break + } + + config.onRetry(n, err) + + for errToCheck, attempts := range attemptsForError { + if errors.Is(err, errToCheck) { + attempts-- + attemptsForError[errToCheck] = attempts + if attempts <= 0 { + break shouldRetry + } + } + } + + // if this is last attempt - don't wait + if n == config.attempts-1 { + break shouldRetry + } + n++ + select { + case <-config.timer.After(delay(config, n, err)): + case <-config.context.Done(): + if config.lastErrorOnly { + return emptyT, context.Cause(config.context) + } + + return emptyT, append(errorLog, context.Cause(config.context)) + } + } + + if config.lastErrorOnly { + return emptyT, errorLog.Unwrap() + } + return emptyT, errorLog +} + +func newDefaultRetryConfig() *Config { + return &Config{ + attempts: uint(10), + attemptsForError: make(map[error]uint), + delay: 100 * time.Millisecond, + maxJitter: 100 * time.Millisecond, + onRetry: func(n uint, err error) {}, + retryIf: IsRecoverable, + delayType: CombineDelay(BackOffDelay, RandomDelay), + lastErrorOnly: false, + context: context.Background(), + timer: &timerImpl{}, + } +} + +// Error type represents list of errors in retry +type Error []error + +// Error method return string representation of Error +// It is an implementation of error interface +func (e Error) Error() string { + logWithNumber := make([]string, len(e)) + for i, l := range e { + if l != nil { + logWithNumber[i] = fmt.Sprintf("#%d: %s", i+1, l.Error()) + } + } + + return fmt.Sprintf("All attempts fail:\n%s", strings.Join(logWithNumber, "\n")) +} + +func (e Error) Is(target error) bool { + for _, v := range e { + if errors.Is(v, target) { + return true + } + } + return false +} + +func (e Error) As(target interface{}) bool { + for _, v := range e { + if errors.As(v, target) { + return true + } + } + return false +} + +/* +Unwrap the last error for compatibility with `errors.Unwrap()`. +When you need to unwrap all errors, you should use `WrappedErrors()` instead. + + err := Do( + func() error { + return errors.New("original error") + }, + Attempts(1), + ) + + fmt.Println(errors.Unwrap(err)) # "original error" is printed + +Added in version 4.2.0. +*/ +func (e Error) Unwrap() error { + return e[len(e)-1] +} + +// WrappedErrors returns the list of errors that this Error is wrapping. +// It is an implementation of the `errwrap.Wrapper` interface +// in package [errwrap](https://github.com/hashicorp/errwrap) so that +// `retry.Error` can be used with that library. +func (e Error) WrappedErrors() []error { + return e +} + +type unrecoverableError struct { + error +} + +func (e unrecoverableError) Error() string { + if e.error == nil { + return "unrecoverable error" + } + return e.error.Error() +} + +func (e unrecoverableError) Unwrap() error { + return e.error +} + +// Unrecoverable wraps an error in `unrecoverableError` struct +func Unrecoverable(err error) error { + return unrecoverableError{err} +} + +// IsRecoverable checks if error is an instance of `unrecoverableError` +func IsRecoverable(err error) bool { + return !errors.Is(err, unrecoverableError{}) +} + +// Adds support for errors.Is usage on unrecoverableError +func (unrecoverableError) Is(err error) bool { + _, isUnrecoverable := err.(unrecoverableError) + return isUnrecoverable +} + +func unpackUnrecoverable(err error) error { + if unrecoverable, isUnrecoverable := err.(unrecoverableError); isUnrecoverable { + return unrecoverable.error + } + + return err +} + +func delay(config *Config, n uint, err error) time.Duration { + delayTime := config.delayType(n, err, config) + if config.maxDelay > 0 && delayTime > config.maxDelay { + delayTime = config.maxDelay + } + + return delayTime +} diff --git a/vendor/github.com/longhorn/go-common-libs/ns/crypto.go b/vendor/github.com/longhorn/go-common-libs/ns/crypto.go index 0d91900c1..8ea86d890 100644 --- a/vendor/github.com/longhorn/go-common-libs/ns/crypto.go +++ b/vendor/github.com/longhorn/go-common-libs/ns/crypto.go @@ -1,12 +1,15 @@ package ns import ( + "fmt" "os/exec" + "strings" "time" "github.com/cockroachdb/errors" "github.com/longhorn/go-common-libs/types" + "github.com/longhorn/go-common-libs/utils" ) // LuksFormatOptions defines optional parameters used when running cryptsetup luksFormat. @@ -95,6 +98,52 @@ func (nsexec *Executor) IsLuks(devicePath string, timeout time.Duration) (bool, return false, err } +func (nsexec *Executor) GetLuksBackendSize(size int64, encrypted bool, cliAPIVersion int) (int64, error) { + if !encrypted { + return size, nil + } + + is16MiBHeaderPkgVersion, err := nsexec.IsLuksFixed16MiBHeaderSize() + if err != nil { + return 0, errors.Wrap(err, "failed to determine if cryptsetup version has fixed 16 MiB header size") + } + if !is16MiBHeaderPkgVersion { + return size, nil + } + + return types.GetBackendSize(size, encrypted, cliAPIVersion), nil +} + +func (nsexec *Executor) IsLuksFixed16MiBHeaderSize() (bool, error) { + ver, err := nsexec.getCryptsetupVersion() + if err != nil { + return false, err + } + + // The fixed header size 16 MiB was introduced in cryptsetup 2.1.0. See: https://www.kernel.org/pub/linux/utils/cryptsetup/v2.1/v2.1.0-ReleaseNotes. + return utils.IsVersionAtLeast(ver, "2.1.0") +} + +func (nsexec *Executor) getCryptsetupVersion() (string, error) { + args := []string{"--version"} + result, err := nsexec.Cryptsetup(args, time.Minute) + if err != nil { + return "", errors.Wrap(err, "cannot find cryptsetup version info on host") + } + + //command: cryptsetup --version; result: cryptsetup 2.4.3\n + fields := strings.Fields(result) + if len(fields) < 2 { + return "", fmt.Errorf("failed to parse cryptsetup version from output %q", result) + } + for _, field := range fields { + if utils.IsVersionValid(field) { + return field, nil + } + } + return "", fmt.Errorf("failed to get valid cryptsetup version from %q", result) +} + // Cryptsetup runs cryptsetup without passphrase. It will return // 0 on success and a non-zero value on error. func (nsexec *Executor) Cryptsetup(args []string, timeout time.Duration) (stdout string, err error) { diff --git a/vendor/github.com/longhorn/go-common-libs/types/crypto.go b/vendor/github.com/longhorn/go-common-libs/types/crypto.go index fc24dc3dd..b26149ecf 100644 --- a/vendor/github.com/longhorn/go-common-libs/types/crypto.go +++ b/vendor/github.com/longhorn/go-common-libs/types/crypto.go @@ -13,6 +13,19 @@ const ( CryptoPBKDF = "CRYPTO_PBKDF" CryptoPBKDFForceIterations = "CRYPTO_PBKDF_FORCE_ITERATIONS" CryptoPBKDFMemory = "CRYPTO_PBKDF_MEMORY" + + CliAPIVersionForSupportingExtendLuks2HeaderSize = 12 + Luks2EncryptionHeaderSize = 16 * 1024 * 1024 ) const LuksTimeout = time.Minute + +func GetBackendSize(volumeSize int64, encrypted bool, cliAPIVersion int) int64 { + if volumeSize > 0 && encrypted && cliAPIVersion >= CliAPIVersionForSupportingExtendLuks2HeaderSize { + // The default size is 16MB for the LUKS2 header, so we need to add it to the replica size if the volume is encrypted. + // otherwise, the device that users get will be 16MB smaller than the actual size users want, which will cause some issues as https://github.com/longhorn/longhorn/issues/9205. + // https://gitlab.com/cryptsetup/cryptsetup/-/wikis/FrequentlyAskedQuestions + return volumeSize + Luks2EncryptionHeaderSize + } + return volumeSize +} diff --git a/vendor/github.com/longhorn/go-common-libs/utils/misc.go b/vendor/github.com/longhorn/go-common-libs/utils/misc.go index 24f36d9ad..8cdf80338 100644 --- a/vendor/github.com/longhorn/go-common-libs/utils/misc.go +++ b/vendor/github.com/longhorn/go-common-libs/utils/misc.go @@ -15,6 +15,8 @@ import ( "github.com/google/uuid" "golang.org/x/exp/constraints" + "k8s.io/apimachinery/pkg/util/version" + "github.com/longhorn/go-common-libs/types" ) @@ -219,3 +221,23 @@ func GetStringFromMap(mapObj map[string]any, key string) string { return fmt.Sprint(value) } } + +// IsVersionAtLeast checks if the current version is at least the minimum version. +func IsVersionAtLeast(currentVersion, minimumVersion string) (bool, error) { + parsedVer, err := version.ParseSemantic(currentVersion) + if err != nil { + return false, errors.Wrapf(err, "failed to parse testing version %q", currentVersion) + } + + minVer, err := version.ParseSemantic(minimumVersion) + if err != nil { + return false, errors.Wrapf(err, "failed to parse minimum version %q", minimumVersion) + } + + return parsedVer.AtLeast(minVer), nil +} + +func IsVersionValid(versionStr string) bool { + _, err := version.ParseSemantic(versionStr) + return err == nil +} diff --git a/vendor/github.com/longhorn/go-spdk-helper/pkg/initiator/initiator.go b/vendor/github.com/longhorn/go-spdk-helper/pkg/initiator/initiator.go index 8cba42336..6b9e8f522 100644 --- a/vendor/github.com/longhorn/go-spdk-helper/pkg/initiator/initiator.go +++ b/vendor/github.com/longhorn/go-spdk-helper/pkg/initiator/initiator.go @@ -8,7 +8,8 @@ import ( "strings" "time" - "github.com/pkg/errors" + "github.com/avast/retry-go/v4" + "github.com/cockroachdb/errors" "github.com/sirupsen/logrus" commonns "github.com/longhorn/go-common-libs/ns" @@ -26,12 +27,24 @@ const ( HostProc = "/host/proc" - validateDiskCreationTimeout = 30 // seconds + validateDiskCreationMaxRetries = 60 + validateDiskCreationRetryInterval = 1 * time.Second UnInitializedUblkId = -1 MaxUblkId = 65535 DefaultUblkQueueDepth = 128 DefaultUblkNumberOfQueue = 1 + + // LinuxKernelSectorSize is the fixed sector size (512 bytes) used by the + // Linux kernel for all block layer and Device Mapper table calculations. + // As defined in the kernel source (include/linux/types.h), "Linux always + // considers sectors to be 512 bytes long independently of the devices + // real block size." + // + // Refs: + // - https://github.com/torvalds/linux/blob/master/include/linux/types.h#L130-L138 + // - https://android.googlesource.com/platform/external/lvm2/+/refs/heads/main/tools/dmsetup.c#103 + DmSectorSize = 512 ) const ( @@ -43,10 +56,11 @@ const ( ) var ( - idGenerator IDGenerator - isUblkTargetCreated = false + idGenerator IDGenerator ) +var errDeviceNotReady = errors.New("device is not a block device yet") + type Initiator struct { Name string Endpoint string @@ -72,8 +86,13 @@ type NVMeTCPInfo struct { } type UblkInfo struct { - BdevName string - UblkID int32 + // spec + BdevName string + UblkQueueDepth int32 + UblkNumberOfQueue int32 + + // status + UblkID int32 } // NewInitiator creates a new initiator @@ -85,7 +104,7 @@ func NewInitiator(name, hostProc string, nvmeTCPInfo *NVMeTCPInfo, ublkInfo *Ubl return nil, fmt.Errorf("cannot initiator creation because both nvmeTCPInfo and ublkInfo are nil or non-nil: nvmeTCPInfo: %v, ublkInfo: %v", nvmeTCPInfo, ublkInfo) } if nvmeTCPInfo != nil && nvmeTCPInfo.SubsystemNQN == "" { - return nil, fmt.Errorf("empty subsystem for NVMe-oF initiator creation") + return nil, fmt.Errorf("empty subsystem for NVMe/TCP initiator creation") } if ublkInfo != nil && ublkInfo.BdevName == "" { return nil, fmt.Errorf("empty BdevName for ublk initiator creation") @@ -153,6 +172,48 @@ func (i *Initiator) ConnectNVMeTCPTarget(ip, port, nqn string) (string, error) { return ConnectTarget(ip, port, nqn, i.executor) } +// executeNVMeTCPPathOp validates initiator state, acquires the file lock, and +// delegates to fn. It is the shared skeleton for ConnectNVMeTCPPath and +// ReconnectNVMeTCPPath. +func (i *Initiator) executeNVMeTCPPathOp(transportAddress, transportServiceID, opName string, fn func(string, string) error) (err error) { + defer func() { + if err != nil { + err = errors.Wrapf(err, "failed to %s NVMe/TCP path for initiator %s", opName, i.Name) + } + }() + + if i.NVMeTCPInfo == nil { + return fmt.Errorf("nvmeTCPInfo is nil") + } + if transportAddress == "" || transportServiceID == "" { + return fmt.Errorf("invalid transportAddress %s and transportServiceID %s for initiator %s", transportAddress, transportServiceID, i.Name) + } + + if i.hostProc != "" { + lock, err := i.newLock() + if err != nil { + return err + } + defer lock.Unlock() + } + + return fn(transportAddress, transportServiceID) +} + +// ConnectNVMeTCPPath connects an additional NVMe/TCP path without touching dm-linear. +// It is intended for native multipath switchover flows where dm-linear must remain +// intact for later snapshot suspend/resume operations. +func (i *Initiator) ConnectNVMeTCPPath(transportAddress, transportServiceID string) error { + return i.executeNVMeTCPPathOp(transportAddress, transportServiceID, "connect", i.connectNVMeTCPPathWithoutLock) +} + +// ReconnectNVMeTCPPath refreshes the current NVMe/TCP initiator state for the +// specified path without touching dm-linear. It reuses an existing matching +// path when present and otherwise establishes a new multipath connection. +func (i *Initiator) ReconnectNVMeTCPPath(transportAddress, transportServiceID string) error { + return i.executeNVMeTCPPathOp(transportAddress, transportServiceID, "reconnect", i.ensureNVMeTCPPathWithoutLock) +} + // DisconnectNVMeTCPTarget disconnects a target func (i *Initiator) DisconnectNVMeTCPTarget() error { if i.NVMeTCPInfo == nil { @@ -169,7 +230,25 @@ func (i *Initiator) DisconnectNVMeTCPTarget() error { return DisconnectTarget(i.NVMeTCPInfo.SubsystemNQN, i.executor) } -// WaitForNVMeTCPConnect waits for the NVMe-oF initiator to connect +func (i *Initiator) connectNVMeTCPPathWithoutLock(transportAddress, transportServiceID string) error { + if reused, err := i.reuseExistingNVMeTCPPathWithoutLock(transportAddress, transportServiceID); err == nil { + if reused { + return nil + } + } else { + i.logger.WithError(err).Debugf("Failed to reuse existing NVMe/TCP path for %s:%s, will attempt fresh connect", transportAddress, transportServiceID) + } + + // For switchover flows the caller may intentionally keep the newly + // connected path in ANA inaccessible state until after control-plane + // coordination completes. In that window the kernel may not expose the + // new path as the selected namespace device yet, so we only establish the + // connection here and defer device-info reload to the caller. + _, _, err := i.discoverAndConnectNVMeTCPTarget(transportAddress, transportServiceID, maxConnectTargetRetries, retryConnectTargetInterval) + return err +} + +// WaitForNVMeTCPConnect waits for the NVMe/TCP initiator to connect and load the device info func (i *Initiator) WaitForNVMeTCPConnect(maxRetries int, retryInterval time.Duration) (err error) { if i.NVMeTCPInfo == nil { return fmt.Errorf("failed to WaitForNVMeTCPConnect because nvmeTCPInfo is nil") @@ -182,18 +261,36 @@ func (i *Initiator) WaitForNVMeTCPConnect(maxRetries int, retryInterval time.Dur defer lock.Unlock() } - for r := 0; r < maxRetries; r++ { - err = i.loadNVMeDeviceInfoWithoutLock(i.NVMeTCPInfo.TransportAddress, i.NVMeTCPInfo.TransportServiceID, i.NVMeTCPInfo.SubsystemNQN) - if err == nil { - return nil - } - time.Sleep(retryInterval) + err = retry.Do( + func() error { + errTest := i.loadNVMeDeviceInfoWithoutLock( + i.NVMeTCPInfo.TransportAddress, + i.NVMeTCPInfo.TransportServiceID, + i.NVMeTCPInfo.SubsystemNQN, + ) + + return errTest + }, + retry.Attempts(uint(maxRetries)), + retry.Delay(retryInterval), + retry.DelayType(retry.FixedDelay), + retry.LastErrorOnly(true), + retry.OnRetry(func(n uint, err error) { + i.logger.WithError(err).Warnf( + "Retrying waiting for NVMe/TCP connect: address=%s:%s attempt=%d/%d next_wait=%s", + i.NVMeTCPInfo.TransportAddress, i.NVMeTCPInfo.TransportServiceID, n+1, maxRetries, retryInterval, + ) + }), + ) + + if err != nil { + return errors.Wrap(err, "failed to wait for NVMe/TCP connect") } - return err + return nil } -// WaitForNVMeTCPTargetDisconnect waits for the NVMe-oF initiator to disconnect +// WaitForNVMeTCPTargetDisconnect waits for the NVMe/TCP initiator to disconnect func (i *Initiator) WaitForNVMeTCPTargetDisconnect(maxRetries int, retryInterval time.Duration) (err error) { if i.NVMeTCPInfo == nil { return fmt.Errorf("failed to WaitForNVMeTCPTargetDisconnect because nvmeTCPInfo is nil") @@ -206,18 +303,53 @@ func (i *Initiator) WaitForNVMeTCPTargetDisconnect(maxRetries int, retryInterval defer lock.Unlock() } - for r := 0; r < maxRetries; r++ { - err = i.loadNVMeDeviceInfoWithoutLock(i.NVMeTCPInfo.TransportAddress, i.NVMeTCPInfo.TransportServiceID, i.NVMeTCPInfo.SubsystemNQN) - if types.ErrorIsValidNvmeDeviceNotFound(err) { - return nil - } - time.Sleep(retryInterval) + // Keep original behavior when maxRetries <= 0: do nothing and return current err (nil). + if maxRetries <= 0 { + return err } - return err + var ( + found bool + lastErr error + attemptCount int + forceRetry = errors.New("force retry") + ) + + _ = retry.Do( + func() error { + // Extra terminal no-op attempt to preserve original "sleep after last failed loop" behavior. + if attemptCount >= maxRetries { + return nil + } + + attemptCount++ + lastErr = i.loadNVMeDeviceInfoWithoutLock( + i.NVMeTCPInfo.TransportAddress, + i.NVMeTCPInfo.TransportServiceID, + i.NVMeTCPInfo.SubsystemNQN, + ) + if types.ErrorIsValidNvmeDeviceNotFound(lastErr) { + found = true + return nil + } + + // Always continue retrying for exactly maxRetries loop-equivalent attempts, + // even when lastErr == nil, matching the original for-loop behavior. + return forceRetry + }, + retry.Attempts(uint(maxRetries+1)), + retry.Delay(retryInterval), + retry.DelayType(retry.FixedDelay), + retry.LastErrorOnly(true), + ) + + if found { + return nil + } + return lastErr } -// Suspend suspends the device mapper device for the NVMe-oF initiator +// Suspend suspends the device mapper device for the NVMe/TCP initiator func (i *Initiator) Suspend(noflush, nolockfs bool) error { if i.hostProc != "" { lock, err := i.newLock() @@ -229,19 +361,19 @@ func (i *Initiator) Suspend(noflush, nolockfs bool) error { suspended, err := i.IsSuspended() if err != nil { - return errors.Wrapf(err, "failed to check if linear dm device is suspended for NVMe-oF initiator %s", i.Name) + return errors.Wrapf(err, "failed to check if linear dm device is suspended for NVMe/TCP initiator %s", i.Name) } if !suspended { if err := i.suspendLinearDmDevice(noflush, nolockfs); err != nil { - return errors.Wrapf(err, "failed to suspend linear dm device for NVMe-oF initiator %s", i.Name) + return errors.Wrapf(err, "failed to suspend linear dm device for NVMe/TCP initiator %s", i.Name) } } return nil } -// Resume resumes the device mapper device for the NVMe-oF initiator +// Resume resumes the device mapper device for the NVMe/TCP initiator func (i *Initiator) Resume() error { if i.hostProc != "" { lock, err := i.newLock() @@ -252,7 +384,7 @@ func (i *Initiator) Resume() error { } if err := i.resumeLinearDmDevice(); err != nil { - return errors.Wrapf(err, "failed to resume linear dm device for NVMe-oF initiator %s", i.Name) + return errors.Wrapf(err, "failed to resume linear dm device for NVMe/TCP initiator %s", i.Name) } return nil @@ -286,11 +418,11 @@ func (i *Initiator) replaceDmDeviceTarget() error { return nil } -// StartNvmeTCPInitiator starts the NVMe-oF initiator with the given transportAddress and transportServiceID -func (i *Initiator) StartNvmeTCPInitiator(transportAddress, transportServiceID string, dmDeviceAndEndpointCleanupRequired bool) (dmDeviceIsBusy bool, err error) { +// StartNvmeTCPInitiator starts the NVMe/TCP initiator with the given transportAddress and transportServiceID +func (i *Initiator) StartNvmeTCPInitiator(transportAddress, transportServiceID string, dmDeviceAndEndpointCleanupRequired bool, stop bool) (dmDeviceIsBusy bool, err error) { defer func() { if err != nil { - err = errors.Wrapf(err, "failed to start NVMe-oF initiator %s", i.Name) + err = errors.Wrapf(err, "failed to start NVMe/TCP initiator %s", i.Name) } }() @@ -305,7 +437,7 @@ func (i *Initiator) StartNvmeTCPInitiator(transportAddress, transportServiceID s "transportAddress": transportAddress, "transportServiceID": transportServiceID, "dmDeviceAndEndpointCleanupRequired": dmDeviceAndEndpointCleanupRequired, - }).Info("Starting NVMe-oF initiator") + }).Info("Starting NVMe/TCP initiator") if i.hostProc != "" { lock, err := i.newLock() @@ -315,76 +447,85 @@ func (i *Initiator) StartNvmeTCPInitiator(transportAddress, transportServiceID s defer lock.Unlock() } - // Check if the initiator/NVMe-oF device is already launched and matches the params - err = i.loadNVMeDeviceInfoWithoutLock(i.NVMeTCPInfo.TransportAddress, i.NVMeTCPInfo.TransportServiceID, i.NVMeTCPInfo.SubsystemNQN) - if err == nil { - if i.NVMeTCPInfo.TransportAddress == transportAddress && i.NVMeTCPInfo.TransportServiceID == transportServiceID { + // Check if the initiator/NVMe/TCP device is already launched and matches the params. + if launched, err := i.reuseExistingNVMeTCPPathWithoutLock(transportAddress, transportServiceID); err == nil { + if launched { err = i.LoadEndpointForNvmeTcpFrontend(false) if err == nil { - i.logger.Info("NVMe-oF initiator is already launched with correct params") + i.logger.Info("NVMe/TCP initiator is already launched with correct params") return false, nil } - i.logger.WithError(err).Warnf("NVMe-oF initiator is launched with failed to load the endpoint") - } else { - i.logger.Warnf("NVMe-oF initiator is launched but with incorrect address, the required one is %s:%s, will try to stop then relaunch it", transportAddress, transportServiceID) + i.logger.WithError(err).Warnf("NVMe/TCP initiator is launched with failed to load the endpoint") } + } else { + i.logger.WithError(err).Warn("Failed to load existing NVMe/TCP path state before starting initiator") } - - i.logger.Info("Stopping NVMe-oF initiator blindly before starting") - dmDeviceIsBusy, err = i.stopWithoutLock(nil, dmDeviceAndEndpointCleanupRequired, false, false) - if err != nil { - return dmDeviceIsBusy, errors.Wrapf(err, "failed to stop the mismatching NVMe-oF initiator %s before starting", i.Name) + if i.NVMeTCPInfo.TransportAddress != "" && i.NVMeTCPInfo.TransportServiceID != "" && + (i.NVMeTCPInfo.TransportAddress != transportAddress || i.NVMeTCPInfo.TransportServiceID != transportServiceID) { + i.logger.Warnf("NVMe/TCP initiator is launched but with incorrect address, the required one is %s:%s, will try to stop then relaunch it", transportAddress, transportServiceID) } - i.logger.Info("Launching NVMe-oF initiator") - - i.connectNVMeTCPTarget(transportAddress, transportServiceID, maxConnectTargetRetries, retryConnectTargetInterval) - if i.NVMeTCPInfo.ControllerName == "" { - return dmDeviceIsBusy, fmt.Errorf("failed to start NVMe-oF initiator %s within %d * %v sec retries", i.Name, maxConnectTargetRetries, retryConnectTargetInterval.Seconds()) + if stop { + i.logger.Info("Stopping NVMe/TCP initiator blindly before starting") + dmDeviceIsBusy, err = i.stopWithoutLock(nil, dmDeviceAndEndpointCleanupRequired, false, false) + if err != nil { + return dmDeviceIsBusy, errors.Wrapf(err, "failed to stop the mismatching NVMe/TCP initiator %s before starting", i.Name) + } + } else { + dmDeviceIsBusy = true } - err = i.waitAndLoadNVMeDeviceInfoWithoutLock(transportAddress, transportServiceID) + i.logger.Info("Ensuring NVMe/TCP target path is connected") + err = i.ensureNVMeTCPPathWithoutLock(transportAddress, transportServiceID) if err != nil { - return dmDeviceIsBusy, errors.Wrapf(err, "failed to load device info after connecting target for NVMe-oF initiator %s", i.Name) + return dmDeviceIsBusy, errors.Wrapf(err, "failed to ensure device info after connecting target for NVMe/TCP initiator %s", i.Name) } if dmDeviceAndEndpointCleanupRequired { if dmDeviceIsBusy { // Endpoint is already created, just replace the target device - i.logger.Info("Linear dm device is busy, trying the best to replace the target device for NVMe-oF initiator") + i.logger.Info("Linear dm device is busy, trying the best to replace the target device for NVMe/TCP initiator") if err := i.replaceDmDeviceTarget(); err != nil { - i.logger.WithError(err).Warnf("Failed to replace the target device for NVMe-oF initiator") + i.logger.WithError(err).Warnf("Failed to replace the target device for NVMe/TCP initiator") } else { - i.logger.Info("Successfully replaced the target device for NVMe-oF initiator") + i.logger.Info("Successfully replaced the target device for NVMe/TCP initiator") dmDeviceIsBusy = false } } else { - i.logger.Info("Creating linear dm device for NVMe-oF initiator") + i.logger.Info("Creating linear dm device for NVMe/TCP initiator") if err := i.createLinearDmDevice(); err != nil { - return false, errors.Wrapf(err, "failed to create linear dm device for NVMe-oF initiator %s", i.Name) + return false, errors.Wrapf(err, "failed to create linear dm device for NVMe/TCP initiator %s", i.Name) } } } else { - i.logger.Info("Skipping creating linear dm device for NVMe-oF initiator") + i.logger.Info("Skipping creating linear dm device for NVMe/TCP initiator") i.dev.Export = i.dev.Source } i.logger.Infof("Creating endpoint %v", i.Endpoint) + if err := i.createEndpoint(); err != nil { + return dmDeviceIsBusy, err + } + + i.logger.Infof("Launched NVMe/TCP initiator: %+v", i) + + return dmDeviceIsBusy, nil +} + +func (i *Initiator) createEndpoint() error { exist, err := i.isEndpointExist() if err != nil { - return dmDeviceIsBusy, errors.Wrapf(err, "failed to check if endpoint %v exists for NVMe-oF initiator %s", i.Endpoint, i.Name) + return errors.Wrapf(err, "failed to check if endpoint %v exists for NVMe/TCP initiator %s", i.Endpoint, i.Name) } if exist { - i.logger.Infof("Skipping endpoint %v creation for NVMe-oF initiator", i.Endpoint) - } else { - if err := i.makeEndpoint(); err != nil { - return dmDeviceIsBusy, err - } + i.logger.Infof("Skipping endpoint %v creation for NVMe/TCP initiator", i.Endpoint) + return nil } - i.logger.Infof("Launched NVMe-oF initiator: %+v", i) - - return dmDeviceIsBusy, nil + if err := i.makeEndpoint(); err != nil { + return err + } + return nil } func (i *Initiator) StartUblkInitiator(spdkClient *client.Client, dmDeviceAndEndpointCleanupRequired bool) (dmDeviceIsBusy bool, err error) { @@ -405,13 +546,6 @@ func (i *Initiator) StartUblkInitiator(spdkClient *client.Client, dmDeviceAndEnd defer lock.Unlock() } - if !isUblkTargetCreated { - if err := spdkClient.UblkCreateTarget("", true); err != nil { - return false, err - } - isUblkTargetCreated = true - } - ublkDeviceList, err := spdkClient.UblkGetDisks(0) if err != nil { return false, err @@ -441,7 +575,24 @@ func (i *Initiator) StartUblkInitiator(spdkClient *client.Client, dmDeviceAndEnd if err != nil { return false, err } - if err := spdkClient.UblkStartDisk(i.UblkInfo.BdevName, availableUblkID, DefaultUblkQueueDepth, DefaultUblkNumberOfQueue); err != nil { + + queueDepth := i.UblkInfo.UblkQueueDepth + if queueDepth <= 0 { + i.logger.Infof("Invalid queue depth %d for ublk initiator, using default value %d", queueDepth, DefaultUblkQueueDepth) + queueDepth = DefaultUblkQueueDepth + } + numQueues := i.UblkInfo.UblkNumberOfQueue + if numQueues <= 0 { + i.logger.Infof("Invalid number of queues %d for ublk initiator, using default value %d", numQueues, DefaultUblkNumberOfQueue) + numQueues = DefaultUblkNumberOfQueue + } + + i.UblkInfo.UblkQueueDepth = queueDepth + i.UblkInfo.UblkNumberOfQueue = numQueues + + i.logger.Infof("Starting ublk initiator with bdev %s, available UBLK ID %d, queue depth %d, number of queues %d", + i.UblkInfo.BdevName, availableUblkID, i.UblkInfo.UblkQueueDepth, i.UblkInfo.UblkNumberOfQueue) + if err := spdkClient.UblkStartDisk(i.UblkInfo.BdevName, availableUblkID, i.UblkInfo.UblkQueueDepth, i.UblkInfo.UblkNumberOfQueue); err != nil { return false, err } i.UblkInfo.UblkID = availableUblkID @@ -506,43 +657,198 @@ func (i *Initiator) waitAndLoadNVMeDeviceInfoWithoutLock(transportAddress, trans if i.NVMeTCPInfo == nil { return fmt.Errorf("failed to waitAndLoadNVMeDeviceInfoWithoutLock because nvmeTCPInfo is nil") } - for r := 0; r < maxWaitDeviceRetries; r++ { - err = i.loadNVMeDeviceInfoWithoutLock(transportAddress, transportServiceID, i.NVMeTCPInfo.SubsystemNQN) - if err == nil { - break - } - time.Sleep(waitDeviceInterval) + + err = retry.Do( + func() error { + return i.loadNVMeDeviceInfoWithoutLock( + transportAddress, + transportServiceID, + i.NVMeTCPInfo.SubsystemNQN, + ) + }, + retry.Attempts(uint(maxWaitDeviceRetries)), + retry.Delay(waitDeviceInterval), + retry.DelayType(retry.FixedDelay), + retry.LastErrorOnly(true), + retry.OnRetry(func(n uint, err error) { + i.logger.WithError(err).Warnf( + "Retrying loading NVMe device info for initiator %s: address=%s:%s attempt=%d/%d next_wait=%s", + i.Name, transportAddress, transportServiceID, n+1, maxWaitDeviceRetries, waitDeviceInterval, + ) + }), + ) + + if err != nil { + return errors.Wrap(err, "failed to load NVMe device info") } - return err + + return nil } -func (i *Initiator) connectNVMeTCPTarget(transportAddress, transportServiceID string, maxRetries int, retryInterval time.Duration) { - if i.NVMeTCPInfo == nil { - logrus.Warnf("Failed to connectTarget because nvmeTCPInfo is nil") +func (i *Initiator) reuseExistingNVMeTCPPathWithoutLock(transportAddress, transportServiceID string) (bool, error) { + if err := i.loadNVMeDeviceInfoWithoutLock(i.NVMeTCPInfo.TransportAddress, i.NVMeTCPInfo.TransportServiceID, i.NVMeTCPInfo.SubsystemNQN); err != nil { + return false, err } - for r := 0; r < maxRetries; r++ { - // Rerun this API for a discovered target should be fine - subsystemNQN, err := DiscoverTarget(transportAddress, transportServiceID, i.executor) - if err != nil { - i.logger.WithError(err).Warn("Failed to discover target") - time.Sleep(retryInterval) - continue + if i.NVMeTCPInfo.TransportAddress != transportAddress || i.NVMeTCPInfo.TransportServiceID != transportServiceID { + return false, nil + } + + i.logger.WithFields(logrus.Fields{ + "transportAddress": transportAddress, + "transportServiceID": transportServiceID, + }).Info("NVMe/TCP path is already connected") + + if err := i.waitAndLoadNVMeDeviceInfoWithoutLock(transportAddress, transportServiceID); err != nil { + return false, err + } + + return true, nil +} + +func (i *Initiator) ensureNVMeTCPPathWithoutLock(transportAddress, transportServiceID string) error { + if reused, err := i.reuseExistingNVMeTCPPathWithoutLock(transportAddress, transportServiceID); err == nil { + if reused { + return nil } + } - controllerName, err := ConnectTarget(transportAddress, transportServiceID, subsystemNQN, i.executor) - if err != nil { - i.logger.WithError(err).Warn("Failed to connect target") - time.Sleep(retryInterval) - continue + previousInfo := *i.NVMeTCPInfo + subsystemNQN, controllerName, err := i.discoverAndConnectNVMeTCPTarget(transportAddress, transportServiceID, maxConnectTargetRetries, retryConnectTargetInterval) + if err != nil { + return err + } + + cleanupConnection := func(reason error) { + i.logger.WithError(reason).Warnf("Cleaning up orphaned NVMe/TCP connection for %s at %s:%s after post-connect failure", subsystemNQN, transportAddress, transportServiceID) + if disconnectErr := DisconnectController(subsystemNQN, transportAddress, transportServiceID, i.executor); disconnectErr != nil { + i.logger.WithError(disconnectErr).Warnf("Failed to disconnect orphaned NVMe/TCP controller for %s at %s:%s", subsystemNQN, transportAddress, transportServiceID) } + } + + if err := i.recordConnectedNVMeTCPInfo(subsystemNQN, controllerName); err != nil { + cleanupConnection(err) + *i.NVMeTCPInfo = previousInfo + return err + } + if err := i.waitAndLoadNVMeDeviceInfoWithoutLock(transportAddress, transportServiceID); err != nil { + cleanupConnection(err) + *i.NVMeTCPInfo = previousInfo + return err + } + return nil +} + +func (i *Initiator) recordConnectedNVMeTCPInfo(subsystemNQN, controllerName string) error { + if i.NVMeTCPInfo == nil { + return fmt.Errorf("nvmeTCPInfo is nil") + } + + // Persist the discovered subsystem immediately after a successful connect. + // Later reload/load/cleanup paths use SubsystemNQN to locate or disconnect + // the NVMe device, so it must stay in sync even if controllerName validation fails. + if subsystemNQN != "" { i.NVMeTCPInfo.SubsystemNQN = subsystemNQN - i.NVMeTCPInfo.ControllerName = controllerName - break } + if controllerName == "" { + return fmt.Errorf("controller name is empty") + } + + i.NVMeTCPInfo.ControllerName = controllerName + return nil +} + +func (i *Initiator) discoverAndConnectNVMeTCPTarget(transportAddress, transportServiceID string, maxRetries int, retryInterval time.Duration) (subsystemNQN, controllerName string, err error) { + if i.NVMeTCPInfo == nil { + return "", "", fmt.Errorf("nvmeTCPInfo is nil") + } + + err = retry.Do( + func() error { + var e error + + // If SubsystemNQN is already known (e.g. for backup or rebuild + // initiators), skip the discovery step and connect directly. + // This avoids "failed to add controller" errors from nvme-cli 2.x + // when the kernel already has NVMe-oF connections to the same + // target address with the same hostNQN/hostID. + if i.NVMeTCPInfo.SubsystemNQN != "" { + subsystemNQN = i.NVMeTCPInfo.SubsystemNQN + i.logger.Infof("Using pre-configured SubsystemNQN %s for target %s:%s, skipping discovery", + subsystemNQN, transportAddress, transportServiceID) + } else { + i.logger.Infof("Discovering NVMe/TCP target %s:%s", transportAddress, transportServiceID) + subsystemNQN, e = DiscoverTarget(transportAddress, transportServiceID, i.executor) + if e != nil { + return errors.Wrapf(e, "discover NVMe/TCP target %s:%s failed", transportAddress, transportServiceID) + } + } + + i.logger.Infof("Connecting to NVMe/TCP target %s:%s with subsystemNQN %s", transportAddress, transportServiceID, subsystemNQN) + controllerName, e = ConnectTarget(transportAddress, transportServiceID, subsystemNQN, i.executor) + if e != nil { + // "already connected" means the path is present in the kernel + // but GetDevices() couldn't find a namespace device yet (e.g. + // multipath ANA inaccessible). Since the goal is to ensure + // the path is connected, verify via subsystem listing and + // treat it as success. + if strings.Contains(strings.ToLower(e.Error()), "already connected") { + i.logger.Infof("NVMe/TCP target %s:%s is already connected, verifying controller via subsystem listing", transportAddress, transportServiceID) + if name, verifyErr := i.findControllerBySubsystem(subsystemNQN, transportAddress, transportServiceID); verifyErr == nil { + controllerName = name + i.logger.Infof("Verified existing controller %s for %s:%s", controllerName, transportAddress, transportServiceID) + return nil + } + return retry.Unrecoverable(errors.Wrapf(e, "connect NVMe/TCP target %s:%s (nqn=%s) failed", transportAddress, transportServiceID, subsystemNQN)) + } + return errors.Wrapf(e, "connect NVMe/TCP target %s:%s (nqn=%s) failed", transportAddress, transportServiceID, subsystemNQN) + } + + return nil + }, + retry.Attempts(uint(maxRetries)), + retry.Delay(retryInterval), + retry.DelayType(retry.FixedDelay), + retry.LastErrorOnly(true), + retry.OnRetry(func(n uint, err error) { + i.logger.WithError(err).Warnf( + "Retrying NVMe/TCP target connect: addr=%s:%s attempt=%d/%d next_wait=%s", + transportAddress, transportServiceID, n+1, maxRetries, retryInterval, + ) + }), + ) + + if err != nil { + return "", "", errors.Wrapf(err, "failed to discover and connect NVMe/TCP target %s:%s", transportAddress, transportServiceID) + } + + return subsystemNQN, controllerName, nil } -// Stop stops the NVMe-oF initiator +// findControllerBySubsystem looks up the controller name for the given NQN +// and transport address via subsystem listing. This is used as a fallback +// when ConnectTarget reports "already connected" but GetDevices cannot find +// a namespace device (e.g. multipath ANA inaccessible state). +func (i *Initiator) findControllerBySubsystem(nqn, transportAddress, transportServiceID string) (string, error) { + subsystems, err := GetSubsystems(i.executor) + if err != nil { + return "", errors.Wrap(err, "failed to list subsystems") + } + for _, sys := range subsystems { + if sys.NQN != nqn { + continue + } + for _, path := range sys.Paths { + controllerIP, controllerPort := GetIPAndPortFromControllerAddress(path.Address) + if controllerIP == transportAddress && controllerPort == transportServiceID { + return path.Name, nil + } + } + } + return "", fmt.Errorf("no controller found for subsystem %s at %s:%s", nqn, transportAddress, transportServiceID) +} + +// Stop stops the NVMe/TCP initiator func (i *Initiator) Stop(spdkClient *client.Client, dmDeviceAndEndpointCleanupRequired, deferDmDeviceCleanup, returnErrorForBusyDevice bool) (bool, error) { if i.hostProc != "" { lock, err := i.newLock() @@ -583,7 +889,7 @@ func (i *Initiator) stopWithoutLock(spdkClient *client.Client, dmDeviceAndEndpoi if i.NVMeTCPInfo != nil { err = DisconnectTarget(i.NVMeTCPInfo.SubsystemNQN, i.executor) if err != nil { - return dmDeviceIsBusy, errors.Wrapf(err, "failed to disconnect target for NVMe-oF initiator %s", i.Name) + return dmDeviceIsBusy, errors.Wrapf(err, "failed to disconnect target for NVMe/TCP initiator %s", i.Name) } i.NVMeTCPInfo.ControllerName = "" @@ -649,6 +955,61 @@ func (i *Initiator) GetEndpoint() string { return "" } +// WaitForControllerLive waits for the NVMe controller at the given address to +// reach "live" state. This is needed after nvme connect which returns +// immediately while the TCP handshake completes asynchronously. +func (i *Initiator) WaitForControllerLive(transportAddress, transportServiceID string, maxAttempts int, retryInterval time.Duration) error { + if i.NVMeTCPInfo == nil { + return fmt.Errorf("nvmeTCPInfo is nil") + } + + nqn := i.NVMeTCPInfo.SubsystemNQN + + err := retry.Do( + func() error { + subsystems, err := GetSubsystems(i.executor) + if err != nil { + return errors.Wrap(err, "failed to list subsystems while waiting for controller live state") + } + + for _, sys := range subsystems { + if sys.NQN != nqn { + continue + } + for _, path := range sys.Paths { + controllerIP, controllerPort := GetIPAndPortFromControllerAddress(path.Address) + if controllerIP == transportAddress && controllerPort == transportServiceID { + if path.State == "live" { + i.logger.Infof("NVMe controller %s for %s:%s reached live state", + path.Name, transportAddress, transportServiceID) + return nil + } + } + } + } + + return fmt.Errorf("NVMe controller for %s:%s is not live yet", transportAddress, transportServiceID) + }, + retry.Attempts(uint(maxAttempts)), + retry.Delay(retryInterval), + retry.DelayType(retry.FixedDelay), + retry.LastErrorOnly(true), + retry.OnRetry(func(n uint, err error) { + i.logger.WithError(err).Warnf( + "Retrying waiting for NVMe controller live: addr=%s:%s attempt=%d/%d next_wait=%s", + transportAddress, transportServiceID, n+1, maxAttempts, retryInterval, + ) + }), + ) + + if err != nil { + return fmt.Errorf("timed out waiting for NVMe controller to become live for %s:%s after %d attempts", + transportAddress, transportServiceID, maxAttempts) + } + + return nil +} + // GetDevice returns the device information func (i *Initiator) LoadNVMeDeviceInfo(transportAddress, transportServiceID, subsystemNQN string) (err error) { if i.hostProc != "" { @@ -671,18 +1032,19 @@ func (i *Initiator) loadNVMeDeviceInfoWithoutLock(transportAddress, transportSer return err } if len(nvmeDevices) != 1 { - return fmt.Errorf("found zero or multiple devices NVMe-oF initiator %s", i.Name) + return fmt.Errorf("found zero or multiple devices NVMe/TCP initiator %s", i.Name) } if len(nvmeDevices[0].Namespaces) != 1 { - return fmt.Errorf("found zero or multiple devices for NVMe-oF initiator %s", i.Name) + return fmt.Errorf("found zero or multiple devices for NVMe/TCP initiator %s", i.Name) } - if i.NVMeTCPInfo.ControllerName != "" && i.NVMeTCPInfo.ControllerName != nvmeDevices[0].Controllers[0].Controller { - return fmt.Errorf("found mismatching between the detected controller name %s and the recorded value %s for NVMe-oF initiator %s", nvmeDevices[0].Controllers[0].Controller, i.NVMeTCPInfo.ControllerName, i.Name) + controller, err := selectControllerForNVMeDevice(nvmeDevices[0], transportAddress, transportServiceID, i.NVMeTCPInfo.ControllerName) + if err != nil { + return errors.Wrapf(err, "failed to select controller for NVMe/TCP initiator %s", i.Name) } - i.NVMeTCPInfo.ControllerName = nvmeDevices[0].Controllers[0].Controller + i.NVMeTCPInfo.ControllerName = controller.Controller i.NVMeTCPInfo.NamespaceName = nvmeDevices[0].Namespaces[0].NameSpace - i.NVMeTCPInfo.TransportAddress, i.NVMeTCPInfo.TransportServiceID = GetIPAndPortFromControllerAddress(nvmeDevices[0].Controllers[0].Address) + i.NVMeTCPInfo.TransportAddress, i.NVMeTCPInfo.TransportServiceID = GetIPAndPortFromControllerAddress(controller.Address) i.logger = i.logger.WithFields(logrus.Fields{ "controllerName": i.NVMeTCPInfo.ControllerName, "namespaceName": i.NVMeTCPInfo.NamespaceName, @@ -693,7 +1055,7 @@ func (i *Initiator) loadNVMeDeviceInfoWithoutLock(transportAddress, transportSer devPath := filepath.Join("/dev", i.NVMeTCPInfo.NamespaceName) dev, err := util.DetectDevice(devPath, i.executor) if err != nil { - return errors.Wrapf(err, "cannot find the device for NVMe-oF initiator %s with namespace name %s", i.Name, i.NVMeTCPInfo.NamespaceName) + return errors.Wrapf(err, "cannot find the device for NVMe/TCP initiator %s with namespace name %s", i.Name, i.NVMeTCPInfo.NamespaceName) } i.dev = &util.LonghornBlockDevice{ @@ -702,6 +1064,33 @@ func (i *Initiator) loadNVMeDeviceInfoWithoutLock(transportAddress, transportSer return nil } +func selectControllerForNVMeDevice(device Device, transportAddress, transportServiceID, recordedControllerName string) (Controller, error) { + if len(device.Controllers) == 0 { + return Controller{}, fmt.Errorf("no NVMe controllers found for subsystem %s", device.SubsystemNQN) + } + + if transportAddress != "" && transportServiceID != "" { + for _, controller := range device.Controllers { + controllerAddress, controllerServiceID := GetIPAndPortFromControllerAddress(controller.Address) + if controllerAddress == transportAddress && controllerServiceID == transportServiceID { + return controller, nil + } + } + } + + if recordedControllerName != "" { + for _, controller := range device.Controllers { + if controller.Controller == recordedControllerName { + return controller, nil + } + } + } + + logrus.Warnf("No NVMe controller matched address %s:%s or recorded name %q for subsystem %s, falling back to first controller %s", + transportAddress, transportServiceID, recordedControllerName, device.SubsystemNQN, device.Controllers[0].Controller) + return device.Controllers[0], nil +} + func (i *Initiator) isNamespaceExist(devices []string) bool { if i.NVMeTCPInfo == nil { return false @@ -741,13 +1130,20 @@ func (i *Initiator) LoadEndpointForNvmeTcpFrontend(dmDeviceIsBusy bool) error { i.logger.Debugf("Skipping endpoint %v loading due to device busy", i.Endpoint) } else { if i.NVMeTCPInfo.NamespaceName != "" && !i.isNamespaceExist(depDevices) { - return fmt.Errorf("detected device %s name mismatching from endpoint %v for NVMe-oF initiator %s", dev.Name, i.Endpoint, i.Name) + return fmt.Errorf("detected device %s name mismatching from endpoint %v for NVMe/TCP initiator %s", dev.Name, i.Endpoint, i.Name) } } - i.dev = &util.LonghornBlockDevice{ + newDev := &util.LonghornBlockDevice{ Export: *dev, } + // Preserve the Source device (e.g. the underlying NVMe namespace) if it + // was already populated from a previous createLinearDmDevice call. + // SyncDmDeviceSize relies on Source.Name to locate the physical device. + if i.dev != nil && i.dev.Source.Name != "" { + newDev.Source = i.dev.Source + } + i.dev = newDev i.isUp = true return nil @@ -815,7 +1211,7 @@ func (i *Initiator) createLinearDmDevice() error { } dmDevPath := getDmDevicePath(i.Name) - if err := validateDiskCreation(dmDevPath, validateDiskCreationTimeout); err != nil { + if err := i.validateDiskCreation(dmDevPath, validateDiskCreationMaxRetries, validateDiskCreationRetryInterval); err != nil { return err } @@ -832,16 +1228,40 @@ func (i *Initiator) createLinearDmDevice() error { return nil } -func validateDiskCreation(path string, timeout int) error { - for i := 0; i < timeout; i++ { - isBlockDev, _ := util.IsBlockDevice(path) - if isBlockDev { +func (i *Initiator) validateDiskCreation(path string, maxRetries int, retryInterval time.Duration) error { + if maxRetries <= 0 { + return fmt.Errorf("maxRetries must be > 0") + } + + err := retry.Do( + func() error { + isBlockDev, err := util.IsBlockDevice(path) + if err != nil { + return err + } + if !isBlockDev { + return errDeviceNotReady + } return nil - } - time.Sleep(time.Second * 1) + }, + retry.Attempts(uint(maxRetries)), + retry.Delay(retryInterval), + retry.DelayType(retry.FixedDelay), + retry.LastErrorOnly(true), + retry.OnRetry(func(n uint, err error) { + i.logger.WithError(err).Warnf( + "Retrying device creation validation: path=%s attempt=%d/%d next_wait=%s", + path, n+1, maxRetries, retryInterval, + ) + }), + ) + + if err != nil { + return fmt.Errorf("failed to validate device %s creation: %w", path, err) } - return fmt.Errorf("failed to validate device %s creation", path) + i.logger.Infof("Device %s is created and ready", path) + return nil } func (i *Initiator) suspendLinearDmDevice(noflush, nolockfs bool) error { @@ -863,6 +1283,57 @@ func (i *Initiator) ReloadDmDevice() (err error) { return i.reloadLinearDmDevice() } +func (i *Initiator) SyncDmDeviceSize(expectedSize uint64) error { + if i.dev == nil || i.dev.Source.Name == "" { + return fmt.Errorf("initiator device source is not initialized") + } + + devPath := fmt.Sprintf("/dev/%s", i.dev.Source.Name) + expectedSectors := int64(expectedSize / DmSectorSize) + + i.logger.Infof("Start reloading dm device %v to expected size %v bytes (%v sectors)", i.Name, expectedSize, expectedSectors) + + var sectors int64 + + // Keep original behavior when maxWaitDeviceRetries <= 0: + // no polling happens, then fall through to the same final check. + if maxWaitDeviceRetries > 0 { + const forceRetryMsg = "force retry" + forceRetryErr := errors.New(forceRetryMsg) + + attempt := 0 + _ = retry.Do( + func() error { + // Extra terminal no-op attempt to preserve original behavior: + // sleep also happens after the last failed polling iteration. + if attempt >= maxWaitDeviceRetries { + return nil + } + attempt++ + + output, err := i.executor.Execute(nil, util.BlockdevBinary, []string{"--getsize", devPath}, types.ExecuteTimeout) + if err == nil { + sectors, _ = strconv.ParseInt(strings.TrimSpace(output), 10, 64) + if sectors >= expectedSectors { + i.logger.Infof("Kernel updated device %v capacity to %v sectors", devPath, sectors) + return nil + } + } + return forceRetryErr + }, + retry.Attempts(uint(maxWaitDeviceRetries+1)), + retry.Delay(waitDeviceInterval), + retry.DelayType(retry.FixedDelay), + retry.LastErrorOnly(true), + ) + } + + if sectors < expectedSectors { + return fmt.Errorf("timeout waiting for device %v to reach expected size %v", devPath, expectedSectors) + } + return i.reloadLinearDmDevice() +} + // IsSuspended checks if the linear dm device is suspended func (i *Initiator) IsSuspended() (bool, error) { devices, err := util.DmsetupInfo(i.Name, i.executor) diff --git a/vendor/github.com/longhorn/go-spdk-helper/pkg/initiator/nvme.go b/vendor/github.com/longhorn/go-spdk-helper/pkg/initiator/nvme.go index 62fe084eb..f98505a42 100644 --- a/vendor/github.com/longhorn/go-spdk-helper/pkg/initiator/nvme.go +++ b/vendor/github.com/longhorn/go-spdk-helper/pkg/initiator/nvme.go @@ -4,7 +4,7 @@ import ( "fmt" "path/filepath" - "github.com/pkg/errors" + "github.com/cockroachdb/errors" "github.com/sirupsen/logrus" commonns "github.com/longhorn/go-common-libs/ns" @@ -62,6 +62,29 @@ func DisconnectTarget(nqn string, executor *commonns.Executor) error { return disconnect(nqn, executor) } +// DisconnectController disconnects a single NVMe controller that +// matches the given NQN, IP, and port. This is used to remove an individual +// multipath path without affecting other controllers for the same subsystem. +// It returns nil if no matching controller is found (already disconnected). +func DisconnectController(nqn, ip, port string, executor *commonns.Executor) error { + subsystems, err := listSubsystems("", executor) + if err != nil { + return errors.Wrap(err, "failed to list subsystems for controller disconnect") + } + for _, sys := range subsystems { + if sys.NQN != nqn { + continue + } + for _, path := range sys.Paths { + controllerIP, controllerPort := GetIPAndPortFromControllerAddress(path.Address) + if controllerIP == ip && controllerPort == port { + return disconnectController(path.Name, executor) + } + } + } + return nil +} + // GetDevices returns all devices func GetDevices(ip, port, nqn string, executor *commonns.Executor) (devices []Device, err error) { defer func() { @@ -148,6 +171,76 @@ func GetDevices(ip, port, nqn string, executor *commonns.Executor) (devices []De } } + if len(res) == 0 { + // NVMe native multipath fallback: when multiple controllers share one + // subsystem NQN, the kernel will only create a single namespace block + // device (e.g. /dev/nvme4n1) under the first controller. Additional + // controllers (e.g. nvme5) added via `nvme connect` do not get their + // own block device because the kernel merges them as extra I/O paths. + // + // The per-device `nvme list-subsys /dev/nvme4n1` only returns the + // path for the controller that owns that block device (nvme4), so the + // primary matching loop above cannot find the new controller. Here we + // query ALL subsystems without a device path filter to discover every + // controller, then map back to the existing namespace block device. + subsystems, err := listSubsystems("", executor) + if err != nil { + return nil, err + } + for _, sys := range subsystems { + if sys.NQN != nqn { + continue + } + pathMatch := false + for _, path := range sys.Paths { + controllerIP, controllerPort := GetIPAndPortFromControllerAddress(path.Address) + if ip != "" && ip != controllerIP { + continue + } + if port != "" && port != controllerPort { + continue + } + pathMatch = true + break + } + if !pathMatch { + continue + } + + // Found a subsystem with a matching controller. Now find the + // namespace block device that belongs to this subsystem from + // the per-device scan we already performed. + for _, d := range devices { + if d.SubsystemNQN != nqn { + continue + } + if len(d.Namespaces) == 0 { + continue + } + // Rebuild this device with ALL controllers from the + // unfiltered subsystem query so the caller can select + // the correct controller. + allControllers := []Controller{} + for _, p := range sys.Paths { + allControllers = append(allControllers, Controller{ + Controller: p.Name, + Transport: p.Transport, + Address: p.Address, + State: p.State, + }) + } + multipathDevice := Device{ + Subsystem: sys.Name, + SubsystemNQN: sys.NQN, + Controllers: allControllers, + Namespaces: d.Namespaces, + } + res = append(res, multipathDevice) + break + } + } + } + if len(res) == 0 { subsystems, err := listSubsystems("", executor) if err != nil { diff --git a/vendor/github.com/longhorn/go-spdk-helper/pkg/initiator/nvmecli.go b/vendor/github.com/longhorn/go-spdk-helper/pkg/initiator/nvmecli.go index 71ae39b85..8a4e48a23 100644 --- a/vendor/github.com/longhorn/go-spdk-helper/pkg/initiator/nvmecli.go +++ b/vendor/github.com/longhorn/go-spdk-helper/pkg/initiator/nvmecli.go @@ -3,12 +3,15 @@ package initiator import ( "encoding/json" "fmt" + "path/filepath" "strconv" "strings" commonns "github.com/longhorn/go-common-libs/ns" "github.com/longhorn/go-spdk-helper/pkg/types" + + spdkutil "github.com/longhorn/go-spdk-helper/pkg/util" ) const ( @@ -233,9 +236,13 @@ func getHostID(executor *commonns.Executor) (string, error) { } func discovery(hostID, hostNQN, ip, port string, executor *commonns.Executor) ([]DiscoveryPageEntry, error) { + ip = spdkutil.NormalizeNvmeAddr(ip) + opts := []string{ "discover", "-t", DefaultTransportType, + // nvme-cli -a accepts bare IPv6 (no brackets). net.SplitHostPort callers + // upstream strip brackets; util.NormalizeNvmeAddr is a safety net. "-a", ip, "-s", port, "-o", "json", @@ -302,6 +309,8 @@ func discovery(hostID, hostNQN, ip, port string, executor *commonns.Executor) ([ } func connect(hostID, hostNQN, nqn, transpotType, ip, port string, executor *commonns.Executor) (string, error) { + ip = spdkutil.NormalizeNvmeAddr(ip) + var err error opts := []string{ @@ -321,6 +330,8 @@ func connect(hostID, hostNQN, nqn, transpotType, ip, port string, executor *comm opts = append(opts, "-q", hostNQN) } if ip != "" { + // nvme-cli -a accepts bare IPv6 (no brackets). net.SplitHostPort callers + // upstream strip brackets; util.NormalizeNvmeAddr is a safety net. opts = append(opts, "-a", ip) } if port != "" { @@ -363,6 +374,19 @@ func disconnect(nqn string, executor *commonns.Executor) error { return err } +// disconnectController disconnects a single NVMe controller by device name +// (e.g. "nvme4"). This removes one multipath path without affecting other +// controllers for the same subsystem NQN. +func disconnectController(controllerName string, executor *commonns.Executor) error { + devPath := filepath.Join("/dev", controllerName) + opts := []string{ + "disconnect", + "--device", devPath, + } + _, err := executor.Execute(nil, nvmeBinary, opts, types.ExecuteTimeout) + return err +} + func extractJSONString(str string) (string, error) { startIndex := strings.Index(str, "{") startIndexBracket := strings.Index(str, "[") @@ -388,8 +412,9 @@ func extractJSONString(str string) (string, error) { return "", fmt.Errorf("invalid JSON string") } -// GetIPAndPortFromControllerAddress returns the IP and port from the controller address +// GetIPAndPortFromControllerAddress returns the IP and port from the controller address. // Input can be either "traddr=10.42.2.18 trsvcid=20006" or "traddr=10.42.2.18,trsvcid=20006" +// for IPv4, or "traddr=fd00::1 trsvcid=20006" for IPv6 (traddr may contain colons). func GetIPAndPortFromControllerAddress(address string) (string, string) { var traddr, trsvcid string @@ -398,7 +423,7 @@ func GetIPAndPortFromControllerAddress(address string) (string, string) { }) for _, part := range parts { - keyVal := strings.Split(part, "=") + keyVal := strings.SplitN(part, "=", 2) if len(keyVal) == 2 { key := strings.TrimSpace(keyVal[0]) value := strings.TrimSpace(keyVal[1]) diff --git a/vendor/github.com/longhorn/go-spdk-helper/pkg/jsonrpc/client.go b/vendor/github.com/longhorn/go-spdk-helper/pkg/jsonrpc/client.go index f48d3bd8c..167cb55a0 100644 --- a/vendor/github.com/longhorn/go-spdk-helper/pkg/jsonrpc/client.go +++ b/vendor/github.com/longhorn/go-spdk-helper/pkg/jsonrpc/client.go @@ -160,7 +160,7 @@ func (c *Client) handleSend(msgWrapper *messageWrapper) { id := c.idCounter if err := c.encoder.Encode(NewMessage(id, msgWrapper.method, msgWrapper.params)); err != nil { - logrus.WithError(err).Errorf("Failed to encode during handleSend") + logrus.WithError(err).Errorf("Failed to encode during handleSend for method %s, params %+v", msgWrapper.method, msgWrapper.params) // In case of the cached error info of the old encoder fails the following response, it's better to recreate the encoder. c.encoder = json.NewEncoder(c.conn) diff --git a/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/client/advanced.go b/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/client/advanced.go index 3c4fce1b3..005abc5ea 100644 --- a/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/client/advanced.go +++ b/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/client/advanced.go @@ -1,11 +1,15 @@ package client import ( + "net" "path/filepath" + "github.com/sirupsen/logrus" + "github.com/longhorn/go-spdk-helper/pkg/jsonrpc" spdktypes "github.com/longhorn/go-spdk-helper/pkg/spdk/types" + spdkutil "github.com/longhorn/go-spdk-helper/pkg/util" ) // AddDevice adds a device with the given device path, name, and cluster size. @@ -53,27 +57,106 @@ func (c *Client) DeleteDevice(bdevAioName, lvsName string) (err error) { return nil } +// DetectAddressFamily returns the NVMe address family for the given IP. +// Exported so downstream repos (longhorn-spdk-engine) can reuse it. +func DetectAddressFamily(ip string) spdktypes.NvmeAddressFamily { + if ip == "" { + return spdktypes.NvmeAddressFamilyIPv4 + } + + normalized := spdkutil.NormalizeNvmeAddr(ip) + parsedIP := net.ParseIP(normalized) + if parsedIP == nil { + logrus.Warnf("Failed to parse IP %q, defaulting to IPv4", ip) + return spdktypes.NvmeAddressFamilyIPv4 + } + + if parsedIP.To4() == nil { + return spdktypes.NvmeAddressFamilyIPv6 + } + return spdktypes.NvmeAddressFamilyIPv4 +} + // StartExposeBdev exposes the bdev with the given nqn, bdevName, nguid, ip, and port. func (c *Client) StartExposeBdev(nqn, bdevName, nguid, ip, port string) error { + ip = spdkutil.NormalizeNvmeAddr(ip) + + logrus.Infof("Exposing bdev with nqn %v, bdevName %v, nguid %v, ip %v, port %v", nqn, bdevName, nguid, ip, port) + nvmfTransportList, err := c.NvmfGetTransports("", "") if err != nil { return err } if nvmfTransportList != nil && len(nvmfTransportList) == 0 { + logrus.Infof("Creating transport with type %v", spdktypes.NvmeTransportTypeTCP) if _, err := c.NvmfCreateTransport(spdktypes.NvmeTransportTypeTCP); err != nil && !jsonrpc.IsJSONRPCRespErrorTransportTypeAlreadyExists(err) { return err } } + logrus.Infof("Creating subsystem with nqn %v", nqn) if _, err := c.NvmfCreateSubsystem(nqn); err != nil { return err } + logrus.Infof("Adding NVMe namespace with bdev name %v and nguid %v to subsystem with nqn %v", bdevName, nguid, nqn) if _, err := c.NvmfSubsystemAddNs(nqn, bdevName, nguid); err != nil { return err } - if _, err := c.NvmfSubsystemAddListener(nqn, ip, port, spdktypes.NvmeTransportTypeTCP, spdktypes.NvmeAddressFamilyIPv4); err != nil { + adrfam := DetectAddressFamily(ip) + + logrus.Infof("Adding listener with transport address %v, transport service id %v, transport type %v, address family %v to subsystem with nqn %v", ip, port, spdktypes.NvmeTransportTypeTCP, adrfam, nqn) + if _, err := c.NvmfSubsystemAddListener(nqn, ip, port, spdktypes.NvmeTransportTypeTCP, adrfam); err != nil { + return err + } + + return nil +} + +// StartExposeBdevWithANAState exposes the bdev with the given nqn, bdevName, +// nguid, nsUUID, ip, port, initial ANA state, and optional CNTLID range. +// nsUUID sets a stable namespace UUID so the Linux kernel can aggregate +// controllers into the same NVMe multipath group. minCntlid/maxCntlid assign +// a unique controller-ID range per engine to avoid "Duplicate cntlid" errors +// when multiple targets share one subsystem NQN. Pass 0 for defaults. +func (c *Client) StartExposeBdevWithANAState(nqn, bdevName, nguid, nsUUID, ip, port string, anaState spdktypes.NvmfSubsystemListenerAnaState, minCntlid, maxCntlid uint16) error { + ip = spdkutil.NormalizeNvmeAddr(ip) + + logrus.Infof("Exposing bdev with nqn %v, bdevName %v, nguid %v, nsUUID %v, ip %v, port %v, anaState %v, minCntlid %v, maxCntlid %v", + nqn, bdevName, nguid, nsUUID, ip, port, anaState, minCntlid, maxCntlid) + + nvmfTransportList, err := c.NvmfGetTransports("", "") + if err != nil { + return err + } + if nvmfTransportList != nil && len(nvmfTransportList) == 0 { + logrus.Infof("Creating transport with type %v", spdktypes.NvmeTransportTypeTCP) + if _, err := c.NvmfCreateTransport(spdktypes.NvmeTransportTypeTCP); err != nil && !jsonrpc.IsJSONRPCRespErrorTransportTypeAlreadyExists(err) { + return err + } + } + + logrus.Infof("Creating subsystem with nqn %v, minCntlid %v, maxCntlid %v", nqn, minCntlid, maxCntlid) + if _, err := c.NvmfCreateSubsystemWithCntlid(nqn, minCntlid, maxCntlid); err != nil { + return err + } + + logrus.Infof("Adding NVMe namespace with bdev name %v, nguid %v, uuid %v to subsystem with nqn %v", bdevName, nguid, nsUUID, nqn) + if _, err := c.NvmfSubsystemAddNsWithUUID(nqn, bdevName, nguid, nsUUID); err != nil { + return err + } + + adrfam := DetectAddressFamily(ip) + + logrus.Infof("Adding listener with transport address %v, transport service id %v, transport type %v, address family %v to subsystem with nqn %v", ip, port, spdktypes.NvmeTransportTypeTCP, adrfam, nqn) + if _, err := c.NvmfSubsystemAddListener(nqn, ip, port, spdktypes.NvmeTransportTypeTCP, adrfam); err != nil { + return err + } + + logrus.Infof("Setting listener ANA state to %v for subsystem with nqn %v", anaState, nqn) + if _, err := c.NvmfSubsystemListenerSetANAState(nqn, ip, port, spdktypes.NvmeTransportTypeTCP, + adrfam, anaState, spdktypes.DefaultNvmfANAGroupID); err != nil { return err } @@ -82,8 +165,9 @@ func (c *Client) StartExposeBdev(nqn, bdevName, nguid, ip, port string) error { // StopExposeBdev stops exposing the bdev with the given nqn. func (c *Client) StopExposeBdev(nqn string) error { - var subsystem *spdktypes.NvmfSubsystem + logrus.Infof("Stopping exposing bdev with nqn %v", nqn) + var subsystem *spdktypes.NvmfSubsystem subsystemList, err := c.NvmfGetSubsystems("", "") if err != nil { return err @@ -104,17 +188,20 @@ func (c *Client) StopExposeBdev(nqn string) error { return err } for _, l := range listenerList { + logrus.Infof("Removing listener with transport address %v, transport service id %v, transport type %v, address family %v", l.Address.Traddr, l.Address.Trsvcid, l.Address.Trtype, l.Address.Adrfam) if _, err := c.NvmfSubsystemRemoveListener(nqn, l.Address.Traddr, l.Address.Trsvcid, l.Address.Trtype, l.Address.Adrfam); err != nil { return err } } for _, ns := range subsystem.Namespaces { + logrus.Infof("Removing namespace with NSID %v", ns.Nsid) if _, err := c.NvmfSubsystemRemoveNs(nqn, ns.Nsid); err != nil { return err } } + logrus.Infof("Deleting subsystem with nqn %v", nqn) if _, err := c.NvmfDeleteSubsystem(nqn, ""); err != nil { return err } diff --git a/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/client/basic.go b/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/client/basic.go index 51c90bc9d..014a7bd2d 100644 --- a/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/client/basic.go +++ b/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/client/basic.go @@ -6,7 +6,7 @@ import ( "strconv" "strings" - "github.com/pkg/errors" + "github.com/cockroachdb/errors" spdktypes "github.com/longhorn/go-spdk-helper/pkg/spdk/types" ) @@ -49,7 +49,8 @@ func (c *Client) BdevAioCreate(filePath, name string, blockSize uint64) (bdevNam BlockSize: blockSize, } - cmdOutput, err := c.jsonCli.SendCommand("bdev_aio_create", req) + // Long blob recovery time might be needed if the spdk_tgt is not shutdown gracefully. + cmdOutput, err := c.jsonCli.SendCommandWithLongTimeout("bdev_aio_create", req) if err != nil { return "", err } @@ -566,6 +567,54 @@ func (c *Client) BdevLvolCheckShallowCopy(operationId uint32) (*spdktypes.Shallo return &shallowCopyStatus, nil } +// BdevLvolStartDeepCopy start a deep copy of lvol over a given bdev. +// Only clusters allocated to the lvol or the lvol's ancestors will be written on the bdev. +// Returns the operation ID needed to check the deep copy status with BdevLvolCheckDeepCopy. +// +// "srcLvolName": Required. UUID or alias of lvol to create a copy from. +// +// "dstBdevName": Required. Name of the bdev that acts as destination for the copy. +func (c *Client) BdevLvolStartDeepCopy(srcLvolName, dstBdevName string) (operationId uint32, err error) { + req := spdktypes.BdevLvolDeepCopyRequest{ + SrcLvolName: srcLvolName, + DstBdevName: dstBdevName, + } + + cmdOutput, err := c.jsonCli.SendCommand("bdev_lvol_start_deep_copy", req) + if err != nil { + return 0, err + } + + deepCopy := spdktypes.DeepCopy{} + err = json.Unmarshal(cmdOutput, &deepCopy) + if err != nil { + return 0, err + } + + return deepCopy.OperationId, nil +} + +// BdevLvolCheckDeepCopy check the status of a deep copy previously started. +// +// "operationId": Required. Operation ID of the deep copy to check. +func (c *Client) BdevLvolCheckDeepCopy(operationId uint32) (*spdktypes.DeepCopyStatus, error) { + deepCopy := spdktypes.DeepCopy{ + OperationId: operationId, + } + cmdOutput, err := c.jsonCli.SendCommand("bdev_lvol_check_deep_copy", deepCopy) + if err != nil { + return nil, err + } + + var deepCopyStatus spdktypes.DeepCopyStatus + err = json.Unmarshal(cmdOutput, &deepCopyStatus) + if err != nil { + return nil, err + } + + return &deepCopyStatus, nil +} + // BdevLvolGetFragmap gets fragmap of the specific segment of the logical volume. // // "name": Required. UUID or alias of the logical volume. @@ -719,14 +768,16 @@ func (c *Client) BdevLvolRename(oldName, newName string) (renamed bool, err erro // BdevRaidCreate constructs a new RAID bdev. // -// "name": Required. a RAID bdev name rather than an alias or a UUID. +// "name": Required. a RAID bdev name rather than an alias or a UUID. // -// "raidLevel": Required. RAID level. It can be "0"/"raid0", "1"/"raid1", "5f"/"raid5f", or "concat". +// "raidLevel": Required. RAID level. It can be "0"/"raid0", "1"/"raid1", "5f"/"raid5f", or "concat". // -// "stripSizeKb": Required. Strip size in KB. It's valid for raid0 and raid5f only. For other raid levels, this would be modified to 0. +// "stripSizeKb": Required. Strip size in KB. It's valid for raid0 and raid5f only. For other raid levels, this would be modified to 0. // -// "baseBdevs": Required. The bdev list used as the underlying disk of the RAID. -func (c *Client) BdevRaidCreate(name string, raidLevel spdktypes.BdevRaidLevel, stripSizeKb uint32, baseBdevs []string) (created bool, err error) { +// "baseBdevs": Required. The bdev list used as the underlying disk of the RAID. +// +// "uuid": Optional. Create the raid bdev with specific uuid +func (c *Client) BdevRaidCreate(name string, raidLevel spdktypes.BdevRaidLevel, stripSizeKb uint32, baseBdevs []string, uuid string) (created bool, err error) { if raidLevel != spdktypes.BdevRaidLevel0 && raidLevel != spdktypes.BdevRaidLevelRaid0 && raidLevel != spdktypes.BdevRaidLevel5f && raidLevel != spdktypes.BdevRaidLevelRaid5f { stripSizeKb = 0 } @@ -737,6 +788,10 @@ func (c *Client) BdevRaidCreate(name string, raidLevel spdktypes.BdevRaidLevel, BaseBdevs: baseBdevs, } + if uuid != "" { + req.UUID = uuid + } + cmdOutput, err := c.jsonCli.SendCommand("bdev_raid_create", req) if err != nil { return false, err @@ -919,7 +974,8 @@ func (c *Client) BdevNvmeAttachController(name, subnqn, traddr, trsvcid string, Multipath: multipath, } - cmdOutput, err := c.jsonCli.SendCommand("bdev_nvme_attach_controller", req) + // Long blob recovery time might be needed if the spdk_tgt is not shutdown gracefully. + cmdOutput, err := c.jsonCli.SendCommandWithLongTimeout("bdev_nvme_attach_controller", req) if err != nil { return nil, err } @@ -959,6 +1015,35 @@ func (c *Client) BdevNvmeGetControllers(name string) (controllerInfoList []spdkt return controllerInfoList, json.Unmarshal(cmdOutput, &controllerInfoList) } +// BdevNvmeGetControllerHealthInfo retrieves health information for a specified +// NVMe bdev controller. +// +// "name": Name of the NVMe controller +func (c *Client) BdevNvmeGetControllerHealthInfo(name string) (healthInfo spdktypes.BdevNvmeControllerHealthInfo, err error) { + req := spdktypes.BdevNvmeGetControllerHealthInfoRequest{ + Name: name, + } + + cmdOutput, err := c.jsonCli.SendCommand("bdev_nvme_get_controller_health_info", req) + if err != nil { + return healthInfo, err + } + + if err := json.Unmarshal(cmdOutput, &healthInfo); err != nil { + return healthInfo, err + } + + // Normalize temperature: SPDK writes temperature as unsigned (Kelvin-273). + // When controller reports invalid/0 temperature in Kelvin, subtracting 273 + // on uint64 underflows and produces a huge number (~2^64 - 273), which is + // meaningless in Celsius. Clamp such outliers to -1 to indicate unknown. + if healthInfo.TemperatureCelsius > 255 { // Values >255°C are invalid for a uint8 S.M.A.R.T. temperature. + healthInfo.TemperatureCelsius = spdktypes.UnknownTemperature + } + + return healthInfo, nil +} + // BdevNvmeSetOptions sets global parameters for all bdev NVMe. // This RPC may only be called before SPDK subsystems have been initialized or any bdev NVMe // has been created. @@ -1079,6 +1164,29 @@ func (c *Client) NvmfCreateSubsystem(nqn string) (created bool, err error) { return created, json.Unmarshal(cmdOutput, &created) } +// NvmfCreateSubsystemWithCntlid constructs an NVMe over Fabrics target +// subsystem with an optional CNTLID range and ANA reporting enabled. +func (c *Client) NvmfCreateSubsystemWithCntlid(nqn string, minCntlid, maxCntlid uint16) (created bool, err error) { + req := spdktypes.NvmfCreateSubsystemRequest{ + Nqn: nqn, + AllowAnyHost: true, + AnaReporting: true, + } + if minCntlid > 0 { + req.MinCntlid = minCntlid + } + if maxCntlid > 0 { + req.MaxCntlid = maxCntlid + } + + cmdOutput, err := c.jsonCli.SendCommand("nvmf_create_subsystem", req) + if err != nil { + return false, err + } + + return created, json.Unmarshal(cmdOutput, &created) +} + // NvmfDeleteSubsystem constructs an NVMe over Fabrics target subsystem.. // // "nqn": Required. Subsystem NQN. @@ -1125,11 +1233,18 @@ func (c *Client) NvmfGetSubsystems(nqn, tgtName string) (subsystemList []spdktyp // // "nguid": Optional. Namespace globally unique identifier. func (c *Client) NvmfSubsystemAddNs(nqn, bdevName, nguid string) (nsid uint32, err error) { + return c.NvmfSubsystemAddNsWithUUID(nqn, bdevName, nguid, "") +} + +// NvmfSubsystemAddNsWithUUID adds a namespace with an optional stable UUID for +// NVMe multipath aggregation. +func (c *Client) NvmfSubsystemAddNsWithUUID(nqn, bdevName, nguid, nsUUID string) (nsid uint32, err error) { req := spdktypes.NvmfSubsystemAddNsRequest{ Nqn: nqn, Namespace: spdktypes.NvmfSubsystemNamespace{ BdevName: bdevName, Nguid: nguid, + UUID: nsUUID, }, } @@ -1258,6 +1373,47 @@ func (c *Client) NvmfSubsystemRemoveListener(nqn, traddr, trsvcid string, trtype return deleted, json.Unmarshal(cmdOutput, &deleted) } +// NvmfSubsystemListenerSetANAState sets the ANA state of a subsystem listener. +// +// "nqn": Required. Subsystem NQN. +// +// "traddr": Required. NVMe-oF target address: an ip or BDF. +// +// "trsvcid": Required. NVMe-oF target trsvcid: a port number. +// +// "trtype": Required. NVMe-oF target trtype. +// +// "adrfam": Required. Address family. +// +// "anaState": Required. Listener ANA state. +// +// "anaGrpid": Optional. ANA group ID. +func (c *Client) NvmfSubsystemListenerSetANAState(nqn, traddr, trsvcid string, trtype spdktypes.NvmeTransportType, + adrfam spdktypes.NvmeAddressFamily, anaState spdktypes.NvmfSubsystemListenerAnaState, anaGrpid uint32) (result bool, err error) { + if anaGrpid == 0 { + anaGrpid = spdktypes.DefaultNvmfANAGroupID + } + + req := spdktypes.NvmfSubsystemListenerSetANAStateRequest{ + Nqn: nqn, + ListenAddress: spdktypes.NvmfSubsystemListenAddress{ + Traddr: traddr, + Trsvcid: trsvcid, + Trtype: trtype, + Adrfam: adrfam, + }, + AnaState: anaState, + AnaGrpid: anaGrpid, + } + + cmdOutput, err := c.jsonCli.SendCommand("nvmf_subsystem_listener_set_ana_state", req) + if err != nil { + return false, err + } + + return result, json.Unmarshal(cmdOutput, &result) +} + // NvmfSubsystemGetListeners lists all listeners for the specified NVMe-oF target subsystem. // // "nqn": Required. Subsystem NQN. @@ -1398,7 +1554,8 @@ func (c *Client) BdevVirtioAttachController(name, trtype, traddr, devType string DevType: devType, } - cmdOutput, err := c.jsonCli.SendCommand("bdev_virtio_attach_controller", req) + // Long blob recovery time might be needed if the spdk_tgt is not shutdown gracefully. + cmdOutput, err := c.jsonCli.SendCommandWithLongTimeout("bdev_virtio_attach_controller", req) if err != nil { return nil, err } @@ -1487,3 +1644,41 @@ func (c *Client) BdevSetQosLimit(bdevName string, rwIOsPerSec, rwMBPerSec, rMBPe return nil } + +// SpdkKillInstance sends a signal to the application. +func (c *Client) SpdkKillInstance(sig string) (result bool, err error) { + req := spdktypes.SpdkKillInstanceRequest{ + SigName: sig, + } + + cmdOutput, err := c.jsonCli.SendCommand("spdk_kill_instance", req) + if err != nil { + return false, err + } + + return result, json.Unmarshal(cmdOutput, &result) +} + +// BdevNvmeSetHotplug enables or disables the NVMe hotplug poller. +// +// "enable": true to enable hotplug, false to disable. +// +// "periodUs": Polling period in microseconds. +func (c *Client) BdevNvmeSetHotplug(enable bool, periodUs uint64) (bool, error) { + params := map[string]interface{}{ + "enable": enable, + "period_us": periodUs, + } + + var result bool + cmdOutput, err := c.jsonCli.SendCommand("bdev_nvme_set_hotplug", params) + if err != nil { + return false, err + } + + if err := json.Unmarshal(cmdOutput, &result); err != nil { + return false, err + } + + return result, nil +} diff --git a/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/client/client.go b/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/client/client.go index a6bef357f..63c12917b 100644 --- a/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/client/client.go +++ b/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/client/client.go @@ -4,7 +4,7 @@ import ( "context" "net" - "github.com/pkg/errors" + "github.com/cockroachdb/errors" "github.com/longhorn/go-spdk-helper/pkg/jsonrpc" "github.com/longhorn/go-spdk-helper/pkg/types" diff --git a/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/client/ublk.go b/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/client/ublk.go index 38031719c..92a531101 100644 --- a/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/client/ublk.go +++ b/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/client/ublk.go @@ -4,8 +4,6 @@ import ( "encoding/json" "fmt" - "github.com/pkg/errors" - spdktypes "github.com/longhorn/go-spdk-helper/pkg/spdk/types" ) @@ -14,17 +12,17 @@ func (c *Client) UblkCreateTarget(cpumask string, disableUserCopy bool) (err err Cpumask: cpumask, DisableUserCopy: disableUserCopy, } - cmdOutput, err := c.jsonCli.SendCommand("ublk_create_target", req) + _, err = c.jsonCli.SendCommand("ublk_create_target", req) if err != nil { - return errors.Wrapf(err, "failed to UblkCreateTarget: %v", string(cmdOutput)) + return err } return nil } func (c *Client) UblkDestroyTarget() (err error) { - cmdOutput, err := c.jsonCli.SendCommand("ublk_destroy_target", struct{}{}) + _, err = c.jsonCli.SendCommand("ublk_destroy_target", struct{}{}) if err != nil { - return errors.Wrapf(err, "failed to UblkDestroyTarget: %v", string(cmdOutput)) + return err } return nil } @@ -36,7 +34,7 @@ func (c *Client) UblkGetDisks(ublkID int32) (ublkDeviceList []spdktypes.UblkDevi } cmdOutput, err := c.jsonCli.SendCommand("ublk_get_disks", req) if err != nil { - return nil, errors.Wrapf(err, "failed to create UblkGetDisks: %v", string(cmdOutput)) + return nil, err } return ublkDeviceList, json.Unmarshal(cmdOutput, &ublkDeviceList) } @@ -48,9 +46,9 @@ func (c *Client) UblkStartDisk(bdevName string, ublkId, queueDepth, numQueues in QueueDepth: queueDepth, NumQueues: numQueues, } - cmdOutput, err := c.jsonCli.SendCommand("ublk_start_disk", req) + _, err = c.jsonCli.SendCommand("ublk_start_disk", req) if err != nil { - return errors.Wrapf(err, "failed to UblkStartDisk: %v", string(cmdOutput)) + return err } return nil } @@ -60,9 +58,9 @@ func (c *Client) UblkRecoverDisk(bdevName string, ublkId int32) (err error) { BdevName: bdevName, UblkId: ublkId, } - cmdOutput, err := c.jsonCli.SendCommand("ublk_recover_disk", req) + _, err = c.jsonCli.SendCommand("ublk_recover_disk", req) if err != nil { - return errors.Wrapf(err, "failed to UblkRecoverDisk: %v", string(cmdOutput)) + return err } return nil } @@ -71,9 +69,9 @@ func (c *Client) UblkStopDisk(ublkId int32) (err error) { req := spdktypes.UblkStopDiskRequest{ UblkId: ublkId, } - cmdOutput, err := c.jsonCli.SendCommand("ublk_stop_disk", req) + _, err = c.jsonCli.SendCommand("ublk_stop_disk", req) if err != nil { - return errors.Wrapf(err, "failed to UblkStopDisk: %v", string(cmdOutput)) + return err } return nil } diff --git a/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/setup/setup.go b/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/setup/setup.go index b6e58dd6f..a67d1667a 100644 --- a/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/setup/setup.go +++ b/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/setup/setup.go @@ -5,7 +5,7 @@ import ( "fmt" "strings" - "github.com/pkg/errors" + "github.com/cockroachdb/errors" commonns "github.com/longhorn/go-common-libs/ns" diff --git a/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/types/bdev.go b/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/types/bdev.go index 67c535a7a..1fcaa40b0 100644 --- a/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/types/bdev.go +++ b/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/types/bdev.go @@ -162,3 +162,7 @@ type BdevStats struct { IoTime uint64 `json:"io_time"` WeightedIoTime uint64 `json:"weighted_io_time"` } + +type SpdkKillInstanceRequest struct { + SigName string `json:"sig_name"` +} diff --git a/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/types/lvol.go b/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/types/lvol.go index b0c4aed35..c4483c33e 100644 --- a/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/types/lvol.go +++ b/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/types/lvol.go @@ -54,6 +54,17 @@ type ShallowCopyStatus struct { Error string `json:"error,omitempty"` } +type DeepCopy struct { + OperationId uint32 `json:"operation_id"` +} + +type DeepCopyStatus struct { + State string `json:"state"` + ProcessedClusters uint64 `json:"processed_clusters"` + TotalClusters uint64 `json:"total_clusters"` + Error string `json:"error,omitempty"` +} + type BdevLvolCreateLvstoreRequest struct { BdevName string `json:"bdev_name"` LvsName string `json:"lvs_name"` @@ -156,6 +167,11 @@ type BdevLvolRangeShallowCopyRequest struct { Clusters []uint64 `json:"clusters"` } +type BdevLvolDeepCopyRequest struct { + SrcLvolName string `json:"src_lvol_name"` + DstBdevName string `json:"dst_bdev_name"` +} + type BdevLvolGetFragmapRequest struct { Name string `json:"name"` Offset uint64 `json:"offset"` diff --git a/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/types/nvme.go b/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/types/nvme.go index cdc80d5e4..c452ccc52 100644 --- a/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/types/nvme.go +++ b/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/types/nvme.go @@ -132,29 +132,42 @@ type BdevNvmeGetControllersRequest struct { Name string `json:"name,omitempty"` } -// type BdevNvmeControllerHealthInfo struct { -// ModelNumber string `json:"model_number"` -// SerialNumber string `json:"serial_number"` -// FirmwareRevision string `json:"firmware_revision"` -// Traddr string `json:"traddr"` -// TemperatureCelsius uint64 `json:"temperature_celsius"` -// AvailableSparePercentage uint64 `json:"available_spare_percentage"` -// AvailableSpareThresholdPercentage uint64 `json:"available_spare_threshold_percentage"` -// PercentageUsed uint64 `json:"percentage_used"` -// DataUnitsRead uint128 `json:"data_units_read"` -// DataUnitsWritten uint128 `json:"data_units_written"` -// HostReadCommands uint128 `json:"host_read_commands"` -// HostWriteCommands uint128 `json:"host_write_commands"` -// ControllerBusyTime uint128 `json:"controller_busy_time"` -// PowerCycles uint128 `json:"power_cycles"` -// PowerOnHours uint128 `json:"power_on_hours"` -// UnsafeShutdowns uint128 `json:"unsafe_shutdowns"` -// MediaErrors uint128 `json:"media_errors"` -// NumErrLogEntries uint128 `json:"num_err_log_entries"` -// WarningTemperatureTimeMinutes uint64 `json:"warning_temperature_time_minutes"` -// CriticalCompositeTemperatureTimeMinutes uint64 `json:"critical_composite_temperature_time_minutes"` -// } -// -// type BdevNvmeGetControllerHealthInfoRequest struct { -// Name string `json:"name"` -// } +// UnknownTemperature represents an unknown/invalid NVMe temperature reading (in Celsius). +// SPDK may emit an underflowed unsigned value when converting Kelvin to Celsius; map such +// outliers to this sentinel at the client layer. +const UnknownTemperature float64 = -1 + +// BdevNvmeControllerHealthInfo represents the response of bdev_nvme_get_controller_health_info. +type BdevNvmeControllerHealthInfo struct { + ModelNumber string `json:"model_number"` + SerialNumber string `json:"serial_number"` + FirmwareRevision string `json:"firmware_revision"` + Traddr string `json:"traddr"` + CriticalWarning uint32 `json:"critical_warning"` + + // TemperatureCelsius can sometimes be reported by SPDK as a wrapped 64-bit sentinel + // value (e.g., 2^64 - 273) when temperature is invalid. Use float64 to avoid + // unmarshal errors on oversized integers and let callers interpret outliers. + TemperatureCelsius float64 `json:"temperature_celsius"` + + AvailableSparePercentage uint32 `json:"available_spare_percentage"` + AvailableSpareThresholdPercentage uint32 `json:"available_spare_threshold_percentage"` + PercentageUsed uint32 `json:"percentage_used"` + DataUnitsRead uint64 `json:"data_units_read"` + DataUnitsWritten uint64 `json:"data_units_written"` + HostReadCommands uint64 `json:"host_read_commands"` + HostWriteCommands uint64 `json:"host_write_commands"` + ControllerBusyTime uint64 `json:"controller_busy_time"` + PowerCycles uint64 `json:"power_cycles"` + PowerOnHours uint64 `json:"power_on_hours"` + UnsafeShutdowns uint64 `json:"unsafe_shutdowns"` + MediaErrors uint64 `json:"media_errors"` + NumErrLogEntries uint64 `json:"num_err_log_entries"` + WarningTemperatureTimeMinutes uint64 `json:"warning_temperature_time_minutes"` + CriticalCompositeTemperatureTimeMinutes uint64 `json:"critical_composite_temperature_time_minutes"` +} + +// BdevNvmeGetControllerHealthInfoRequest is the request for fetching controller health. +type BdevNvmeGetControllerHealthInfoRequest struct { + Name string `json:"name"` +} diff --git a/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/types/nvmf.go b/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/types/nvmf.go index ad2a2296e..0edabba39 100644 --- a/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/types/nvmf.go +++ b/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/types/nvmf.go @@ -1,5 +1,36 @@ package types +import ( + "encoding/json" + "fmt" + "strconv" +) + +type NvmfANAGroupID string + +const DefaultNvmfANAGroupID uint32 = 1 + +func (groupID *NvmfANAGroupID) UnmarshalJSON(data []byte) error { + if string(data) == "null" { + *groupID = "" + return nil + } + + var stringValue string + if err := json.Unmarshal(data, &stringValue); err == nil { + *groupID = NvmfANAGroupID(stringValue) + return nil + } + + var numericValue uint32 + if err := json.Unmarshal(data, &numericValue); err == nil { + *groupID = NvmfANAGroupID(strconv.FormatUint(uint64(numericValue), 10)) + return nil + } + + return fmt.Errorf("failed to unmarshal ANA group ID %s", string(data)) +} + type NvmfCreateTransportRequest struct { Trtype NvmeTransportType `json:"trtype"` } @@ -80,6 +111,34 @@ type NvmfSubsystemNamespace struct { PtplFile string `json:"ptpl_file,omitempty"` } +// UnmarshalJSON handles SPDK returning anagrpid as either a string or a number. +func (ns *NvmfSubsystemNamespace) UnmarshalJSON(data []byte) error { + type Alias NvmfSubsystemNamespace + aux := &struct { + Anagrpid json.RawMessage `json:"anagrpid,omitempty"` + *Alias + }{ + Alias: (*Alias)(ns), + } + if err := json.Unmarshal(data, aux); err != nil { + return err + } + if len(aux.Anagrpid) > 0 && string(aux.Anagrpid) != "null" { + var s string + if err := json.Unmarshal(aux.Anagrpid, &s); err == nil { + ns.Anagrpid = s + } else { + var n uint32 + if err := json.Unmarshal(aux.Anagrpid, &n); err == nil { + ns.Anagrpid = strconv.FormatUint(uint64(n), 10) + } else { + return fmt.Errorf("failed to unmarshal anagrpid: %s", string(aux.Anagrpid)) + } + } + } + return nil +} + type NvmfSubsystemHost struct { Nqn string `json:"nqn"` } @@ -110,6 +169,15 @@ type NvmfSubsystemRemoveListenerRequest struct { TgtName string `json:"tgt_name,omitempty"` } +type NvmfSubsystemListenerSetANAStateRequest struct { + Nqn string `json:"nqn"` + ListenAddress NvmfSubsystemListenAddress `json:"listen_address"` + AnaState NvmfSubsystemListenerAnaState `json:"ana_state"` + AnaGrpid uint32 `json:"anagrpid,omitempty"` + + TgtName string `json:"tgt_name,omitempty"` +} + type NvmfSubsystemGetListenersRequest struct { Nqn string `json:"nqn"` @@ -119,11 +187,16 @@ type NvmfSubsystemGetListenersRequest struct { type NvmfSubsystemListenerAnaState string const ( - NvmfSubsystemListenerAnaStateOptimized = "optimized" - NvmfSubsystemListenerAnaStateNonOptimized = "non-optimized" - NvmfSubsystemListenerAnaStateInaccessible = "Inaccessible" - NvmfSubsystemListenerAnaStatePersistentLoss = "persistent-loss" - NvmfSubsystemListenerAnaStateChange = "change" + NvmfSubsystemListenerAnaStateOptimized = "optimized" + NvmfSubsystemListenerAnaStateNonOptimized = "non_optimized" + NvmfSubsystemListenerAnaStateInaccessible = "inaccessible" + + // Deprecated: NvmfSubsystemListenerAnaStatePersistentLoss is kept for + // backward compatibility with older Longhorn versions that reference it. + NvmfSubsystemListenerAnaStatePersistentLoss = "persistent_loss" + // Deprecated: NvmfSubsystemListenerAnaStateChange is kept for backward + // compatibility with older Longhorn versions that reference it. + NvmfSubsystemListenerAnaStateChange = "change" ) type NvmfSubsystemListener struct { diff --git a/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/types/raid.go b/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/types/raid.go index 264592947..1039d2be9 100644 --- a/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/types/raid.go +++ b/vendor/github.com/longhorn/go-spdk-helper/pkg/spdk/types/raid.go @@ -37,6 +37,7 @@ type BdevRaidCreateRequest struct { RaidLevel BdevRaidLevel `json:"raid_level"` StripSizeKb uint32 `json:"strip_size_kb"` BaseBdevs []string `json:"base_bdevs"` + UUID string `json:"uuid,omitempty"` } type BdevRaidDeleteRequest struct { diff --git a/vendor/github.com/longhorn/go-spdk-helper/pkg/util/device.go b/vendor/github.com/longhorn/go-spdk-helper/pkg/util/device.go index 4264fcc43..2645b8e19 100644 --- a/vendor/github.com/longhorn/go-spdk-helper/pkg/util/device.go +++ b/vendor/github.com/longhorn/go-spdk-helper/pkg/util/device.go @@ -10,7 +10,7 @@ import ( "strings" "time" - "github.com/pkg/errors" + "github.com/cockroachdb/errors" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" @@ -21,6 +21,7 @@ import ( const ( lsblkBinary = "lsblk" + blkidBinary = "blkid" BlockdevBinary = "blockdev" ) @@ -274,3 +275,32 @@ func IsBlockDevice(path string) (bool, error) { return (st.Mode & unix.S_IFMT) == unix.S_IFBLK, nil } + +// hasFilesystem checks if the device has a filesystem using `blkid -s TYPE` +func hasFilesystem(devPath string, executor *commonns.Executor) bool { + opts := []string{"-s", "TYPE", "-o", "value", devPath} + output, err := executor.Execute(nil, blkidBinary, opts, types.ExecuteTimeout) + if err != nil { + // blkid failed = no filesystem detected + return false + } + return len(strings.TrimSpace(output)) > 0 +} + +// hasPartitionTable checks if the device has a partition table using `blkid -s PTTYPE` +func hasPartitionTable(devPath string, executor *commonns.Executor) bool { + opts := []string{"-s", "PTTYPE", "-o", "value", devPath} + output, err := executor.Execute(nil, blkidBinary, opts, types.ExecuteTimeout) + if err != nil { + // blkid failed = no partition table detected + return false + } + return len(strings.TrimSpace(output)) > 0 +} + +// IsBlockDeviceInUse returns true if the given block device has a filesystem +// or partition table detected by blkid, indicating that the device is already +// in use or contains existing data. +func IsBlockDeviceInUse(devPath string, executor *commonns.Executor) bool { + return hasFilesystem(devPath, executor) || hasPartitionTable(devPath, executor) +} diff --git a/vendor/github.com/longhorn/go-spdk-helper/pkg/util/dmsetup.go b/vendor/github.com/longhorn/go-spdk-helper/pkg/util/dmsetup.go index 6f57fb967..9cc216b54 100644 --- a/vendor/github.com/longhorn/go-spdk-helper/pkg/util/dmsetup.go +++ b/vendor/github.com/longhorn/go-spdk-helper/pkg/util/dmsetup.go @@ -3,6 +3,7 @@ package util import ( "fmt" "regexp" + "strconv" "strings" commonns "github.com/longhorn/go-common-libs/ns" @@ -85,17 +86,51 @@ func DmsetupDeps(dmDeviceName string, executor *commonns.Executor) ([]string, er return nil, err } - return parseDependentDevicesFromString(outputStr), nil + knownDevices, err := GetKnownDevices(executor) + if err != nil { + return nil, err + } + + return parseDependentDevicesFromString(outputStr, knownDevices), nil } -func parseDependentDevicesFromString(str string) []string { - re := regexp.MustCompile(`\(([\w-]+)\)`) +func parseDependentDevicesFromString(str string, knownDevices map[string]*LonghornBlockDevice) []string { + re := regexp.MustCompile(`\(([^)]+)\)`) matches := re.FindAllStringSubmatch(str, -1) devices := make([]string, 0, len(matches)) for _, match := range matches { - devices = append(devices, match[1]) + dep := strings.TrimSpace(match[1]) + if dep == "" { + continue + } + + if !strings.Contains(dep, ",") { + devices = append(devices, dep) + continue + } + + majorMinor := strings.Split(dep, ",") + if len(majorMinor) != 2 { + continue + } + + major, errMajor := strconv.Atoi(strings.TrimSpace(majorMinor[0])) + minor, errMinor := strconv.Atoi(strings.TrimSpace(majorMinor[1])) + if errMajor != nil || errMinor != nil { + continue + } + + for _, device := range knownDevices { + if device == nil { + continue + } + if device.Source.Major == major && device.Source.Minor == minor { + devices = append(devices, device.Source.Name) + break + } + } } return devices diff --git a/vendor/github.com/longhorn/go-spdk-helper/pkg/util/nvme.go b/vendor/github.com/longhorn/go-spdk-helper/pkg/util/nvme.go index b851fd6c0..74192e854 100644 --- a/vendor/github.com/longhorn/go-spdk-helper/pkg/util/nvme.go +++ b/vendor/github.com/longhorn/go-spdk-helper/pkg/util/nvme.go @@ -4,6 +4,7 @@ import ( "fmt" "path/filepath" "regexp" + "strings" ) const ( @@ -29,3 +30,13 @@ func GetNvmeControllerNameFromNamespaceName(nsName string) string { reg := regexp.MustCompile(`([^"]*)n\d+$`) return reg.ReplaceAllString(nsName, "${1}") } + +// NormalizeNvmeAddr strips surrounding brackets from an IPv6 address if present. +// Both nvme-cli (-a flag) and SPDK expect bare IPv6 (e.g., "fd00::1" not "[fd00::1]"). +func NormalizeNvmeAddr(ip string) string { + ip = strings.TrimSpace(ip) + if strings.HasPrefix(ip, "[") && strings.HasSuffix(ip, "]") { + ip = ip[1 : len(ip)-1] + } + return ip +} diff --git a/vendor/k8s.io/apimachinery/pkg/util/version/doc.go b/vendor/k8s.io/apimachinery/pkg/util/version/doc.go new file mode 100644 index 000000000..da88813da --- /dev/null +++ b/vendor/k8s.io/apimachinery/pkg/util/version/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package version provides utilities for version number comparisons +package version diff --git a/vendor/k8s.io/apimachinery/pkg/util/version/version.go b/vendor/k8s.io/apimachinery/pkg/util/version/version.go new file mode 100644 index 000000000..72c0769e6 --- /dev/null +++ b/vendor/k8s.io/apimachinery/pkg/util/version/version.go @@ -0,0 +1,484 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package version + +import ( + "bytes" + "errors" + "fmt" + "regexp" + "strconv" + "strings" + + apimachineryversion "k8s.io/apimachinery/pkg/version" +) + +// Version is an opaque representation of a version number +type Version struct { + components []uint + semver bool + preRelease string + buildMetadata string +} + +var ( + // versionMatchRE splits a version string into numeric and "extra" parts + versionMatchRE = regexp.MustCompile(`^\s*v?([0-9]+(?:\.[0-9]+)*)(.*)*$`) + // extraMatchRE splits the "extra" part of versionMatchRE into semver pre-release and build metadata; it does not validate the "no leading zeroes" constraint for pre-release + extraMatchRE = regexp.MustCompile(`^(?:-([0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+([0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?\s*$`) +) + +func parse(str string, semver bool) (*Version, error) { + parts := versionMatchRE.FindStringSubmatch(str) + if parts == nil { + return nil, fmt.Errorf("could not parse %q as version", str) + } + numbers, extra := parts[1], parts[2] + + components := strings.Split(numbers, ".") + if (semver && len(components) != 3) || (!semver && len(components) < 2) { + return nil, fmt.Errorf("illegal version string %q", str) + } + + v := &Version{ + components: make([]uint, len(components)), + semver: semver, + } + for i, comp := range components { + if (i == 0 || semver) && strings.HasPrefix(comp, "0") && comp != "0" { + return nil, fmt.Errorf("illegal zero-prefixed version component %q in %q", comp, str) + } + num, err := strconv.ParseUint(comp, 10, 0) + if err != nil { + return nil, fmt.Errorf("illegal non-numeric version component %q in %q: %v", comp, str, err) + } + v.components[i] = uint(num) + } + + if semver && extra != "" { + extraParts := extraMatchRE.FindStringSubmatch(extra) + if extraParts == nil { + return nil, fmt.Errorf("could not parse pre-release/metadata (%s) in version %q", extra, str) + } + v.preRelease, v.buildMetadata = extraParts[1], extraParts[2] + + for _, comp := range strings.Split(v.preRelease, ".") { + if _, err := strconv.ParseUint(comp, 10, 0); err == nil { + if strings.HasPrefix(comp, "0") && comp != "0" { + return nil, fmt.Errorf("illegal zero-prefixed version component %q in %q", comp, str) + } + } + } + } + + return v, nil +} + +// HighestSupportedVersion returns the highest supported version +// This function assumes that the highest supported version must be v1.x. +func HighestSupportedVersion(versions []string) (*Version, error) { + if len(versions) == 0 { + return nil, errors.New("empty array for supported versions") + } + + var ( + highestSupportedVersion *Version + theErr error + ) + + for i := len(versions) - 1; i >= 0; i-- { + currentHighestVer, err := ParseGeneric(versions[i]) + if err != nil { + theErr = err + continue + } + + if currentHighestVer.Major() > 1 { + continue + } + + if highestSupportedVersion == nil || highestSupportedVersion.LessThan(currentHighestVer) { + highestSupportedVersion = currentHighestVer + } + } + + if highestSupportedVersion == nil { + return nil, fmt.Errorf( + "could not find a highest supported version from versions (%v) reported: %+v", + versions, theErr) + } + + if highestSupportedVersion.Major() != 1 { + return nil, fmt.Errorf("highest supported version reported is %v, must be v1.x", highestSupportedVersion) + } + + return highestSupportedVersion, nil +} + +// ParseGeneric parses a "generic" version string. The version string must consist of two +// or more dot-separated numeric fields (the first of which can't have leading zeroes), +// followed by arbitrary uninterpreted data (which need not be separated from the final +// numeric field by punctuation). For convenience, leading and trailing whitespace is +// ignored, and the version can be preceded by the letter "v". See also ParseSemantic. +func ParseGeneric(str string) (*Version, error) { + return parse(str, false) +} + +// MustParseGeneric is like ParseGeneric except that it panics on error +func MustParseGeneric(str string) *Version { + v, err := ParseGeneric(str) + if err != nil { + panic(err) + } + return v +} + +// Parse tries to do ParseSemantic first to keep more information. +// If ParseSemantic fails, it would just do ParseGeneric. +func Parse(str string) (*Version, error) { + v, err := parse(str, true) + if err != nil { + return parse(str, false) + } + return v, err +} + +// MustParse is like Parse except that it panics on error +func MustParse(str string) *Version { + v, err := Parse(str) + if err != nil { + panic(err) + } + return v +} + +// ParseMajorMinor parses a "generic" version string and returns a version with the major and minor version. +func ParseMajorMinor(str string) (*Version, error) { + v, err := ParseGeneric(str) + if err != nil { + return nil, err + } + return MajorMinor(v.Major(), v.Minor()), nil +} + +// MustParseMajorMinor is like ParseMajorMinor except that it panics on error +func MustParseMajorMinor(str string) *Version { + v, err := ParseMajorMinor(str) + if err != nil { + panic(err) + } + return v +} + +// ParseSemantic parses a version string that exactly obeys the syntax and semantics of +// the "Semantic Versioning" specification (http://semver.org/) (although it ignores +// leading and trailing whitespace, and allows the version to be preceded by "v"). For +// version strings that are not guaranteed to obey the Semantic Versioning syntax, use +// ParseGeneric. +func ParseSemantic(str string) (*Version, error) { + return parse(str, true) +} + +// MustParseSemantic is like ParseSemantic except that it panics on error +func MustParseSemantic(str string) *Version { + v, err := ParseSemantic(str) + if err != nil { + panic(err) + } + return v +} + +// MajorMinor returns a version with the provided major and minor version. +func MajorMinor(major, minor uint) *Version { + return &Version{components: []uint{major, minor}} +} + +// Major returns the major release number +func (v *Version) Major() uint { + return v.components[0] +} + +// Minor returns the minor release number +func (v *Version) Minor() uint { + return v.components[1] +} + +// Patch returns the patch release number if v is a Semantic Version, or 0 +func (v *Version) Patch() uint { + if len(v.components) < 3 { + return 0 + } + return v.components[2] +} + +// BuildMetadata returns the build metadata, if v is a Semantic Version, or "" +func (v *Version) BuildMetadata() string { + return v.buildMetadata +} + +// PreRelease returns the prerelease metadata, if v is a Semantic Version, or "" +func (v *Version) PreRelease() string { + return v.preRelease +} + +// Components returns the version number components +func (v *Version) Components() []uint { + return v.components +} + +// WithMajor returns copy of the version object with requested major number +func (v *Version) WithMajor(major uint) *Version { + result := *v + result.components = []uint{major, v.Minor(), v.Patch()} + return &result +} + +// WithMinor returns copy of the version object with requested minor number +func (v *Version) WithMinor(minor uint) *Version { + result := *v + result.components = []uint{v.Major(), minor, v.Patch()} + return &result +} + +// SubtractMinor returns the version with offset from the original minor, with the same major and no patch. +// If -offset >= current minor, the minor would be 0. +func (v *Version) OffsetMinor(offset int) *Version { + var minor uint + if offset >= 0 { + minor = v.Minor() + uint(offset) + } else { + diff := uint(-offset) + if diff < v.Minor() { + minor = v.Minor() - diff + } + } + return MajorMinor(v.Major(), minor) +} + +// SubtractMinor returns the version diff minor versions back, with the same major and no patch. +// If diff >= current minor, the minor would be 0. +func (v *Version) SubtractMinor(diff uint) *Version { + return v.OffsetMinor(-int(diff)) +} + +// AddMinor returns the version diff minor versions forward, with the same major and no patch. +func (v *Version) AddMinor(diff uint) *Version { + return v.OffsetMinor(int(diff)) +} + +// WithPatch returns copy of the version object with requested patch number +func (v *Version) WithPatch(patch uint) *Version { + result := *v + result.components = []uint{v.Major(), v.Minor(), patch} + return &result +} + +// WithPreRelease returns copy of the version object with requested prerelease +func (v *Version) WithPreRelease(preRelease string) *Version { + if len(preRelease) == 0 { + return v + } + result := *v + result.components = []uint{v.Major(), v.Minor(), v.Patch()} + result.preRelease = preRelease + return &result +} + +// WithBuildMetadata returns copy of the version object with requested buildMetadata +func (v *Version) WithBuildMetadata(buildMetadata string) *Version { + result := *v + result.components = []uint{v.Major(), v.Minor(), v.Patch()} + result.buildMetadata = buildMetadata + return &result +} + +// String converts a Version back to a string; note that for versions parsed with +// ParseGeneric, this will not include the trailing uninterpreted portion of the version +// number. +func (v *Version) String() string { + if v == nil { + return "" + } + var buffer bytes.Buffer + + for i, comp := range v.components { + if i > 0 { + buffer.WriteString(".") + } + buffer.WriteString(fmt.Sprintf("%d", comp)) + } + if v.preRelease != "" { + buffer.WriteString("-") + buffer.WriteString(v.preRelease) + } + if v.buildMetadata != "" { + buffer.WriteString("+") + buffer.WriteString(v.buildMetadata) + } + + return buffer.String() +} + +// compareInternal returns -1 if v is less than other, 1 if it is greater than other, or 0 +// if they are equal +func (v *Version) compareInternal(other *Version) int { + + vLen := len(v.components) + oLen := len(other.components) + for i := 0; i < vLen && i < oLen; i++ { + switch { + case other.components[i] < v.components[i]: + return 1 + case other.components[i] > v.components[i]: + return -1 + } + } + + // If components are common but one has more items and they are not zeros, it is bigger + switch { + case oLen < vLen && !onlyZeros(v.components[oLen:]): + return 1 + case oLen > vLen && !onlyZeros(other.components[vLen:]): + return -1 + } + + if !v.semver || !other.semver { + return 0 + } + + switch { + case v.preRelease == "" && other.preRelease != "": + return 1 + case v.preRelease != "" && other.preRelease == "": + return -1 + case v.preRelease == other.preRelease: // includes case where both are "" + return 0 + } + + vPR := strings.Split(v.preRelease, ".") + oPR := strings.Split(other.preRelease, ".") + for i := 0; i < len(vPR) && i < len(oPR); i++ { + vNum, err := strconv.ParseUint(vPR[i], 10, 0) + if err == nil { + oNum, err := strconv.ParseUint(oPR[i], 10, 0) + if err == nil { + switch { + case oNum < vNum: + return 1 + case oNum > vNum: + return -1 + default: + continue + } + } + } + if oPR[i] < vPR[i] { + return 1 + } else if oPR[i] > vPR[i] { + return -1 + } + } + + switch { + case len(oPR) < len(vPR): + return 1 + case len(oPR) > len(vPR): + return -1 + } + + return 0 +} + +// returns false if array contain any non-zero element +func onlyZeros(array []uint) bool { + for _, num := range array { + if num != 0 { + return false + } + } + return true +} + +// EqualTo tests if a version is equal to a given version. +func (v *Version) EqualTo(other *Version) bool { + if v == nil { + return other == nil + } + if other == nil { + return false + } + return v.compareInternal(other) == 0 +} + +// AtLeast tests if a version is at least equal to a given minimum version. If both +// Versions are Semantic Versions, this will use the Semantic Version comparison +// algorithm. Otherwise, it will compare only the numeric components, with non-present +// components being considered "0" (ie, "1.4" is equal to "1.4.0"). +func (v *Version) AtLeast(min *Version) bool { + return v.compareInternal(min) != -1 +} + +// LessThan tests if a version is less than a given version. (It is exactly the opposite +// of AtLeast, for situations where asking "is v too old?" makes more sense than asking +// "is v new enough?".) +func (v *Version) LessThan(other *Version) bool { + return v.compareInternal(other) == -1 +} + +// GreaterThan tests if a version is greater than a given version. +func (v *Version) GreaterThan(other *Version) bool { + return v.compareInternal(other) == 1 +} + +// Compare compares v against a version string (which will be parsed as either Semantic +// or non-Semantic depending on v). On success it returns -1 if v is less than other, 1 if +// it is greater than other, or 0 if they are equal. +func (v *Version) Compare(other string) (int, error) { + ov, err := parse(other, v.semver) + if err != nil { + return 0, err + } + return v.compareInternal(ov), nil +} + +// WithInfo returns copy of the version object. +// Deprecated: The Info field has been removed from the Version struct. This method no longer modifies the Version object. +func (v *Version) WithInfo(info apimachineryversion.Info) *Version { + result := *v + return &result +} + +// Info returns the version information of a component. +// Deprecated: Use Info() from effective version instead. +func (v *Version) Info() *apimachineryversion.Info { + if v == nil { + return nil + } + // in case info is empty, or the major and minor in info is different from the actual major and minor + return &apimachineryversion.Info{ + Major: Itoa(v.Major()), + Minor: Itoa(v.Minor()), + GitVersion: v.String(), + } +} + +func Itoa(i uint) string { + if i == 0 { + return "" + } + return strconv.Itoa(int(i)) +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 9604f7254..16bf60f82 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -47,6 +47,9 @@ github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/service ## explicit; go 1.14 github.com/RoaringBitmap/roaring github.com/RoaringBitmap/roaring/internal +# github.com/avast/retry-go/v4 v4.7.0 +## explicit; go 1.20 +github.com/avast/retry-go/v4 # github.com/aws/aws-sdk-go-v2 v1.41.5 ## explicit; go 1.24 github.com/aws/aws-sdk-go-v2/aws @@ -341,7 +344,7 @@ github.com/longhorn/backupstore/s3 github.com/longhorn/backupstore/types github.com/longhorn/backupstore/util github.com/longhorn/backupstore/vfs -# github.com/longhorn/go-common-libs v0.0.0-20260328134226-cafa38fc4ce8 +# github.com/longhorn/go-common-libs v0.0.0-20260502161928-1e84fa75a8f1 ## explicit; go 1.25.0 github.com/longhorn/go-common-libs/backingimage github.com/longhorn/go-common-libs/backup @@ -364,8 +367,8 @@ github.com/longhorn/go-iscsi-helper/iscsidev github.com/longhorn/go-iscsi-helper/longhorndev github.com/longhorn/go-iscsi-helper/types github.com/longhorn/go-iscsi-helper/util -# github.com/longhorn/go-spdk-helper v0.0.3-0.20250712161648-42d38592f838 -## explicit; go 1.23.0 +# github.com/longhorn/go-spdk-helper v0.6.1 +## explicit; go 1.25.3 github.com/longhorn/go-spdk-helper/pkg/initiator github.com/longhorn/go-spdk-helper/pkg/jsonrpc github.com/longhorn/go-spdk-helper/pkg/spdk/client @@ -500,7 +503,7 @@ github.com/yusufpapurcu/wmi # go.uber.org/multierr v1.11.0 ## explicit; go 1.19 go.uber.org/multierr -# golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90 +# golang.org/x/exp v0.0.0-20260410095643-746e56fc9e2f ## explicit; go 1.25.0 golang.org/x/exp/constraints # golang.org/x/net v0.49.0 @@ -767,6 +770,7 @@ k8s.io/apimachinery/pkg/util/runtime k8s.io/apimachinery/pkg/util/sets k8s.io/apimachinery/pkg/util/validation k8s.io/apimachinery/pkg/util/validation/field +k8s.io/apimachinery/pkg/util/version k8s.io/apimachinery/pkg/util/wait k8s.io/apimachinery/pkg/util/yaml k8s.io/apimachinery/pkg/version