Skip to content

Commit 68bdbaa

Browse files
committed
fix(proxy): use local SPDK client for engine lookup in v2 VolumeGet
The v2 proxy VolumeGet handler previously used req.Address (the DirectToURL of the service object) to create an SPDK client for both engine and EngineFrontend lookups. For v2 volumes, the manager passes the EngineFrontend's IM address as req.Address so the proxy can observe the frontend device state. When engine and EngineFrontend are on different Instance Managers (e.g. after fault recovery), this causes the handler to look up the engine on the EF's IM where it does not exist, resulting in a permanent NotFound error. Fix this by separating the two lookups: - Engine: always use the local SPDK service (the proxy runs on the engine's IM, so the engine is guaranteed to be local) - EngineFrontend: use req.Address to reach the potentially remote EF IM If the EF client connection fails, gracefully degrade to engine-only info rather than failing the entire call. Longhorn 13215 Signed-off-by: Derek Su <derek.su@suse.com>
1 parent 8b9d207 commit 68bdbaa

2 files changed

Lines changed: 58 additions & 17 deletions

File tree

pkg/proxy/proxy.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,9 @@ type ProxyOps interface {
6060
}
6161

6262
type V1DataEngineProxyOps struct{}
63-
type V2DataEngineProxyOps struct{}
63+
type V2DataEngineProxyOps struct {
64+
spdkServiceAddress string
65+
}
6466

6567
type Proxy struct {
6668
rpc.UnimplementedProxyEngineServiceServer
@@ -77,7 +79,7 @@ func NewProxy(ctx context.Context, logsDir, diskServiceAddress, spdkServiceAddre
7779

7880
ops := map[rpc.DataEngine]ProxyOps{
7981
rpc.DataEngine_DATA_ENGINE_V1: V1DataEngineProxyOps{},
80-
rpc.DataEngine_DATA_ENGINE_V2: V2DataEngineProxyOps{},
82+
rpc.DataEngine_DATA_ENGINE_V2: V2DataEngineProxyOps{spdkServiceAddress: spdkServiceAddress},
8183
}
8284

8385
spdkLocalClient, err := spdkclient.NewSPDKClient(spdkServiceAddress)

pkg/proxy/volume.go

Lines changed: 54 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
lhns "github.com/longhorn/go-common-libs/ns"
1717
lhtypes "github.com/longhorn/go-common-libs/types"
1818
eclient "github.com/longhorn/longhorn-engine/pkg/controller/client"
19+
spdkclient "github.com/longhorn/longhorn-spdk-engine/pkg/client"
1920
rpc "github.com/longhorn/types/pkg/generated/imrpc"
2021

2122
"github.com/longhorn/longhorn-instance-manager/pkg/util"
@@ -77,22 +78,26 @@ func (ops V1DataEngineProxyOps) VolumeGet(ctx context.Context, req *rpc.ProxyEng
7778
}
7879

7980
func (ops V2DataEngineProxyOps) VolumeGet(ctx context.Context, req *rpc.ProxyEngineRequest) (resp *rpc.EngineVolumeGetProxyResponse, err error) {
80-
c, err := getSPDKClientFromAddress(req.Address)
81+
// The engine is always local to this IM (proxy runs on engine IM).
82+
// Use the local SPDK service for EngineGet so it works even when the
83+
// EngineFrontend is on a remote node.
84+
localClient, err := spdkclient.NewSPDKClient(ops.spdkServiceAddress)
8185
if err != nil {
82-
return nil, grpcstatus.Errorf(grpccodes.Internal, "failed to get SPDK client from engine address %v: %v", req.Address, err)
86+
return nil, grpcstatus.Errorf(grpccodes.Internal, "failed to create local SPDK client for address %v: %v", ops.spdkServiceAddress, err)
8387
}
8488
defer func() {
85-
if closeErr := c.Close(); closeErr != nil {
89+
if closeErr := localClient.Close(); closeErr != nil {
8690
logrus.WithFields(logrus.Fields{
8791
"serviceURL": req.Address,
88-
"engineFrontendName": req.EngineFrontendName,
92+
"spdkServiceAddress": ops.spdkServiceAddress,
93+
"engineName": req.EngineName,
8994
"volumeName": req.VolumeName,
9095
"dataEngine": req.DataEngine,
91-
}).WithError(closeErr).Warn("Failed to close SPDK client")
96+
}).WithError(closeErr).Warn("Failed to close local SPDK client")
9297
}
9398
}()
9499

95-
engine, err := c.EngineGet(req.EngineName)
100+
engine, err := localClient.EngineGet(req.EngineName)
96101
if err != nil {
97102
return nil, grpcstatus.Errorf(grpccodes.Internal, "failed to get engine %v: %v", req.EngineName, err)
98103
}
@@ -104,15 +109,49 @@ func (ops V2DataEngineProxyOps) VolumeGet(ctx context.Context, req *rpc.ProxyEng
104109
lastExpansionError := engine.LastExpansionError
105110
lastExpansionFailedAt := engine.LastExpansionFailedAt
106111
if req.EngineFrontendName != "" {
107-
engineFrontend, err := c.EngineFrontendGet(req.EngineFrontendName)
108-
if err == nil && engineFrontend != nil {
109-
size = int64(engineFrontend.SpecSize)
110-
endpoint = engineFrontend.Endpoint
111-
frontend = engineFrontend.Frontend
112-
isExpanding = engine.IsExpanding || engineFrontend.IsExpanding
113-
if engineFrontend.LastExpansionError != "" {
114-
lastExpansionError = engineFrontend.LastExpansionError
115-
lastExpansionFailedAt = engineFrontend.LastExpansionFailedAt
112+
// The EngineFrontend may be on a remote IM. Use req.Address
113+
// (the EF IM address) and derive its SPDK service endpoint.
114+
efClient, err := getSPDKClientFromAddress(req.Address)
115+
if err != nil {
116+
logrus.WithFields(logrus.Fields{
117+
"serviceURL": req.Address,
118+
"engineFrontendName": req.EngineFrontendName,
119+
}).WithError(err).Warn("Failed to get SPDK client for engine frontend, returning engine-only info")
120+
} else {
121+
defer func() {
122+
if closeErr := efClient.Close(); closeErr != nil {
123+
logrus.WithFields(logrus.Fields{
124+
"serviceURL": req.Address,
125+
"engineFrontendName": req.EngineFrontendName,
126+
}).WithError(closeErr).Warn("Failed to close EF SPDK client")
127+
}
128+
}()
129+
engineFrontend, err := efClient.EngineFrontendGet(req.EngineFrontendName)
130+
if err != nil {
131+
logrus.WithFields(logrus.Fields{
132+
"engineName": req.EngineName,
133+
"volumeName": req.VolumeName,
134+
"dataEngine": req.DataEngine,
135+
"serviceURL": req.Address,
136+
"engineFrontendName": req.EngineFrontendName,
137+
}).WithError(err).Warn("Failed to get engine frontend, returning engine-only info")
138+
} else if engineFrontend != nil {
139+
size = int64(engineFrontend.SpecSize)
140+
endpoint = engineFrontend.Endpoint
141+
frontend = engineFrontend.Frontend
142+
isExpanding = engine.IsExpanding || engineFrontend.IsExpanding
143+
if engineFrontend.LastExpansionError != "" {
144+
lastExpansionError = engineFrontend.LastExpansionError
145+
lastExpansionFailedAt = engineFrontend.LastExpansionFailedAt
146+
}
147+
} else {
148+
logrus.WithFields(logrus.Fields{
149+
"engineName": req.EngineName,
150+
"volumeName": req.VolumeName,
151+
"dataEngine": req.DataEngine,
152+
"serviceURL": req.Address,
153+
"engineFrontendName": req.EngineFrontendName,
154+
}).Warn("Engine frontend not found, returning engine-only info")
116155
}
117156
}
118157
}

0 commit comments

Comments
 (0)