Skip to content

Commit 7f2a9ee

Browse files
committed
fix: detect and remount stale NFS mounts in NodePublishVolume
When an NFS server restarts, existing mounts become stale. If a pod with fsGroup is then restarted, kubelet calls applyFSGroup, which does an lstat on the mount path and fails with ESTALE before the CSI driver gets a chance to remount. Fix this by checking for a stale file handle when the target path is already mounted: if one is detected, unmount the stale mount and proceed with a fresh mount. Ref 927
1 parent 86447ec commit 7f2a9ee

1 file changed

Lines changed: 26 additions & 1 deletion

File tree

pkg/nfs/nodeserver.go

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@ package nfs
1818

1919
import (
2020
"context"
21+
"errors"
2122
"fmt"
2223
"os"
2324
"strconv"
2425
"strings"
26+
"syscall"
2527
"time"
2628

2729
"github.com/container-storage-interface/spec/lib/go/csi"
@@ -131,7 +133,18 @@ func (ns *NodeServer) NodePublishVolume(_ context.Context, req *csi.NodePublishV
131133
}
132134
}
133135
if !notMnt {
134-
return &csi.NodePublishVolumeResponse{}, nil
136+
// check if the existing mount is stale (e.g. after NFS server restart)
137+
if _, err := os.Lstat(targetPath); err != nil && os.IsPermission(err) {
138+
return &csi.NodePublishVolumeResponse{}, nil
139+
} else if err != nil && isStaleFileHandle(err) {
140+
klog.Warningf("NodePublishVolume: detected stale mount at %s, attempting remount", targetPath)
141+
if unmountErr := ns.mounter.Unmount(targetPath); unmountErr != nil {
142+
return nil, status.Errorf(codes.Internal, "failed to unmount stale mount %s: %v", targetPath, unmountErr)
143+
}
144+
// fall through to remount
145+
} else {
146+
return &csi.NodePublishVolumeResponse{}, nil
147+
}
135148
}
136149

137150
klog.V(2).Infof("NodePublishVolume: volumeID(%v) source(%s) targetPath(%s) mountflags(%v)", volumeID, source, targetPath, mountOptions)
@@ -315,3 +328,15 @@ func makeDir(pathname string) error {
315328
}
316329
return nil
317330
}
331+
332+
// isStaleFileHandle reports whether err indicates a stale NFS file handle
// (ESTALE).
//
// A nil error is never stale. When the error chain carries a syscall.Errno,
// that errno alone decides the result. Otherwise the error text is searched,
// case-insensitively, for the "stale file handle" / "stale NFS file handle"
// messages, so that errors which only carry the text (including the
// capitalized "Stale file handle" spelling) are still recognized.
func isStaleFileHandle(err error) bool {
	if err == nil {
		return false
	}
	var errno syscall.Errno
	if errors.As(err, &errno) {
		return errno == syscall.ESTALE
	}
	// No errno in the chain: fall back to matching the message. Lower-case it
	// first so the match does not depend on how the producer capitalized it.
	msg := strings.ToLower(err.Error())
	return strings.Contains(msg, "stale nfs file handle") || strings.Contains(msg, "stale file handle")
}

0 commit comments

Comments
 (0)