Skip to content

Commit 79123cf

Browse files
committed
nfs41: add support for read delegations
Motivation: Read/write delegations allow the server to delegate file access checks to clients. As a result, a client doesn't need to send new open requests to the server, which means less network overhead. This is especially useful when a client opens the same file from multiple processes (HPC workload). Modification: 1. add a record to track issued delegations 2. Update FileTracker to return OpenRecord instead of stateid on open 3. Populate OpenRecord with delegation params, if issued 4. Issue a read-delegation, if requested by the client, when: - no opens with write access exist - a call-back channel to the client is established - the client doesn't already have a delegation for the given file. A delegation is recalled if a conflicting open-for-write arrives. Result: Server can issue read-delegation, if requested. Acked-by: Lea Morschel Closes: #137 Target: master
1 parent 49403b9 commit 79123cf

File tree

5 files changed

+276
-39
lines changed

5 files changed

+276
-39
lines changed

core/src/main/java/org/dcache/nfs/v4/FileTracker.java

+141-4
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
package org.dcache.nfs.v4;
2121

2222
import com.google.common.util.concurrent.Striped;
23+
24+
import java.io.IOException;
2325
import java.util.ArrayList;
2426
import java.util.Collection;
2527
import java.util.Iterator;
@@ -30,9 +32,13 @@
3032
import java.util.stream.Collectors;
3133
import org.dcache.nfs.ChimeraNFSException;
3234
import org.dcache.nfs.status.BadStateidException;
35+
import org.dcache.nfs.status.DelayException;
3336
import org.dcache.nfs.status.InvalException;
3437
import org.dcache.nfs.status.ShareDeniedException;
38+
import org.dcache.nfs.status.StaleException;
3539
import org.dcache.nfs.v4.xdr.nfs4_prot;
40+
import org.dcache.nfs.v4.xdr.nfs_fh4;
41+
import org.dcache.nfs.v4.xdr.open_delegation_type4;
3642
import org.dcache.nfs.v4.xdr.stateid4;
3743
import org.dcache.nfs.vfs.Inode;
3844
import org.dcache.nfs.util.Opaque;
@@ -55,6 +61,11 @@ public class FileTracker {
5561
private final Striped<Lock> filesLock = Striped.lock(Runtime.getRuntime().availableProcessors()*4);
5662
private final Map<Opaque, List<OpenState>> files = new ConcurrentHashMap<>();
5763

64+
/**
65+
* Delegation records associated with open files.
66+
*/
67+
private final Map<Opaque, List<DelegationState>> delegations = new ConcurrentHashMap<>();
68+
5869
private static class OpenState {
5970

6071
private final NFS4Client client;
@@ -118,6 +129,27 @@ public NFS4Client getClient() {
118129
}
119130
}
120131

132+
/**
133+
* Record associated with open-delegation.
134+
* @param client
135+
* @param stateid
136+
* @param delegationType
137+
*/
138+
record DelegationState(NFS4Client client, stateid4 openStateId, stateid4 stateid, int delegationType) {
139+
140+
}
141+
142+
/**
143+
* Record associated with an open file.
144+
*
145+
* @param openStateId
146+
* @param delegationStateId
147+
* @param hasDelegation
148+
*/
149+
public record OpenRecord(stateid4 openStateId, stateid4 delegationStateId, boolean hasDelegation) {
150+
151+
}
152+
121153
/**
122154
* Add a new open to the list of open files. If provided {@code shareAccess}
123155
* and {@code shareDeny} conflict with existing opens, {@link ShareDeniedException}
@@ -127,11 +159,14 @@ public NFS4Client getClient() {
127159
* @param inode of opened file.
128160
* @param shareAccess type of access required.
129161
* @param shareDeny type of access to deny others.
130-
* @return a snapshot of the stateid associated with open.
162+
* @return a snapshot of an OpenRecord associated with open.
131163
* @throws ShareDeniedException if share reservation conflicts with an existing open.
132164
* @throws ChimeraNFSException
133165
*/
134-
public stateid4 addOpen(NFS4Client client, StateOwner owner, Inode inode, int shareAccess, int shareDeny) throws ChimeraNFSException {
166+
public OpenRecord addOpen(NFS4Client client, StateOwner owner, Inode inode, int shareAccess, int shareDeny) throws ChimeraNFSException {
167+
168+
boolean wantReadDelegation = (shareAccess & nfs4_prot.OPEN4_SHARE_ACCESS_WANT_READ_DELEG) != 0;
169+
boolean wantWriteDelegation = (shareAccess & nfs4_prot.OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG) != 0;
135170

136171
Opaque fileId = new Opaque(inode.getFileId());
137172
Lock lock = filesLock.get(fileId);
@@ -170,7 +205,43 @@ public stateid4 addOpen(NFS4Client client, StateOwner owner, Inode inode, int sh
170205

171206
os.stateid.seqid++;
172207
//we need to return copy to avoid modification by concurrent opens
173-
return new stateid4(os.stateid.other, os.stateid.seqid);
208+
var openStateid = new stateid4(os.stateid.other, os.stateid.seqid);
209+
return new OpenRecord(openStateid, null, false);
210+
}
211+
}
212+
213+
/*
214+
* REVISIT: currently only read-delegations are supported
215+
*/
216+
var existingDelegations = delegations.get(fileId);
217+
218+
/*
219+
* delegation is possible if:
220+
* - client has a callback channel
221+
* - client does not have a delegation for this file
222+
* - no other open has write access
223+
*/
224+
boolean canDelegate = client.getCB() != null &&
225+
(existingDelegations == null || existingDelegations.stream().noneMatch(d -> d.client().getId() == client.getId())) &&
226+
opens.stream().noneMatch(os -> (os.shareAccess & nfs4_prot.OPEN4_SHARE_ACCESS_WRITE) != 0);
227+
228+
// recall any existing read delegations if the new open requests write access
229+
if ((existingDelegations != null) && (shareAccess & nfs4_prot.OPEN4_SHARE_ACCESS_WRITE) != 0) {
230+
231+
// REVISIT: usage of Stream#peek is an anti-pattern
232+
boolean haveRecalled = existingDelegations.stream()
233+
.filter(d -> client.isLeaseValid())
234+
.peek(d -> {
235+
try {
236+
d.client().getCB()
237+
.cbDelegationRecall(new nfs_fh4(inode.toNfsHandle()), d.stateid(), false);
238+
} catch (IOException e) {
239+
// ignore
240+
}
241+
}).count() > 0;
242+
243+
if (haveRecalled) {
244+
throw new DelayException("Recalling read delegations");
174245
}
175246
}
176247

@@ -180,8 +251,21 @@ public stateid4 addOpen(NFS4Client client, StateOwner owner, Inode inode, int sh
180251
opens.add(openState);
181252
state.addDisposeListener(s -> removeOpen(inode, stateid));
182253
stateid.seqid++;
254+
183255
//we need to return copy to avoid modification by concurrent opens
184-
return new stateid4(stateid.other, stateid.seqid);
256+
var openStateid = new stateid4(stateid.other, stateid.seqid);
257+
258+
// REVISIT: currently only read-delegations are supported
259+
if (wantReadDelegation && canDelegate) {
260+
// REVISIT: currently only read-delegations are supported
261+
stateid4 delegationStateid = client.createState(state.getStateOwner(), state).stateid();
262+
delegations.computeIfAbsent(fileId, x -> new ArrayList<>(1))
263+
.add(new DelegationState(client, openStateid, delegationStateid, open_delegation_type4.OPEN_DELEGATE_READ));
264+
return new OpenRecord(openStateid, delegationStateid, true);
265+
} else {
266+
//we need to return copy to avoid modification by concurrent opens
267+
return new OpenRecord(openStateid, null, false);
268+
}
185269
} finally {
186270
lock.unlock();
187271
}
@@ -241,6 +325,42 @@ public stateid4 downgradeOpen(NFS4Client client, stateid4 stateid, Inode inode,
241325
}
242326
}
243327

328+
/**
329+
* Return delegation for the given file
330+
* @param client nfs client who returns the delegation.
331+
* @param stateid delegation stateid
332+
* @param inode the inode of the delegated file.
333+
*/
334+
public void delegationReturn(NFS4Client client, stateid4 stateid, Inode inode)
335+
throws StaleException {
336+
337+
Opaque fileId = new Opaque(inode.getFileId());
338+
Lock lock = filesLock.get(fileId);
339+
lock.lock();
340+
try {
341+
342+
var fileDelegations = delegations.get(fileId);
343+
if (fileDelegations == null) {
344+
throw new StaleException("no delegation found");
345+
}
346+
347+
DelegationState delegation = fileDelegations.stream()
348+
.filter(d -> d.client().getId() == client.getId())
349+
.filter(d -> d.stateid().equals(stateid))
350+
.findFirst()
351+
.orElseThrow(StaleException::new);
352+
353+
fileDelegations.remove(delegation);
354+
if (fileDelegations.isEmpty()) {
355+
delegations.remove(fileId);
356+
}
357+
358+
} finally {
359+
lock.unlock();
360+
}
361+
}
362+
363+
244364
/**
245365
* Remove an open from the list.
246366
* @param inode of the opened file
@@ -271,6 +391,23 @@ void removeOpen(Inode inode, stateid4 stateid) {
271391
files.remove(fileId);
272392
}
273393
}
394+
395+
var existingDelegations = delegations.get(fileId);
396+
if (existingDelegations != null) {
397+
Iterator<DelegationState> dsi = existingDelegations.iterator();
398+
while (dsi.hasNext()) {
399+
stateid4 os = dsi.next().openStateId();
400+
if (os.equals(stateid)) {
401+
dsi.remove();
402+
break;
403+
}
404+
}
405+
406+
if (existingDelegations.isEmpty()) {
407+
delegations.remove(fileId);
408+
}
409+
}
410+
274411
} finally {
275412
lock.unlock();
276413
}

core/src/main/java/org/dcache/nfs/v4/OperationDELEGRETURN.java

+17-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2009 - 2012 Deutsches Elektronen-Synchroton,
2+
* Copyright (c) 2009 - 2025 Deutsches Elektronen-Synchroton,
33
* Member of the Helmholtz Association, (DESY), HAMBURG, GERMANY
44
*
55
* This library is free software; you can redistribute it and/or modify
@@ -19,10 +19,13 @@
1919
*/
2020
package org.dcache.nfs.v4;
2121

22+
import org.dcache.nfs.ChimeraNFSException;
2223
import org.dcache.nfs.nfsstat;
2324
import org.dcache.nfs.v4.xdr.nfs_argop4;
2425
import org.dcache.nfs.v4.xdr.nfs_opnum4;
2526
import org.dcache.nfs.v4.xdr.nfs_resop4;
27+
import org.dcache.nfs.v4.xdr.stateid4;
28+
import org.dcache.nfs.vfs.Inode;
2629
import org.slf4j.Logger;
2730
import org.slf4j.LoggerFactory;
2831

@@ -35,7 +38,18 @@ public OperationDELEGRETURN(nfs_argop4 args) {
3538
}
3639

3740
@Override
38-
public void process(CompoundContext context, nfs_resop4 result) {
39-
result.opdelegreturn.status = nfsstat.NFSERR_NOTSUPP;
41+
public void process(CompoundContext context, nfs_resop4 result) throws ChimeraNFSException {
42+
43+
final Inode inode = context.currentInode();
44+
stateid4 stateid = Stateids.getCurrentStateidIfNeeded(context, _args.opdelegreturn.deleg_stateid);
45+
NFS4Client client;
46+
if (context.getMinorversion() > 0) {
47+
client = context.getSession().getClient();
48+
} else {
49+
client = context.getStateHandler().getClientIdByStateId(stateid);
50+
}
51+
52+
context.getStateHandler().getFileTracker().delegationReturn(client, stateid, inode);
53+
result.opdelegreturn.status = nfsstat.NFS_OK;
4054
}
4155
}

core/src/main/java/org/dcache/nfs/v4/OperationOPEN.java

+28-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2009 - 2023 Deutsches Elektronen-Synchroton,
2+
* Copyright (c) 2009 - 2025 Deutsches Elektronen-Synchroton,
33
* Member of the Helmholtz Association, (DESY), HAMBURG, GERMANY
44
*
55
* This library is free software; you can redistribute it and/or modify
@@ -22,13 +22,19 @@
2222
import java.io.IOException;
2323
import java.util.Optional;
2424

25+
import org.dcache.nfs.v4.xdr.aceflag4;
26+
import org.dcache.nfs.v4.xdr.acemask4;
27+
import org.dcache.nfs.v4.xdr.acetype4;
28+
import org.dcache.nfs.v4.xdr.nfsace4;
2529
import org.dcache.nfs.v4.xdr.open_delegation_type4;
2630
import org.dcache.nfs.v4.xdr.change_info4;
2731
import org.dcache.nfs.v4.xdr.bitmap4;
2832
import org.dcache.nfs.v4.xdr.nfs4_prot;
2933
import org.dcache.nfs.v4.xdr.nfs_argop4;
3034
import org.dcache.nfs.v4.xdr.changeid4;
3135
import org.dcache.nfs.nfsstat;
36+
import org.dcache.nfs.v4.xdr.open_none_delegation4;
37+
import org.dcache.nfs.v4.xdr.open_read_delegation4;
3238
import org.dcache.nfs.v4.xdr.uint32_t;
3339
import org.dcache.nfs.v4.xdr.opentype4;
3440
import org.dcache.nfs.v4.xdr.open_claim_type4;
@@ -51,6 +57,8 @@
5157
import org.dcache.nfs.v4.xdr.mode4;
5258
import org.dcache.nfs.v4.xdr.nfs_resop4;
5359
import org.dcache.nfs.v4.xdr.stateid4;
60+
import org.dcache.nfs.v4.xdr.utf8str_mixed;
61+
import org.dcache.nfs.v4.xdr.why_no_delegation4;
5462
import org.dcache.nfs.vfs.Inode;
5563
import org.dcache.nfs.vfs.Stat;
5664
import org.dcache.oncrpc4j.rpc.OncRpcException;
@@ -85,7 +93,9 @@ public void process(CompoundContext context, nfs_resop4 result) throws ChimeraNF
8593
res.resok4 = new OPEN4resok();
8694
res.resok4.attrset = new bitmap4();
8795
res.resok4.delegation = new open_delegation4();
88-
res.resok4.delegation.delegation_type = open_delegation_type4.OPEN_DELEGATE_NONE;
96+
res.resok4.delegation.delegation_type = context.getMinorversion() == 0 ? open_delegation_type4.OPEN_DELEGATE_NONE : open_delegation_type4.OPEN_DELEGATE_NONE_EXT;
97+
res.resok4.delegation.od_whynone = new open_none_delegation4();
98+
res.resok4.delegation.od_whynone.ond_why = why_no_delegation4.WND4_NOT_WANTED;
8999
res.resok4.cinfo = new change_info4();
90100
res.resok4.cinfo.atomic = true;
91101

@@ -264,15 +274,28 @@ public void process(CompoundContext context, nfs_resop4 result) throws ChimeraNF
264274
* This is a perfectly valid situation, as in the end the file is created and only
265275
* one writer is allowed.
266276
*/
267-
stateid4 stateid = context
277+
var openRecord = context
268278
.getStateHandler()
269279
.getFileTracker()
270280
.addOpen(client, owner, context.currentInode(),
271281
_args.opopen.share_access.value,
272282
_args.opopen.share_deny.value);
273283

274-
context.currentStateid(stateid);
275-
res.resok4.stateid = stateid;
284+
context.currentStateid(openRecord.openStateId());
285+
res.resok4.stateid = openRecord.openStateId();
286+
if (openRecord.hasDelegation()) {
287+
res.resok4.delegation.delegation_type = open_delegation_type4.OPEN_DELEGATE_READ;
288+
res.resok4.delegation.read = new open_read_delegation4();
289+
res.resok4.delegation.read.stateid = openRecord.delegationStateId();
290+
res.resok4.delegation.read.permissions = new nfsace4();
291+
res.resok4.delegation.read.permissions.type = new acetype4(nfs4_prot.ACE4_ACCESS_ALLOWED_ACE_TYPE);
292+
res.resok4.delegation.read.permissions.flag = new aceflag4(0);
293+
res.resok4.delegation.read.permissions.access_mask = new acemask4(nfs4_prot.ACCESS4_READ);
294+
res.resok4.delegation.read.permissions.who = new utf8str_mixed(context.getPrincipal().getName());
295+
} else if ((_args.opopen.share_access.value & nfs4_prot.OPEN4_SHARE_ACCESS_WANT_ANY_DELEG) != 0) {
296+
// REVISIT: shall we return something less general?
297+
res.resok4.delegation.od_whynone.ond_why = why_no_delegation4.WND4_RESOURCE;
298+
}
276299
res.status = nfsstat.NFS_OK;
277300

278301
}

0 commit comments

Comments
 (0)