
Commit 1c2a92a

HADOOP-19303. VectorIO API: support pass-down of a release() operator (#7418)
The PositionedReadable vector IO API has a new readVectored() method which takes a release operator as its third argument: readVectored(List<? extends FileRange> ranges, IntFunction<ByteBuffer> allocate, Consumer<ByteBuffer> release). This allows buffers to be returned to pools even when reads fail. The default implementation delegates to the existing two-argument readVectored(), so custom implementations of that method still get invoked. Contributed by Steve Loughran
1 parent 32dad20 commit 1c2a92a
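
To illustrate the new API from the caller's side, here is a minimal, hypothetical sketch of reading two ranges through the release-aware overload; the file path, range offsets/lengths, and the choice of ElasticByteBufferPool are assumptions for the example and are not part of this commit.

```java
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileRange;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ElasticByteBufferPool;

public class VectoredReadWithReleaseExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path(args[0]);                   // hypothetical input file
    ElasticByteBufferPool pool = new ElasticByteBufferPool();

    // Two example ranges; offsets and lengths are arbitrary.
    List<FileRange> ranges = Arrays.asList(
        FileRange.createFileRange(0, 16_384),
        FileRange.createFileRange(1_048_576, 16_384));

    try (FSDataInputStream in = FileSystem.get(conf).open(path)) {
      // Allocate from the pool; if a read fails, the stream hands the
      // buffer back through the release consumer instead of leaking it.
      in.readVectored(ranges,
          len -> pool.getBuffer(false, len),
          pool::putBuffer);

      for (FileRange range : ranges) {
        ByteBuffer buffer = range.getData().get();   // await completion
        try {
          // ... process the buffer ...
        } finally {
          pool.putBuffer(buffer);                    // return after use
        }
      }
    }
  }
}
```

The release consumer only covers buffers the stream itself gives up on; buffers that complete successfully are still returned by the caller, as in the loop above.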

File tree: 11 files changed (+427 lines, -54 lines)


hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BufferedFSInputStream.java

Lines changed: 8 additions & 0 deletions
@@ -24,6 +24,7 @@
 import java.util.StringJoiner;
 import java.nio.ByteBuffer;
 import java.util.List;
+import java.util.function.Consumer;
 import java.util.function.IntFunction;
 
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -181,4 +182,11 @@ public void readVectored(List<? extends FileRange> ranges,
       IntFunction<ByteBuffer> allocate) throws IOException {
     ((PositionedReadable) in).readVectored(ranges, allocate);
   }
+
+  @Override
+  public void readVectored(final List<? extends FileRange> ranges,
+      final IntFunction<ByteBuffer> allocate,
+      final Consumer<ByteBuffer> release) throws IOException {
+    ((PositionedReadable) in).readVectored(ranges, allocate, release);
+  }
 }

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java

Lines changed: 10 additions & 2 deletions
@@ -32,6 +32,7 @@
 import java.util.Optional;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.CompletionException;
+import java.util.function.Consumer;
 import java.util.function.IntFunction;
 import java.util.zip.CRC32;
 
@@ -438,6 +439,13 @@ static ByteBuffer checkBytes(ByteBuffer sumsBytes,
   @Override
   public void readVectored(List<? extends FileRange> ranges,
       IntFunction<ByteBuffer> allocate) throws IOException {
+    readVectored(ranges, allocate, (b) -> { });
+  }
+
+  @Override
+  public void readVectored(final List<? extends FileRange> ranges,
+      final IntFunction<ByteBuffer> allocate,
+      final Consumer<ByteBuffer> release) throws IOException {
 
     // If the stream doesn't have checksums, just delegate.
     if (sums == null) {
@@ -462,8 +470,8 @@ public void readVectored(List<? extends FileRange> ranges,
     }
     List<CombinedFileRange> checksumRanges = findChecksumRanges(dataRanges,
         bytesPerSum, minSeek, maxSize);
-    sums.readVectored(checksumRanges, allocate);
-    datas.readVectored(dataRanges, allocate);
+    sums.readVectored(checksumRanges, allocate, release);
+    datas.readVectored(dataRanges, allocate, release);
     for(CombinedFileRange checksumRange: checksumRanges) {
       for(FileRange dataRange: checksumRange.getUnderlying()) {
         // when we have both the ranges, validate the checksum

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java

Lines changed: 8 additions & 0 deletions
@@ -27,6 +27,7 @@
 import java.nio.ByteBuffer;
 import java.util.EnumSet;
 import java.util.List;
+import java.util.function.Consumer;
 import java.util.function.IntFunction;
 
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -306,4 +307,11 @@ public void readVectored(List<? extends FileRange> ranges,
       IntFunction<ByteBuffer> allocate) throws IOException {
     ((PositionedReadable) in).readVectored(ranges, allocate);
   }
+
+  @Override
+  public void readVectored(final List<? extends FileRange> ranges,
+      final IntFunction<ByteBuffer> allocate,
+      final Consumer<ByteBuffer> release) throws IOException {
+    ((PositionedReadable) in).readVectored(ranges, allocate, release);
+  }
 }

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PositionedReadable.java

Lines changed: 29 additions & 0 deletions
@@ -21,11 +21,13 @@
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.List;
+import java.util.function.Consumer;
 import java.util.function.IntFunction;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 
+import static java.util.Objects.requireNonNull;
 import static org.apache.hadoop.io.Sizes.S_16K;
 import static org.apache.hadoop.io.Sizes.S_1M;
 
@@ -136,4 +138,31 @@ default void readVectored(List<? extends FileRange> ranges,
       IntFunction<ByteBuffer> allocate) throws IOException {
     VectoredReadUtils.readVectored(this, ranges, allocate);
   }
+
+  /**
+   * Extension of {@link #readVectored(List, IntFunction)} where a {@code release(buffer)}
+   * operation may be invoked if problems surface during reads.
+   * <p>
+   * The {@code release} operation is invoked after an IOException
+   * to return the active buffer to a pool before reporting a failure
+   * in the future.
+   * <p>
+   * The default implementation calls {@link #readVectored(List, IntFunction)}.
+   * <p>
+   * Implementations SHOULD override this method if they can release buffers as
+   * part of their error handling.
+   * @param ranges the byte ranges to read
+   * @param allocate function to allocate ByteBuffer
+   * @param release callable to release a ByteBuffer.
+   * @throws IOException any IOE.
+   * @throws IllegalArgumentException if any of ranges are invalid, or they overlap.
+   * @throws NullPointerException null arguments.
+   */
+  default void readVectored(List<? extends FileRange> ranges,
+      IntFunction<ByteBuffer> allocate,
+      Consumer<ByteBuffer> release) throws IOException {
+    requireNonNull(release);
+    readVectored(ranges, allocate);
+  }
+
 }
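
As a sketch of the behaviour this javadoc asks implementors to provide, the hypothetical fragment below (not part of this patch) allocates one buffer per range and, if starting the reads fails, hands every allocated buffer back through release before failing the per-range futures.

```java
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.function.Consumer;
import java.util.function.IntFunction;

import org.apache.hadoop.fs.FileRange;
import org.apache.hadoop.fs.PositionedReadable;

/**
 * Hypothetical stream fragment: release buffers on failure, then report
 * the error through each range's future.
 */
abstract class ReleaseAwareStream implements PositionedReadable {

  /** Start one asynchronous range read; implementation specific. */
  protected abstract void startRead(FileRange range, ByteBuffer buffer)
      throws IOException;

  @Override
  public void readVectored(List<? extends FileRange> ranges,
      IntFunction<ByteBuffer> allocate,
      Consumer<ByteBuffer> release) throws IOException {
    // Set up a future per range first, so every range can report failure.
    for (FileRange range : ranges) {
      range.setData(new CompletableFuture<>());
    }
    List<ByteBuffer> allocated = new ArrayList<>(ranges.size());
    try {
      for (FileRange range : ranges) {
        ByteBuffer buffer = allocate.apply(range.getLength());
        allocated.add(buffer);
        startRead(range, buffer);
      }
    } catch (IOException e) {
      // Hand every buffer already taken from the pool back via release...
      allocated.forEach(release);
      // ...then report the failure through the per-range futures.
      for (FileRange range : ranges) {
        range.getData().completeExceptionally(e);
      }
    }
  }
}
```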

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java

Lines changed: 94 additions & 33 deletions
@@ -49,25 +49,29 @@
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.List;
 import java.util.concurrent.CompletableFuture;
+import java.util.function.Consumer;
 import java.util.function.IntFunction;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.impl.StoreImplementationUtils;
+import org.apache.hadoop.fs.impl.VectorIOBufferPool;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.fs.statistics.IOStatistics;
 import org.apache.hadoop.fs.statistics.IOStatisticsAggregator;
 import org.apache.hadoop.fs.statistics.IOStatisticsContext;
 import org.apache.hadoop.fs.statistics.IOStatisticsSource;
 import org.apache.hadoop.fs.statistics.BufferedIOStatisticsOutputStream;
 import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore;
+import org.apache.hadoop.io.ByteBufferPool;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.nativeio.NativeIO;
 import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.Shell;
 import org.apache.hadoop.util.StringUtils;
 
+import static org.apache.hadoop.fs.VectoredReadUtils.LOG_BYTE_BUFFER_RELEASED;
 import static org.apache.hadoop.fs.VectoredReadUtils.sortRangeList;
 import static org.apache.hadoop.fs.VectoredReadUtils.validateRangeRequest;
 import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs;
@@ -319,74 +323,131 @@ AsynchronousFileChannel getAsyncChannel() throws IOException {
     @Override
     public void readVectored(List<? extends FileRange> ranges,
         IntFunction<ByteBuffer> allocate) throws IOException {
+      readVectored(ranges, allocate, LOG_BYTE_BUFFER_RELEASED);
+    }
+
+    @Override
+    public void readVectored(final List<? extends FileRange> ranges,
+        final IntFunction<ByteBuffer> allocate,
+        final Consumer<ByteBuffer> release) throws IOException {
 
       // Validate, but do not pass in a file length as it may change.
       List<? extends FileRange> sortedRanges = sortRangeList(ranges);
-      // Set up all of the futures, so that we can use them if things fail
-      for(FileRange range: sortedRanges) {
+      // Set up all of the futures, so that the caller can await on
+      // their completion.
+      for (FileRange range: sortedRanges) {
        validateRangeRequest(range);
        range.setData(new CompletableFuture<>());
      }
-      try {
-        AsynchronousFileChannel channel = getAsyncChannel();
-        ByteBuffer[] buffers = new ByteBuffer[sortedRanges.size()];
-        AsyncHandler asyncHandler = new AsyncHandler(channel, sortedRanges, buffers);
-        for(int i = 0; i < sortedRanges.size(); ++i) {
-          FileRange range = sortedRanges.get(i);
-          buffers[i] = allocate.apply(range.getLength());
-          channel.read(buffers[i], range.getOffset(), i, asyncHandler);
-        }
-      } catch (IOException ioe) {
-        LOG.debug("Exception occurred during vectored read ", ioe);
-        for(FileRange range: sortedRanges) {
-          range.getData().completeExceptionally(ioe);
-        }
-      }
+      final ByteBufferPool pool = new VectorIOBufferPool(allocate, release);
+      // Initiate the asynchronous reads.
+      new AsyncHandler(getAsyncChannel(),
+          sortedRanges,
+          pool)
+          .initiateRead();
     }
   }
 
   /**
    * A CompletionHandler that implements readFully and translates back
    * into the form of CompletionHandler that our users expect.
+   * <p>
+   * All reads are started in {@link #initiateRead()};
+   * the handler then receives callbacks on success
+   * {@link #completed(Integer, Integer)}, and on failure
+   * by {@link #failed(Throwable, Integer)}.
+   * These are mapped to the specific range in the read, and its
+   * outcome updated.
   */
-  static class AsyncHandler implements CompletionHandler<Integer, Integer> {
+  private static class AsyncHandler implements CompletionHandler<Integer, Integer> {
+    /** File channel to read from. */
     private final AsynchronousFileChannel channel;
+
+    /** Ranges to fetch. */
     private final List<? extends FileRange> ranges;
+
+    /**
+     * Pool providing allocate/release operations.
+     */
+    private final ByteBufferPool allocateRelease;
+
+    /** Buffers being read. */
     private final ByteBuffer[] buffers;
 
-    AsyncHandler(AsynchronousFileChannel channel,
-        List<? extends FileRange> ranges,
-        ByteBuffer[] buffers) {
+    /**
+     * Instantiate.
+     * @param channel open channel.
+     * @param ranges ranges to read.
+     * @param allocateRelease pool for allocating buffers, and releasing on failure
+     */
+    AsyncHandler(
+        final AsynchronousFileChannel channel,
+        final List<? extends FileRange> ranges,
+        final ByteBufferPool allocateRelease) {
      this.channel = channel;
      this.ranges = ranges;
-      this.buffers = buffers;
+      this.buffers = new ByteBuffer[ranges.size()];
+      this.allocateRelease = allocateRelease;
+    }
+
+    /**
+     * Initiate the read operation.
+     * <p>
+     * Allocate all buffers, queue the read into the channel,
+     * providing this object as the handler.
+     */
+    private void initiateRead() {
+      for(int i = 0; i < ranges.size(); ++i) {
+        FileRange range = ranges.get(i);
+        buffers[i] = allocateRelease.getBuffer(false, range.getLength());
+        channel.read(buffers[i], range.getOffset(), i, this);
+      }
     }
 
+    /**
+     * Callback for a completed full/partial read.
+     * <p>
+     * For an EOF the number of bytes may be -1.
+     * That is mapped to a {@link #failed(Throwable, Integer)} outcome.
+     * @param result The bytes read.
+     * @param rangeIndex range index within the range list.
+     */
     @Override
-    public void completed(Integer result, Integer r) {
-      FileRange range = ranges.get(r);
-      ByteBuffer buffer = buffers[r];
+    public void completed(Integer result, Integer rangeIndex) {
+      FileRange range = ranges.get(rangeIndex);
+      ByteBuffer buffer = buffers[rangeIndex];
      if (result == -1) {
-        failed(new EOFException("Read past End of File"), r);
+        // no data was read back.
+        failed(new EOFException("Read past End of File"), rangeIndex);
      } else {
        if (buffer.remaining() > 0) {
          // issue a read for the rest of the buffer
-          // QQ: What if this fails? It has the same handler.
-          channel.read(buffer, range.getOffset() + buffer.position(), r, this);
+          channel.read(buffer, range.getOffset() + buffer.position(), rangeIndex, this);
        } else {
-          // QQ: Why is this required? I think because we don't want the
-          // user to read data beyond limit.
+          // Flip the buffer and declare success.
          buffer.flip();
          range.getData().complete(buffer);
        }
      }
    }
 
+    /**
+     * The read of the range failed.
+     * <p>
+     * Release the buffer supplied for this range, then
+     * report to the future as {@code completeExceptionally(exc)}.
+     * @param exc exception.
+     * @param rangeIndex range index within the range list.
+     */
     @Override
-    public void failed(Throwable exc, Integer r) {
-      LOG.debug("Failed while reading range {} ", r, exc);
-      ranges.get(r).getData().completeExceptionally(exc);
+    public void failed(Throwable exc, Integer rangeIndex) {
+      LOG.debug("Failed while reading range {} ", rangeIndex, exc);
+      // release the buffer
+      allocateRelease.putBuffer(buffers[rangeIndex]);
+      // report the failure.
+      ranges.get(rangeIndex).getData().completeExceptionally(exc);
    }
+
  }
 
  @Override
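
RawLocalFileSystem now feeds the caller's allocate/release functions into its AsyncHandler through a ByteBufferPool built by the new VectorIOBufferPool class, whose source is not included in this excerpt. Below is a minimal sketch of what such an adapter might look like, assuming only the getBuffer/putBuffer methods of org.apache.hadoop.io.ByteBufferPool used above; the class name AllocateReleasePool is hypothetical and the real VectorIOBufferPool may differ.

```java
import java.nio.ByteBuffer;
import java.util.function.Consumer;
import java.util.function.IntFunction;

import org.apache.hadoop.io.ByteBufferPool;

/**
 * Hypothetical adapter: expose an allocate function and a release consumer
 * as a ByteBufferPool, so pool-style code such as AsyncHandler can call
 * getBuffer()/putBuffer() without knowing where the buffers come from.
 */
final class AllocateReleasePool implements ByteBufferPool {

  private final IntFunction<ByteBuffer> allocate;
  private final Consumer<ByteBuffer> release;

  AllocateReleasePool(IntFunction<ByteBuffer> allocate,
      Consumer<ByteBuffer> release) {
    this.allocate = allocate;
    this.release = release;
  }

  @Override
  public ByteBuffer getBuffer(boolean direct, int length) {
    // The direct flag is ignored: the caller's allocator decides what kind
    // of buffer to hand out.
    return allocate.apply(length);
  }

  @Override
  public void putBuffer(ByteBuffer buffer) {
    // Return the buffer to whatever pool the caller manages.
    release.accept(buffer);
  }
}
```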
