Skip to content

Commit c3e3228

Browse files
HADOOP-19229. S3A/ABFS: Vector IO on cloud storage: increase threshold for range merging (#7281)
The thresholds at which adjacent vector IO read ranges are coalesced into a single range have been increased, as has the limit above which ranges are considered large enough that parallel reads are faster. * The min/max for local filesystems, and for any other filesystem without custom support, are now 16K and 1M. * s3a and abfs use 128K as the minimum size and 2M as the maximum. These values are based on the Facebook Velox paper, which stated that their merge thresholds were 20K for local SSD and 500K for cloud storage. Contributed by Steve Loughran
1 parent 266dad1 commit c3e3228

File tree

6 files changed

+143
-16
lines changed

6 files changed

+143
-16
lines changed

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PositionedReadable.java

+5-2
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@
2626
import org.apache.hadoop.classification.InterfaceAudience;
2727
import org.apache.hadoop.classification.InterfaceStability;
2828

29+
import static org.apache.hadoop.io.Sizes.S_16K;
30+
import static org.apache.hadoop.io.Sizes.S_1M;
31+
2932
/**
3033
* Stream that permits positional reading.
3134
*
@@ -95,15 +98,15 @@ void readFully(long position, byte[] buffer, int offset, int length)
9598
* @return the minimum number of bytes
9699
*/
97100
default int minSeekForVectorReads() {
98-
return 4 * 1024;
101+
return S_16K;
99102
}
100103

101104
/**
102105
* What is the largest size that we should group ranges together as?
103106
* @return the number of bytes to read at once
104107
*/
105108
default int maxReadSizeForVectorReads() {
106-
return 1024 * 1024;
109+
return S_1M;
107110
}
108111

109112
/**
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.io;
20+
21+
import org.apache.hadoop.classification.InterfaceAudience;
22+
import org.apache.hadoop.classification.InterfaceStability;
23+
24+
/**
25+
* Sizes of binary values and other some common sizes.
26+
* This avoids having to remember the larger binary values,
27+
* and stops IDEs/style checkers complaining about numeric
28+
* values in source code.
29+
*/
30+
@InterfaceAudience.Public
31+
@InterfaceStability.Evolving
32+
public final class Sizes {
33+
34+
/** 2^8 bytes: {@value}. */
35+
public static final int S_256 = 256;
36+
37+
/** 2^9 bytes: {@value}. */
38+
public static final int S_512 = S_256 << 1;
39+
40+
/** 2^10 bytes - 1 KiB: {@value}. */
41+
public static final int S_1K = S_512 << 1;
42+
43+
/** 2^11 bytes - 1 KiB: {@value}. */
44+
public static final int S_2K = S_1K << 1;
45+
46+
/** 2^12 bytes - 2 KiB: {@value}. */
47+
public static final int S_4K = S_2K << 1;
48+
49+
/** 2^13 bytes: {@value}. */
50+
public static final int S_8K = S_4K << 1;
51+
52+
/** 2^14 bytes: {@value}. */
53+
public static final int S_16K = S_8K << 1;
54+
55+
/** 2^15 bytes: {@value}. */
56+
public static final int S_32K = S_16K << 1;
57+
58+
/** 2^16 bytes: {@value}. */
59+
public static final int S_64K = S_32K << 1;
60+
61+
/** 2^17 bytes, 128 KiB: {@value}. */
62+
public static final int S_128K = S_64K << 1;
63+
64+
/** 2^18 bytes, 256 KiB: {@value}. */
65+
public static final int S_256K = S_128K << 1;
66+
67+
/** 2^19 bytes, 512 KiB: {@value}. */
68+
public static final int S_512K = S_256K << 1;
69+
70+
/** 2^20 bytes, 1 MiB: {@value}. */
71+
public static final int S_1M = S_512K << 1;
72+
73+
/** 2^21 bytes, 2 MiB: {@value}. */
74+
public static final int S_2M = S_1M << 1;
75+
76+
/** 2^22 bytes, 4 MiB: {@value}. */
77+
public static final int S_4M = S_2M << 1;
78+
79+
/** 2^23 bytes, MiB: {@value}. */
80+
public static final int S_8M = S_4M << 1;
81+
82+
/** 2^24 bytes, MiB: {@value}. */
83+
public static final int S_16M = S_8M << 1;
84+
85+
/** 2^25 bytes, MiB: {@value}. */
86+
public static final int S_32M = S_16M << 1;
87+
88+
/** 5 MiB: {@value}. */
89+
public static final int S_5M = 5 * S_1M;
90+
91+
/** 10 MiB: {@value}. */
92+
public static final int S_10M = 10 * S_1M;
93+
94+
}

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

+6-3
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@
2626
import java.time.Duration;
2727
import java.util.concurrent.TimeUnit;
2828

29+
import static org.apache.hadoop.io.Sizes.S_128K;
30+
import static org.apache.hadoop.io.Sizes.S_2M;
31+
2932
/**
3033
* Constants used with the {@link S3AFileSystem}.
3134
*
@@ -1545,14 +1548,14 @@ private Constants() {
15451548
"fs.s3a.vectored.read.max.merged.size";
15461549

15471550
/**
1548-
* Default minimum seek in bytes during vectored reads : {@value}.
1551+
* Default minimum seek in bytes during vectored reads: {@value}.
15491552
*/
1550-
public static final int DEFAULT_AWS_S3_VECTOR_READS_MIN_SEEK_SIZE = 4096; // 4K
1553+
public static final int DEFAULT_AWS_S3_VECTOR_READS_MIN_SEEK_SIZE = S_128K;
15511554

15521555
/**
15531556
* Default maximum read size in bytes during vectored reads : {@value}.
15541557
*/
1555-
public static final int DEFAULT_AWS_S3_VECTOR_READS_MAX_MERGED_READ_SIZE = 1048576; //1M
1558+
public static final int DEFAULT_AWS_S3_VECTOR_READS_MAX_MERGED_READ_SIZE = S_2M;
15561559

15571560
/**
15581561
* Maximum number of range reads a single input stream can have

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ on the client requirements.
6767
```xml
6868
<property>
6969
<name>fs.s3a.vectored.read.min.seek.size</name>
70-
<value>4K</value>
70+
<value>128K</value>
7171
<description>
7272
What is the smallest reasonable seek in bytes such
7373
that we group ranges together during vectored
@@ -76,7 +76,7 @@ on the client requirements.
7676
</property>
7777
<property>
7878
<name>fs.s3a.vectored.read.max.merged.size</name>
79-
<value>1M</value>
79+
<value>2M</value>
8080
<description>
8181
What is the largest merged read size in bytes such
8282
that we group ranges together during vectored read.
@@ -282,7 +282,7 @@ Fix: Use one of the dedicated [S3A Committers](committers.md).
282282

283283
## <a name="tuning"></a> Options to Tune
284284

285-
### <a name="flags"></a> Performance Flags: `fs.s3a.performance.flag`
285+
### <a name="flags"></a> Performance Flags: `fs.s3a.performance.flags`
286286

287287
This option takes a comma separated list of performance flags.
288288
View it as the equivalent of the `-O` compiler optimization list C/C++ compilers offer.

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractVectoredRead.java

+21-8
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,12 @@
6161
import static org.apache.hadoop.fs.contract.ContractTestUtils.range;
6262
import static org.apache.hadoop.fs.contract.ContractTestUtils.returnBuffersToPoolPostRead;
6363
import static org.apache.hadoop.fs.contract.ContractTestUtils.validateVectoredReadResult;
64+
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_VECTOR_READS_MAX_MERGED_READ_SIZE;
65+
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_VECTOR_READS_MIN_SEEK_SIZE;
6466
import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.verifyStatisticCounterValue;
6567
import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.ioStatisticsToPrettyString;
68+
import static org.apache.hadoop.io.Sizes.S_1M;
69+
import static org.apache.hadoop.io.Sizes.S_4K;
6670
import static org.apache.hadoop.test.LambdaTestUtils.interceptFuture;
6771
import static org.apache.hadoop.test.MoreAsserts.assertEqual;
6872

@@ -139,13 +143,13 @@ public void testEOFRanges416Handling() throws Exception {
139143
public void testMinSeekAndMaxSizeConfigsPropagation() throws Exception {
140144
Configuration conf = getFileSystem().getConf();
141145
S3ATestUtils.removeBaseAndBucketOverrides(conf,
142-
Constants.AWS_S3_VECTOR_READS_MAX_MERGED_READ_SIZE,
143-
Constants.AWS_S3_VECTOR_READS_MIN_SEEK_SIZE);
146+
AWS_S3_VECTOR_READS_MAX_MERGED_READ_SIZE,
147+
AWS_S3_VECTOR_READS_MIN_SEEK_SIZE);
144148
S3ATestUtils.disableFilesystemCaching(conf);
145149
final int configuredMinSeek = 2 * 1024;
146150
final int configuredMaxSize = 10 * 1024 * 1024;
147-
conf.set(Constants.AWS_S3_VECTOR_READS_MIN_SEEK_SIZE, "2K");
148-
conf.set(Constants.AWS_S3_VECTOR_READS_MAX_MERGED_READ_SIZE, "10M");
151+
conf.set(AWS_S3_VECTOR_READS_MIN_SEEK_SIZE, "2K");
152+
conf.set(AWS_S3_VECTOR_READS_MAX_MERGED_READ_SIZE, "10M");
149153
try (S3AFileSystem fs = S3ATestUtils.createTestFileSystem(conf)) {
150154
try (FSDataInputStream fis = openVectorFile(fs)) {
151155
int newMinSeek = fis.minSeekForVectorReads();
@@ -162,8 +166,8 @@ public void testMinSeekAndMaxSizeConfigsPropagation() throws Exception {
162166
public void testMinSeekAndMaxSizeDefaultValues() throws Exception {
163167
Configuration conf = getFileSystem().getConf();
164168
S3ATestUtils.removeBaseAndBucketOverrides(conf,
165-
Constants.AWS_S3_VECTOR_READS_MIN_SEEK_SIZE,
166-
Constants.AWS_S3_VECTOR_READS_MAX_MERGED_READ_SIZE);
169+
AWS_S3_VECTOR_READS_MIN_SEEK_SIZE,
170+
AWS_S3_VECTOR_READS_MAX_MERGED_READ_SIZE);
167171
try (S3AFileSystem fs = S3ATestUtils.createTestFileSystem(conf)) {
168172
try (FSDataInputStream fis = openVectorFile(fs)) {
169173
int minSeek = fis.minSeekForVectorReads();
@@ -400,16 +404,25 @@ public void testMultiVectoredReadStatsCollection() throws Exception {
400404
}
401405
}
402406

407+
/**
408+
* Create a test fs with no readahead.
409+
* The vector IO ranges are set to the original small values,
410+
* so ranges on small files are not coalesced.
411+
* @return a filesystem
412+
* @throws IOException failure to instantiate.
413+
*/
403414
private S3AFileSystem getTestFileSystemWithReadAheadDisabled() throws IOException {
404415
Configuration conf = getFileSystem().getConf();
405416
// also resetting the min seek and max size values is important
406417
// as this same test suite has test which overrides these params.
407418
S3ATestUtils.removeBaseAndBucketOverrides(conf,
408419
Constants.READAHEAD_RANGE,
409-
Constants.AWS_S3_VECTOR_READS_MAX_MERGED_READ_SIZE,
410-
Constants.AWS_S3_VECTOR_READS_MIN_SEEK_SIZE);
420+
AWS_S3_VECTOR_READS_MAX_MERGED_READ_SIZE,
421+
AWS_S3_VECTOR_READS_MIN_SEEK_SIZE);
411422
S3ATestUtils.disableFilesystemCaching(conf);
412423
conf.setInt(Constants.READAHEAD_RANGE, 0);
424+
conf.setInt(AWS_S3_VECTOR_READS_MIN_SEEK_SIZE, S_4K);
425+
conf.setInt(AWS_S3_VECTOR_READS_MAX_MERGED_READ_SIZE, S_1M);
413426
return S3ATestUtils.createTestFileSystem(conf);
414427
}
415428
}

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStream.java

+14
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
import org.apache.commons.lang3.StringUtils;
2828
import org.apache.hadoop.classification.VisibleForTesting;
29+
import org.apache.hadoop.fs.PositionedReadable;
2930
import org.apache.hadoop.fs.impl.BackReference;
3031
import org.apache.hadoop.util.Preconditions;
3132

@@ -53,6 +54,8 @@
5354
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_KB;
5455
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.STREAM_ID_LEN;
5556
import static org.apache.hadoop.fs.azurebfs.constants.InternalConstants.CAPABILITY_SAFE_READAHEAD;
57+
import static org.apache.hadoop.io.Sizes.S_128K;
58+
import static org.apache.hadoop.io.Sizes.S_2M;
5659
import static org.apache.hadoop.util.StringUtils.toLowerCase;
5760

5861
/**
@@ -891,4 +894,15 @@ long getLimit() {
891894
BackReference getFsBackRef() {
892895
return fsBackRef;
893896
}
897+
898+
/**
 * Minimum seek distance for vectored reads: 128 KiB, the
 * cloud-storage range-merge threshold chosen in HADOOP-19229.
 * @return minimum seek in bytes below which adjacent ranges
 * are merged into one read.
 */
@Override
public int minSeekForVectorReads() {
  return S_128K;
}
902+
903+
/**
 * Largest size to which ranges may be merged during a vectored
 * read: 2 MiB, the cloud-storage limit chosen in HADOOP-19229.
 * @return maximum merged read size in bytes.
 */
@Override
public int maxReadSizeForVectorReads() {
  return S_2M;
}
907+
894908
}

0 commit comments

Comments
 (0)