Skip to content

Commit 99b7a63

Browse files
committed
Safeprefetch: documented and in sync with HADOOP-18577.
1 parent c147eeb commit 99b7a63

7 files changed

Lines changed: 153 additions & 13 deletions

File tree

README.md

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -620,7 +620,8 @@ You can also explore what directory tree structure is most efficient here.
620620

621621
## Command `mkcsv`
622622

623-
Creates a CSV file.
623+
Creates a large CSV file designed to trigger/validate the ABFS prefetching bug which
624+
came in HADOOP-17156.
624625

625626
See [mkcsv](src/main/site/mkcsv.md)
626627

@@ -720,6 +721,14 @@ a failure to connect to the instance metadata server.
720721

721722
This is to be expected, given that the service isn't there.
722723

724+
## Command `safeprefetch`
725+
726+
Probes an abfs store for being vulnerable to
727+
prefetch data corruption, providing the configuration
728+
information to disable it if so.
729+
730+
See [safeprefetch](src/main/site/safeprefetch.md)
731+
723732
## Command `tlsinfo`
724733

725734
Print out tls information. The `storediag` command prints the same information;
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.fs.gs;
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.fs.s3a.extra;

src/main/extra/org/apache/hadoop/fs/store/commands/CommitterInfo.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.apache.hadoop.conf.Configuration;
2727
import org.apache.hadoop.fs.FileSystem;
2828
import org.apache.hadoop.fs.Path;
29+
import org.apache.hadoop.fs.StreamCapabilities;
2930
import org.apache.hadoop.fs.store.StoreDurationInfo;
3031
import org.apache.hadoop.fs.store.StoreEntryPoint;
3132
import org.apache.hadoop.mapreduce.TaskAttemptID;
@@ -47,6 +48,7 @@
4748
*
4849
* Prints some performance numbers at the end.
4950
*/
51+
@SuppressWarnings("InstanceofIncompatibleInterface")
5052
public class CommitterInfo extends StoreEntryPoint {
5153

5254
private static final Logger LOG = LoggerFactory.getLogger(CommitterInfo.class);
@@ -94,6 +96,11 @@ public int run(String[] args) throws Exception {
9496
" %s:\n" +
9597
" %s",
9698
committer.getClass().getCanonicalName(), committer);
99+
if (committer instanceof StreamCapabilities
100+
&& ((StreamCapabilities) committer).hasCapability(
101+
"mapreduce.job.committer.dynamic.partitioning")) {
102+
println("Committer declares support for spark dynamic partitioning");
103+
}
97104
}
98105
return 0;
99106
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.fs.store.commands;

src/main/java/org/apache/hadoop/fs/store/abfs/SafePrefetch.java

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ public int run(String[] args) throws Exception {
7575

7676
// path on the CLI
7777
Path path = new Path(argList.get(0));
78-
println("Probing %s for prefetch safety %s", path);
78+
println("\nProbing %s for prefetch safety", path);
7979

8080
FileSystem fs = path.getFileSystem(conf);
8181
if (!(fs instanceof AzureBlobFileSystem)) {
@@ -84,7 +84,7 @@ public int run(String[] args) throws Exception {
8484
return 0;
8585
}
8686
String etag_capability = "fs.capability.etags.available";
87-
String hadoop_18546 = "hadoop-18546";
87+
String readahead_safe = "fs.azure.capability.readahead.safe";
8888

8989
println("Using filesystem %s", fs.getUri());
9090
Path abfsPath = path.makeQualified(fs.getUri(), fs.getWorkingDirectory());
@@ -98,13 +98,13 @@ public int run(String[] args) throws Exception {
9898
etag_capability);
9999
return 0;
100100
}
101-
if (checker.hasPathCapability(abfsPath, hadoop_18546)) {
101+
if (checker.hasPathCapability(abfsPath, readahead_safe)) {
102102

103-
println("Filesystem has prefetch issue fixed (has path capability %s)",
104-
abfsPath, hadoop_18546);
103+
println("Filesystem %s has prefetch issue fixed (has path capability %s)",
104+
abfsPath, readahead_safe);
105105
return 0;
106106
}
107-
println("Store is vulnerable to inconsistent prefetching. This MUST be disabled");
107+
println("Store is vulnerable to inconsistent prefetching. This MUST be disabled\n");
108108
final Configuration fsConf = fs.getConf();
109109
List<EnvEntry> entries = new ArrayList<>();
110110
entries.add(new EnvEntry(FS_AZURE_READAHEADQUEUE_DEPTH, "", "0"));
@@ -130,16 +130,12 @@ public int run(String[] args) throws Exception {
130130
// no disable option; don't confuse the user by mentioning it.
131131

132132
}
133-
EnvEntry property = new EnvEntry(FS_AZURE_READAHEADQUEUE_DEPTH, "", "0");
134-
println("To disable prefetching, set %s to %s", property.getName(), property.getValue());
135-
println("\n%s\n", property.xml());
136-
println("\n%s\n", property.spark());
137133

138134
warn("Filesystem is vulnerable until prefetching is disabled");
139135
StringBuilder xml = new StringBuilder();
140-
xml.append("<configuration>\n\n");
136+
xml.append("<configuration>\n");
141137
entries.forEach(e -> xml.append(e.xml()));
142-
xml.append("\n</configuration>\n");
138+
xml.append("</configuration>\n");
143139

144140
println("hadoop XML: %n%s%n", xml);
145141

src/main/site/safeprefetch.md

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
<!---
2+
Licensed under the Apache License, Version 2.0 (the "License");
3+
you may not use this file except in compliance with the License.
4+
You may obtain a copy of the License at
5+
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
8+
Unless required by applicable law or agreed to in writing, software
9+
distributed under the License is distributed on an "AS IS" BASIS,
10+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
See the License for the specific language governing permissions and
12+
limitations under the License. See accompanying LICENSE file.
13+
-->
14+
15+
# Command `safeprefetch`: validate prefetch safety of abfs client
16+
17+
Command to probe an abfs store for prefetch safety.
18+
19+
If safe: returns a status code of 0
20+
21+
If unsafe, prints configuration options to disable prefetching
22+
and then returns the exit code -1.
23+
24+
The safety probe considers an abfs store safe if *any* of the conditions are met
25+
26+
* It is from a release *before* `HADOOP-17156. Clear abfs readahead requests on stream close`,
27+
* It has the fix `HADOOP-18546. ABFS. disable purging list of in progress reads in abfs stream close()`
28+
* The `fs.azure.readaheadqueue.depth` is 0
29+
* Cloudera releases: readahead has been completely disabled
30+
(`HADOOP-18517. ABFS: Add fs.azure.enable.readahead option to disable readahead` is in all CDH releases with the bug)
31+
32+
The probe for the fix relies on `HADOOP-18577. ABFS: Add probes of readahead fix`; a `PathCapabilities` probe
33+
`fs.azure.capability.readahead.safe` has been added to all abfs releases with the fix.
34+
35+
## Example: probe of a (safe) Hadoop 3.3.5
36+
37+
38+
```
39+
bin/hadoop jar $CLOUDSTORE safeprefetch abfs://stevel-testing@stevelukwest.dfs.core.windows.net/user
40+
41+
Probing abfs://stevel-testing@stevelukwest.dfs.core.windows.net/user for prefetch safety
42+
Using filesystem abfs://stevel-testing@stevelukwest.dfs.core.windows.net
43+
Filesystem abfs://stevel-testing@stevelukwest.dfs.core.windows.net/user has prefetch issue fixed (has path capability fs.azure.capability.readahead.safe)
44+
```
45+
46+
## Example: probe of Hadoop 3.3.4 (unsafe)
47+
48+
```
49+
bin/hadoop jar $CLOUDSTORE safeprefetch abfs://stevel-testing@stevelukwest.dfs.core.windows.net/user
50+
Probing abfs://stevel-testing@stevelukwest.dfs.core.windows.net/user for prefetch safety
51+
Using filesystem abfs://stevel-testing@stevelukwest.dfs.core.windows.net
52+
Store is vulnerable to inconsistent prefetching. This MUST be disabled
53+
54+
WARNING: Filesystem is vulnerable until prefetching is disabled
55+
hadoop XML:
56+
<configuration>
57+
<fs.azure.readaheadqueue.depth>
58+
0
59+
</fs.azure.readaheadqueue.depth>
60+
</configuration>
61+
62+
63+
64+
spark:
65+
spark.hadoop.fs.azure.readaheadqueue.depth 0
66+
67+
68+
2022-12-19 12:32:06,003 [main] INFO util.ExitUtil (ExitUtil.java:terminate(241)) - Exiting with status -1:
69+
70+
```
71+

0 commit comments

Comments
 (0)