Skip to content

Commit 135f1bf

Browse files
committed
Abfs and s3a diagnostics to include a detailed analysis of the local buffer dir
Reviews the state of the buffer dir and its parent dirs, identifies permission issues, then tries to create a temporary file through the LocalDirAllocator, just as the filesystems will
1 parent 8710dd2 commit 135f1bf

10 files changed

Lines changed: 181 additions & 40 deletions

src/main/java/help.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,10 @@ public static void main(String[] args) {
5757
printCommand("committerinfo", "Print committer information");
5858
printCommand("deleteobject", "Delete an S3 object");
5959
printCommand("gcscreds", "credential diagnostics for GCS. Warning: logs secrets");
60-
printCommand("iampolicy", "generate IAM policy");
60+
printCommand("iampolicy", "generate AWS IAM policy");
6161
printCommand("listobjects", "list S3 objects and their translated statuses");
6262
printCommand("regions\t", "emulate region lookup of AWS SDK");
63-
printCommand("sessionkeys", "optional extra: generate session keys");
63+
printCommand("sessionkeys", "generate AWS session keys from a long-lived login");
6464
println("%nThese are only available on some builds and require a compatible hadoop release");
6565

6666
println("");

src/main/java/org/apache/hadoop/fs/store/diag/ADLDiagnosticsInfo.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,9 @@ public List<URI> listEndpointsToProbe(final Configuration conf)
8888

8989
@Override
9090
protected void validateConfig(final Printout printout,
91-
final Configuration conf)
91+
final Configuration conf, final boolean writeOperations)
9292
throws IOException {
93-
super.validateConfig(printout, conf);
93+
super.validateConfig(printout, conf, writeOperations);
9494
warnOnInvalidDomain(printout, ".azuredatalakestore.net",
9595
"https://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.6.5/bk_cloud-data-access/content/adls-uri.html");
9696
}

src/main/java/org/apache/hadoop/fs/store/diag/AbfsDiagnosticsInfo.java

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,11 @@
3131
import org.apache.hadoop.conf.Configuration;
3232
import org.apache.hadoop.fs.FileSystem;
3333
import org.apache.hadoop.fs.Path;
34+
import org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations;
3435
import org.apache.hadoop.fs.store.StoreDurationInfo;
3536

3637
import static org.apache.hadoop.fs.store.diag.CapabilityKeys.*;
38+
import static org.apache.hadoop.fs.store.diag.OptionSets.HADOOP_TMP_DIR;
3739

3840
/**
3941
* Abfs diagnostics.
@@ -47,6 +49,43 @@ public class AbfsDiagnosticsInfo extends StoreDiagnosticsInfo {
4749

4850
public static final String FS_AZURE_READAHEADQUEUE_DEPTH = "fs.azure.readaheadqueue.depth";
4951

52+
53+
/**
54+
* Buffer directory path for uploading AbfsOutputStream data blocks.
55+
* Value: {@value}
56+
*/
57+
public static final String FS_AZURE_BLOCK_UPLOAD_BUFFER_DIR =
58+
"fs.azure.buffer.dir";
59+
60+
/**
61+
* What data block buffer to use.
62+
* <br>
63+
* Options include: "disk"(Default), "array", and "bytebuffer".
64+
* <br>
65+
* Default is {@link FileSystemConfigurations#DATA_BLOCKS_BUFFER_DEFAULT}.
66+
* Value: {@value}
67+
*/
68+
public static final String DATA_BLOCKS_BUFFER =
69+
"fs.azure.data.blocks.buffer";
70+
71+
/**
72+
* Maximum Number of blocks a single output stream can have
73+
* active (uploading, or queued to the central FileSystem
74+
* instance's pool of queued operations).
75+
* This stops a single stream overloading the shared thread pool.
76+
* {@value}
77+
* <p>
78+
* Default is {@link FileSystemConfigurations#BLOCK_UPLOAD_ACTIVE_BLOCKS_DEFAULT}
79+
*/
80+
public static final String FS_AZURE_BLOCK_UPLOAD_ACTIVE_BLOCKS =
81+
"fs.azure.block.upload.active.blocks";
82+
83+
/**
84+
* Limit of queued block upload operations before writes
85+
* block for an OutputStream. Value: {@value}
86+
*/
87+
public static final int BLOCK_UPLOAD_ACTIVE_BLOCKS_DEFAULT = 20;
88+
5089
private static final Object[][] options = {
5190

5291
{"abfs.external.authorization.class", false, false},
@@ -81,6 +120,9 @@ public class AbfsDiagnosticsInfo extends StoreDiagnosticsInfo {
81120
{"fs.azure.createRemoteFileSystemDuringInitialization", false, false},
82121
{"fs.azure.custom.token.fetch.retry.count", false, false},
83122
{"fs.azure.delegation.token.provider.type", false, false},
123+
{DATA_BLOCKS_BUFFER, false, false},
124+
{FS_AZURE_BLOCK_UPLOAD_BUFFER_DIR, false, false},
125+
{FS_AZURE_BLOCK_UPLOAD_ACTIVE_BLOCKS, false, false},
84126
{"fs.azure.disable.outputstream.flush", false, false},
85127
{"fs.azure.enable.abfslistiterator", false, false},
86128
{"fs.azure.enable.autothrottling", false, false},
@@ -287,7 +329,7 @@ protected void addAccountOption(
287329
add(list, key + "." + getFsURI().getHost(), secret, sensitive);
288330
}
289331
}
290-
332+
291333
@Override
292334
public String[] getClassnames(final Configuration conf) {
293335
return classnames;
@@ -304,11 +346,24 @@ public String[] getOptionalPathCapabilites() {
304346

305347
@Override
306348
protected void validateConfig(final Printout printout,
307-
final Configuration conf)
349+
final Configuration conf,
350+
final boolean writeOperations)
308351
throws IOException {
309-
super.validateConfig(printout, conf);
352+
super.validateConfig(printout, conf, writeOperations);
310353
warnOnInvalidDomain(printout, ".dfs.core.windows.net",
311354
"https://docs.microsoft.com/en-us/azure/storage/data-lake-storage/introduction-abfs-uri");
355+
356+
// look at block buffering
357+
printout.heading("Output Buffering");
358+
final String buffering = conf.getTrimmed(DATA_BLOCKS_BUFFER, "disk");
359+
int activeBlocks = conf.getInt(FS_AZURE_BLOCK_UPLOAD_ACTIVE_BLOCKS,
360+
BLOCK_UPLOAD_ACTIVE_BLOCKS_DEFAULT);
361+
printout.println("Written data is buffered to %s with up to %d blocks queued per stream",
362+
buffering, activeBlocks);
363+
if ("disk".equals(buffering)) {
364+
validateBufferDir(printout, conf, FS_AZURE_BLOCK_UPLOAD_BUFFER_DIR, HADOOP_TMP_DIR,
365+
writeOperations);
366+
}
312367
}
313368

314369
@Override

src/main/java/org/apache/hadoop/fs/store/diag/GCSDiagnosticsInfo.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -220,9 +220,9 @@ public List<URI> listOptionalEndpointsToProbe(final Configuration conf)
220220

221221
@Override
222222
protected void validateConfig(final Printout printout,
223-
final Configuration conf)
223+
final Configuration conf, final boolean writeOperations)
224224
throws IOException {
225-
super.validateConfig(printout, conf);
225+
super.validateConfig(printout, conf, writeOperations);
226226

227227
// now print everything fs.s3a.ext, assuming that
228228
// there are no secrets in it. Don't do that.

src/main/java/org/apache/hadoop/fs/store/diag/OptionSets.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,8 @@ public class OptionSets {
155155
MOZILLA_PUBLIC_SUFFIX_LIST
156156
};
157157

158+
public static final String HADOOP_TMP_DIR = "hadoop.tmp.dir";
159+
158160

159161
/**
160162
* The enhanced {@code openFile()} options.

src/main/java/org/apache/hadoop/fs/store/diag/S3ADiagnosticsInfo.java

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818

1919
package org.apache.hadoop.fs.store.diag;
2020

21-
import java.io.File;
2221
import java.io.IOException;
2322
import java.lang.reflect.InvocationTargetException;
2423
import java.lang.reflect.Method;
@@ -33,7 +32,6 @@
3332

3433
import org.apache.hadoop.conf.Configuration;
3534
import org.apache.hadoop.fs.FileSystem;
36-
import org.apache.hadoop.fs.LocalDirAllocator;
3735
import org.apache.hadoop.fs.Path;
3836
import org.apache.hadoop.fs.store.StoreExitException;
3937

@@ -77,11 +75,11 @@ public class S3ADiagnosticsInfo extends StoreDiagnosticsInfo {
7775
public static final String ASSUMED_ROLE_STS_ENDPOINT
7876
= "fs.s3a.assumed.role.sts.endpoint";
7977

80-
public static final String HADOOP_TMP_DIR = "hadoop.tmp.dir";
81-
8278
//use a custom endpoint?
8379
public static final String ENDPOINT = "fs.s3a.endpoint";
80+
8481
public static final String DEFAULT_ENDPOINT = "";
82+
8583
public static final String REGION = "fs.s3a.endpoint.region";
8684

8785
//Enable path style access? Overrides default virtual hosting
@@ -504,13 +502,13 @@ public Configuration patchConfigurationToInitalization(
504502
Method m = aClass.getMethod("propagateBucketOptions",
505503
Configuration.class,
506504
String.class);
507-
return (Configuration)m.invoke(null, conf, getFsURI().getHost());
505+
return (Configuration) m.invoke(null, conf, getFsURI().getHost());
508506
} catch (ClassNotFoundException e) {
509507
LOG.error("S3AUtils not found: hadoop-aws is not on the classpath", e);
510508
// this will carry on elsewhere
511509
} catch (NoSuchMethodException
512-
| IllegalAccessException
513-
| InvocationTargetException e) {
510+
| IllegalAccessException
511+
| InvocationTargetException e) {
514512
LOG.info("S3AUtils.propagateBucketOptions() not found; assume old Hadoop version");
515513
}
516514
return conf;
@@ -583,22 +581,15 @@ public List<URI> listOptionalEndpointsToProbe(final Configuration conf)
583581

584582
@Override
585583
protected void validateConfig(final Printout printout,
586-
final Configuration conf) throws IOException {
587-
String bufferOption = conf.get(BUFFER_DIR) != null
588-
? BUFFER_DIR : HADOOP_TMP_DIR;
584+
final Configuration conf,
585+
final boolean writeOperations) throws IOException {
589586
printout.heading("S3A Config validation");
590587

591-
printout.println("Buffer configuration option %s = %s",
592-
bufferOption, conf.get(bufferOption));
588+
printout.heading("Output Buffering");
589+
validateBufferDir(printout, conf, BUFFER_DIR, OptionSets.HADOOP_TMP_DIR,
590+
writeOperations);
593591

594-
final LocalDirAllocator directoryAllocator = new LocalDirAllocator(
595-
bufferOption);
596-
597-
File temp = directoryAllocator.createTmpFileForWrite("temp", 1, conf);
598-
599-
printout.println("Temporary files created in %s",
600-
temp.getParentFile());
601-
temp.delete();
592+
printout.heading("Encryption");
602593

603594
String encryption =
604595
conf.get("fs.s3a.server-side-encryption-algorithm", "").trim();
@@ -649,7 +640,8 @@ protected void validateConfig(final Printout printout,
649640
if (endpoint.startsWith("https:") || endpoint.startsWith("http:")) {
650641
printout.warn("Value of %s looks like a URL: %s", ENDPOINT, endpoint);
651642
printout.println("It SHOULD normally be a hostname or IP address");
652-
printout.println("Unless you have a private store with a non-standard port or are using AWS S3 PrivateLink");
643+
printout.println("Unless you have a private store with a non-standard port or"
644+
+ " are using AWS S3 PrivateLink");
653645
if (!pathStyleAccess) {
654646
printout.warn("You should probably set %s to true", PATH_STYLE_ACCESS);
655647
}
@@ -701,7 +693,8 @@ protected void validateConfig(final Printout printout,
701693

702694
if (!dtbinding.isEmpty()) {
703695
printout.println("Delegation token binding %s is active", dtbinding);
704-
printout.println("This will take over authentication from the settings in %s", AWS_CREDENTIALS_PROVIDER);
696+
printout.println("This will take over authentication from the settings in %s",
697+
AWS_CREDENTIALS_PROVIDER);
705698
} else {
706699
// TODO: analyse default values.
707700
}
@@ -752,14 +745,13 @@ protected void performanceHints(
752745
DIRECTORY_MARKER_RETENTION);
753746
hint(printout,
754747
"org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore"
755-
.equals(conf.get("fs.s3a.metadatastore.impl","")),
748+
.equals(conf.get("fs.s3a.metadatastore.impl", "")),
756749
"S3Guard is no longer needed -decommission it");
757750

758751

759752
reviewReadPolicy(printout, conf);
760753

761754

762-
763755
// TODO look at output buffer options
764756
}
765757

src/main/java/org/apache/hadoop/fs/store/diag/StoreDiag.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,8 @@ public int run(String[] args, PrintStream stream) throws Exception {
194194

195195
// and its FS URI
196196
storeInfo = bindToStore(path.toUri());
197+
final boolean writeOperations = hasOption(WRITE);
198+
197199
printHadoopVersionInfo();
198200
printOSVersion();
199201
if (hasOption(SYSPROPS)) {
@@ -217,7 +219,7 @@ public int run(String[] args, PrintStream stream) throws Exception {
217219
printStoreConfiguration();
218220
probeRequiredAndOptionalClasses(hasOption(OPTIONAL));
219221
probeRequiredAndOptionalResources();
220-
storeInfo.validateConfig(this, getConf());
222+
storeInfo.validateConfig(this, getConf(), writeOperations);
221223
printPerformanceHints();
222224
probeForFileSystemClass(storeInfo.getScheme());
223225
if (storeInfo.printTLSInfo()) {
@@ -226,7 +228,7 @@ public int run(String[] args, PrintStream stream) throws Exception {
226228
probeAllEndpoints();
227229

228230
// and the filesystem operations
229-
executeFileSystemOperations(path, hasOption(WRITE));
231+
executeFileSystemOperations(path, writeOperations);
230232

231233
// dump JVM status
232234
printJVMStats();

0 commit comments

Comments
 (0)