Commit 8fc9238

[b/368220129] Make hdfs content summary task not fail hard on AccessControlException(s) (#615)
1 parent: 19a1d17

File tree

1 file changed: +18 -9 lines changed

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/hdfs/HdfsContentSummaryTask.java

Lines changed: 18 additions & 9 deletions
@@ -16,6 +16,7 @@
  */
 package com.google.edwmigration.dumper.application.dumper.connector.hdfs;
 
+import static com.google.edwmigration.dumper.application.dumper.connector.hdfs.SingleDirScanJob.trimExceptionMessage;
 import static java.lang.String.format;
 import static java.nio.charset.StandardCharsets.UTF_8;
 
@@ -33,17 +34,21 @@
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class HdfsContentSummaryTask extends AbstractTask<Void> implements ContentSummaryFormat {
 
+  private static final Logger LOG = LoggerFactory.getLogger(HdfsContentSummaryTask.class);
+
   HdfsContentSummaryTask() {
     super(ZIP_ENTRY_NAME);
   }
 
   @Override
   public String toString() {
     return format(
-        "Write content summary of the top-level directories of the HDFS path '%s'",
+        "Write content summary of the top-level directories of the HDFS path to '%s'",
         getTargetPath());
   }
 
@@ -64,14 +69,18 @@ protected Void doRun(TaskRunContext context, @Nonnull ByteSink sink, @Nonnull Ha
         final CSVPrinter csvPrinter = FORMAT.withHeader(Header.class).print(output)) {
       for (FileStatus file : topLevelFiles) {
         if (file.isDirectory()) {
-          ContentSummary summary = fs.getContentSummary(file.getPath());
-          long totalFileSize = summary.getLength();
-          long totalNumberOfFiles = summary.getFileCount();
-          long totalNumberOfDirectories = summary.getDirectoryCount();
-          csvPrinter.printRecord(
-              file.getPath().toUri().getPath(),
-              totalFileSize,
-              totalNumberOfDirectories + totalNumberOfFiles);
+          try {
+            ContentSummary summary = fs.getContentSummary(file.getPath());
+            long totalFileSize = summary.getLength();
+            long totalNumberOfFiles = summary.getFileCount();
+            long totalNumberOfDirectories = summary.getDirectoryCount();
+            csvPrinter.printRecord(
+                file.getPath().toUri().getPath(),
+                totalFileSize,
+                totalNumberOfDirectories + totalNumberOfFiles);
+          } catch (org.apache.hadoop.security.AccessControlException exn) {
+            LOG.error("AccessControlException: {}", trimExceptionMessage(exn.getMessage()));
+          }
         }
       }
     }
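
For reference, a minimal standalone sketch of the pattern this commit introduces: the per-directory getContentSummary call is wrapped in a try/catch so that an AccessControlException on one unreadable directory is logged and skipped instead of failing the whole task. The class and method names below (ContentSummarySketch, writeSummaries) are illustrative only and not part of the dumper codebase, and the project-internal SingleDirScanJob.trimExceptionMessage helper is stood in for by a plain getMessage() call.

import java.io.IOException;

import org.apache.commons.csv.CSVPrinter;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.security.AccessControlException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

final class ContentSummarySketch {
  private static final Logger LOG = LoggerFactory.getLogger(ContentSummarySketch.class);

  /** Writes one CSV record per readable top-level directory; unreadable ones are logged and skipped. */
  static void writeSummaries(
      DistributedFileSystem fs, FileStatus[] topLevelFiles, CSVPrinter csvPrinter)
      throws IOException {
    for (FileStatus file : topLevelFiles) {
      if (!file.isDirectory()) {
        continue;
      }
      try {
        ContentSummary summary = fs.getContentSummary(file.getPath());
        csvPrinter.printRecord(
            file.getPath().toUri().getPath(),
            summary.getLength(),
            summary.getDirectoryCount() + summary.getFileCount());
      } catch (AccessControlException exn) {
        // Before this commit the whole task failed here; now the directory is logged and skipped.
        LOG.error("AccessControlException: {}", exn.getMessage());
      }
    }
  }
}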
