1616 */
1717package com .google .edwmigration .dumper .application .dumper .connector .hdfs ;
1818
19+ import static com .google .edwmigration .dumper .application .dumper .connector .hdfs .SingleDirScanJob .trimExceptionMessage ;
1920import static java .lang .String .format ;
2021import static java .nio .charset .StandardCharsets .UTF_8 ;
2122
3334import org .apache .hadoop .fs .FileStatus ;
3435import org .apache .hadoop .fs .Path ;
3536import org .apache .hadoop .hdfs .DistributedFileSystem ;
37+ import org .slf4j .Logger ;
38+ import org .slf4j .LoggerFactory ;
3639
3740public class HdfsContentSummaryTask extends AbstractTask <Void > implements ContentSummaryFormat {
3841
42+ private static final Logger LOG = LoggerFactory .getLogger (HdfsContentSummaryTask .class );
43+
/**
 * Creates the task, passing {@code ZIP_ENTRY_NAME} to the superclass constructor.
 *
 * <p>NOTE(review): {@code ZIP_ENTRY_NAME} is not declared in the visible part of this file;
 * presumably it comes from {@code ContentSummaryFormat} — confirm.
 */
HdfsContentSummaryTask() {
  super(ZIP_ENTRY_NAME);
}
4247
4348 @ Override
4449 public String toString () {
4550 return format (
46- "Write content summary of the top-level directories of the HDFS path '%s'" ,
51+ "Write content summary of the top-level directories of the HDFS path to '%s'" ,
4752 getTargetPath ());
4853 }
4954
@@ -64,14 +69,18 @@ protected Void doRun(TaskRunContext context, @Nonnull ByteSink sink, @Nonnull Ha
6469 final CSVPrinter csvPrinter = FORMAT .withHeader (Header .class ).print (output )) {
6570 for (FileStatus file : topLevelFiles ) {
6671 if (file .isDirectory ()) {
67- ContentSummary summary = fs .getContentSummary (file .getPath ());
68- long totalFileSize = summary .getLength ();
69- long totalNumberOfFiles = summary .getFileCount ();
70- long totalNumberOfDirectories = summary .getDirectoryCount ();
71- csvPrinter .printRecord (
72- file .getPath ().toUri ().getPath (),
73- totalFileSize ,
74- totalNumberOfDirectories + totalNumberOfFiles );
72+ try {
73+ ContentSummary summary = fs .getContentSummary (file .getPath ());
74+ long totalFileSize = summary .getLength ();
75+ long totalNumberOfFiles = summary .getFileCount ();
76+ long totalNumberOfDirectories = summary .getDirectoryCount ();
77+ csvPrinter .printRecord (
78+ file .getPath ().toUri ().getPath (),
79+ totalFileSize ,
80+ totalNumberOfDirectories + totalNumberOfFiles );
81+ } catch (org .apache .hadoop .security .AccessControlException exn ) {
82+ LOG .error ("AccessControlException: {}" , trimExceptionMessage (exn .getMessage ()));
83+ }
7584 }
7685 }
7786 }
0 commit comments