4242import org .apache .hadoop .fs .store .StoreDurationInfo ;
4343import org .apache .hadoop .fs .store .StoreEntryPoint ;
4444import org .apache .hadoop .fs .tools .csv .SimpleCsvWriter ;
45+ import org .apache .hadoop .io .IOUtils ;
4546import org .apache .hadoop .util .ToolRunner ;
4647
47- import static org .apache .hadoop .fs .s3a .Invoker .once ;
48- import static org .apache .hadoop .fs .s3a .extra .S3ListingSupport .keyToPath ;
4948import static org .apache .hadoop .fs .store .CommonParameters .DEFINE ;
5049import static org .apache .hadoop .fs .store .CommonParameters .LIMIT ;
5150import static org .apache .hadoop .fs .store .CommonParameters .TOKENFILE ;
@@ -64,23 +63,28 @@ public class ListVersions extends StoreEntryPoint {
6463
/**
 * Name of the quiet option; passed to createCommandFormat() in the
 * constructor and listed in USAGE as "quiet output".
 */
6564 public static final String QUIET = "q" ;
6665
/**
 * Name of the option selecting the column separator; registered through
 * addValueOptions() in the constructor, so it takes a value.
 */
66+ public static final String SEPARATOR = "separator" ;
67+
6768
/**
 * Usage text, assembled from one optusage() entry per documented option.
 * NOTE(review): DELETE is passed to createCommandFormat() in the
 * constructor but has no entry here; confirm whether that is intentional.
 */
6869 public static final String USAGE
69- = "Usage: listobjects <path>\n "
70+ = "Usage: listversions <path>\n "
7071 + optusage (DEFINE , "key=value" , "Define a property" )
7172 + optusage (LIMIT , "limit" , "limit of files to list" )
7273 + optusage (OUTPUT , "file" , "output file" )
7374 + optusage (QUIET , "quiet output" )
75+ + optusage (SEPARATOR , "string" , "Separator if not <tab>" )
7476 + optusage (TOKENFILE , "file" , "Hadoop token file to load" )
7577 + optusage (VERBOSE , "print verbose output" )
7678 + optusage (XMLFILE , "file" , "XML config file to load" );
7779
/**
 * Command name; run() passes it to StoreDurationInfo and to
 * getAmazonS3ClientForTesting().
 */
80+ public static final String NAME = "listversions" ;
81+
/**
 * Declare the command line accepted by this tool.
 * The command format is created first with the DELETE, QUIET and VERBOSE
 * options, then the options which carry a value are registered.
 * NOTE(review): the (1, 1) arguments are presumably the minimum and
 * maximum number of path arguments; confirm against createCommandFormat().
 */
public ListVersions() {
  createCommandFormat(1, 1, DELETE, QUIET, VERBOSE);
  addValueOptions(
      TOKENFILE,
      XMLFILE,
      DEFINE,
      LIMIT,
      OUTPUT,
      SEPARATOR);
}
8589
8690 @ Override
@@ -101,10 +105,10 @@ public int run(String[] args) throws Exception {
101105 S3AFileSystem fs = null ;
102106 final Path source = new Path (paths .get (0 ));
103107 try (StoreDurationInfo duration = new StoreDurationInfo (LOG ,
104- "listobjects" )) {
108+ NAME )) {
105109 fs = (S3AFileSystem ) source .getFileSystem (conf );
106110
107- final AmazonS3 s3 = fs .getAmazonS3ClientForTesting ("listobjects" );
111+ final AmazonS3 s3 = fs .getAmazonS3ClientForTesting (NAME );
108112 String key = S3ListingSupport .pathToKey (source );
109113 ListVersionsRequest request = S3ListingSupport .createListVersionsRequest (
110114 source .toUri ().getHost (), key , null );
@@ -113,45 +117,91 @@ public int run(String[] args) throws Exception {
113117 = new ListVersionsIterator (s3 , source , request );
114118
115119 int objectCount = 0 ;
116- long size = 0 ;
120+ long totalSize = 0 ;
117121 heading ("Listing %s" , source );
118-
119- final String output = getOption (OUTPUT );
120- PrintStream dest ;
121- boolean closeOutput ;
122- if (output != null ) {
123- // writing to a dir
124- final Path destPath = new Path (output );
125- final FileSystem destFS = destPath .getFileSystem (conf );
126- final FSDataOutputStream dataOutputStream = destFS .createFile (destPath )
127- .overwrite (true )
128- .recursive ()
129- .build ();
130- dest = new PrintStream (dataOutputStream );
131- closeOutput = true ;
132- println ("Saving output to %s" , destPath );
122+ SummaryWriter writer ;
123+
124+
125+ if (!quiet ) {
126+ final String output = getOption (OUTPUT );
127+ PrintStream dest ;
128+ boolean closeOutput ;
129+ if (output != null ) {
130+ // writing to a dir
131+ final Path destPath = new Path (output );
132+ final FileSystem destFS = destPath .getFileSystem (conf );
133+ final FSDataOutputStream dataOutputStream = destFS .createFile (destPath )
134+ .overwrite (true )
135+ .recursive ()
136+ .build ();
137+ dest = new PrintStream (dataOutputStream );
138+ closeOutput = true ;
139+ println ("Saving output to %s" , destPath );
140+ } else {
141+ dest = getOut ();
142+ closeOutput = false ;
143+ }
144+ final String separator = getOptional (SEPARATOR ).orElse ("\t " );
145+ writer = new CsvVersionWriter (dest , closeOutput , separator );
133146 } else {
134- dest = getOut ();
135- closeOutput = false ;
147+ writer = new SummaryWriter ();
136148 }
137- try (CsvVersionWriter writer = new CsvVersionWriter (dest , closeOutput )) {
149+ long tombstones = 0 ;
150+ long fileTombstones = 0 ;
151+ long hidden = 0 ;
152+ long hiddenData = 0 ;
153+ long hiddenZeroByteFiles = 0 ;
154+ long dirMarkers = 0 ;
155+ long hiddenDirMarkers = 0 ;
156+ try {
138157
139158 boolean finished = false ;
140159 while (!finished && objects .hasNext ()) {
141160 final VersionListing page = objects .next ();
142161 for (S3VersionSummary summary : page .getVersionSummaries ()) {
143162 objectCount ++;
144- if (limit > 0 && objectCount >= limit ) {
163+ if (limit > 0 && objectCount > limit ) {
145164 finished = true ;
146165 break ;
166+ } else {
167+ final long size = summary .getSize ();
168+ totalSize += size ;
169+ writer .write (summary ,
170+ fs .keyToQualifiedPath (summary .getKey ()));
171+ final boolean isDirMarker = isDirMarker (summary );
172+ dirMarkers += result (isDirMarker );
173+ if (summary .isDeleteMarker ()) {
174+ tombstones ++;
175+ fileTombstones += result (!isDirMarker );
176+
177+ } else {
178+ if (!summary .isLatest ()) {
179+ if (!isDirMarker ) {
180+ hidden ++;
181+ hiddenData += size ;
182+ hiddenZeroByteFiles += result (size == 0 );
183+ } else {
184+ hiddenDirMarkers ++;
185+ }
186+ }
187+ }
147188 }
148- size += summary .getSize ();
149- writer .write (summary );
150189 }
151190 }
191+ } finally {
192+ writer .close ();
193+
152194 }
153- println ("found %d objects with total size %d bytes" , objectCount , size );
154195
196+ println ();
197+ println ("Found %,d objects under %s with total size %,d bytes" , objectCount , source , totalSize );
198+ println ("Hidden file count %,d with hidden data size %,d bytes" ,
199+ hidden , hiddenData );
200+ println ("Hidden zero-byte file count %,d" , hiddenZeroByteFiles );
201+ println ("Hidden directory markers %,d" , hiddenDirMarkers );
202+ println ("Tombstone entries %,d comprising %,d files and %,d dir markers" ,
203+ tombstones , fileTombstones , tombstones - fileTombstones );
204+ println ();
155205
156206 } finally {
157207 maybeDumpStorageStatistics (fs );
@@ -160,16 +210,37 @@ public int run(String[] args) throws Exception {
160210 return 0 ;
161211 }
162212
213+ private int result (boolean b ) {
214+ return b ? 1 : 0 ;
215+ }
216+
217+ private static class SummaryWriter implements Closeable {
218+
219+ void write (S3VersionSummary summary , Path path ) throws IOException {
220+
221+ }
222+ @ Override
223+ public void close () throws IOException {
224+
225+ }
226+ }
227+
228+
163229 /**
164230 * write to csv; pulled out to make writing to avro etc easier in future.
165231 */
166- private static final class CsvVersionWriter implements Closeable {
232+ private static final class CsvVersionWriter extends SummaryWriter {
233+
167234 private final SimpleCsvWriter csv ;
235+
168236 private final DateFormat df = new SimpleDateFormat ("yyyy-MM-ddZhh:mm:ss" );
169237
170- private CsvVersionWriter (final OutputStream out , final boolean closeOutput ) throws IOException {
171- csv = new SimpleCsvWriter (out , "\t " , "\n " , true , closeOutput );
238+ long index = 0 ;
239+
240+ private CsvVersionWriter (final OutputStream out , final boolean closeOutput , String separator ) throws IOException {
241+ csv = new SimpleCsvWriter (out , separator , "\n " , true , closeOutput );
172242 csv .columns (
243+ "index" ,
173244 "key" ,
174245 "path" ,
175246 "restore" ,
@@ -190,25 +261,34 @@ public void close() throws IOException {
190261 csv .close ();
191262 }
192263
193- void write (S3VersionSummary summary ) throws IOException {
194- String k = summary .getKey ();
195- csv .column (k );
196- csv .column (keyToPath (k ));
197- csv .column (false );
198- csv .column (summary .isLatest ());
199- csv .column (summary .getSize ());
200- csv .column (summary .isDeleteMarker ());
201- csv .column (S3ListingSupport .objectRepresentsDirectory (k , summary .getSize ()));
264+ void write (S3VersionSummary summary , Path path ) throws IOException {
265+ final boolean deleteMarker = summary .isDeleteMarker ();
266+ final boolean dirMarker = isDirMarker (summary );
267+ csv .columnL (++index );
268+ csv .column (summary .getKey ());
269+ csv .column (path );
270+ csv .columnB (!deleteMarker && !dirMarker );
271+ csv .columnB (summary .isLatest ());
272+ csv .columnL (summary .getSize ());
273+ csv .columnB (deleteMarker );
274+ csv .columnB (dirMarker );
202275 final Date lastModified = summary .getLastModified ();
203276 csv .column (df .format (lastModified ));
204- csv .column (lastModified .getTime ());
205- csv .column (summary .getVersionId ());
277+ csv .columnL (lastModified .getTime ());
278+ final String versionId = summary .getVersionId ();
279+ csv .column ("null" .equals (versionId ) ? "" : versionId );
206280 csv .column (summary .getETag ());
207281 csv .newline ();
208282 }
209283
284+ private long getIndex () {
285+ return index ;
286+ }
210287 }
211288
289+ private static boolean isDirMarker (final S3VersionSummary summary ) {
290+ return S3ListingSupport .objectRepresentsDirectory (summary .getKey (), summary .getSize ());
291+ }
212292
213293
214294 /**
0 commit comments