Skip to content

Commit ad3517f

Browse files
committed
listversions improvements
* includes summary data in output
* ability to specify separator in -out file
* -q skips the printing
* docs
1 parent 1090cad commit ad3517f

5 files changed

Lines changed: 291 additions & 62 deletions

File tree

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,10 @@ Usage: listobjects <path>
537537
-xmlfile <file> XML config file to load
538538
```
539539

540+
## Command `listversions`
541+
542+
See [listversions](./src/main/site/listversions.md).
543+
540544
## Command `localhost`
541545

542546
Print out localhost information from java APIs and then the hadoop network APIs.

src/main/extra/org/apache/hadoop/fs/s3a/extra/ListVersions.java

Lines changed: 123 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,9 @@
4242
import org.apache.hadoop.fs.store.StoreDurationInfo;
4343
import org.apache.hadoop.fs.store.StoreEntryPoint;
4444
import org.apache.hadoop.fs.tools.csv.SimpleCsvWriter;
45+
import org.apache.hadoop.io.IOUtils;
4546
import org.apache.hadoop.util.ToolRunner;
4647

47-
import static org.apache.hadoop.fs.s3a.Invoker.once;
48-
import static org.apache.hadoop.fs.s3a.extra.S3ListingSupport.keyToPath;
4948
import static org.apache.hadoop.fs.store.CommonParameters.DEFINE;
5049
import static org.apache.hadoop.fs.store.CommonParameters.LIMIT;
5150
import static org.apache.hadoop.fs.store.CommonParameters.TOKENFILE;
@@ -64,23 +63,28 @@ public class ListVersions extends StoreEntryPoint {
6463

6564
public static final String QUIET = "q";
6665

66+
public static final String SEPARATOR = "separator";
67+
6768

6869
public static final String USAGE
69-
= "Usage: listobjects <path>\n"
70+
= "Usage: listversions <path>\n"
7071
+ optusage(DEFINE, "key=value", "Define a property")
7172
+ optusage(LIMIT, "limit", "limit of files to list")
7273
+ optusage(OUTPUT, "file", "output file")
7374
+ optusage(QUIET, "quiet output")
75+
+ optusage(SEPARATOR, "string", "Separator if not <tab>")
7476
+ optusage(TOKENFILE, "file", "Hadoop token file to load")
7577
+ optusage(VERBOSE, "print verbose output")
7678
+ optusage(XMLFILE, "file", "XML config file to load");
7779

80+
public static final String NAME = "listversions";
81+
7882
public ListVersions() {
7983
createCommandFormat(1, 1,
8084
DELETE,
8185
QUIET,
8286
VERBOSE);
83-
addValueOptions(TOKENFILE, XMLFILE, DEFINE, LIMIT, OUTPUT);
87+
addValueOptions(TOKENFILE, XMLFILE, DEFINE, LIMIT, OUTPUT,SEPARATOR);
8488
}
8589

8690
@Override
@@ -101,10 +105,10 @@ public int run(String[] args) throws Exception {
101105
S3AFileSystem fs = null;
102106
final Path source = new Path(paths.get(0));
103107
try (StoreDurationInfo duration = new StoreDurationInfo(LOG,
104-
"listobjects")) {
108+
NAME)) {
105109
fs = (S3AFileSystem) source.getFileSystem(conf);
106110

107-
final AmazonS3 s3 = fs.getAmazonS3ClientForTesting("listobjects");
111+
final AmazonS3 s3 = fs.getAmazonS3ClientForTesting(NAME);
108112
String key = S3ListingSupport.pathToKey(source);
109113
ListVersionsRequest request = S3ListingSupport.createListVersionsRequest(
110114
source.toUri().getHost(), key, null);
@@ -113,45 +117,91 @@ public int run(String[] args) throws Exception {
113117
= new ListVersionsIterator(s3, source, request);
114118

115119
int objectCount = 0;
116-
long size = 0;
120+
long totalSize = 0;
117121
heading("Listing %s", source);
118-
119-
final String output = getOption(OUTPUT);
120-
PrintStream dest;
121-
boolean closeOutput;
122-
if (output != null) {
123-
// writing to a dir
124-
final Path destPath = new Path(output);
125-
final FileSystem destFS = destPath.getFileSystem(conf);
126-
final FSDataOutputStream dataOutputStream = destFS.createFile(destPath)
127-
.overwrite(true)
128-
.recursive()
129-
.build();
130-
dest = new PrintStream(dataOutputStream);
131-
closeOutput = true;
132-
println("Saving output to %s", destPath);
122+
SummaryWriter writer;
123+
124+
125+
if (!quiet) {
126+
final String output = getOption(OUTPUT);
127+
PrintStream dest;
128+
boolean closeOutput;
129+
if (output != null) {
130+
// writing to a dir
131+
final Path destPath = new Path(output);
132+
final FileSystem destFS = destPath.getFileSystem(conf);
133+
final FSDataOutputStream dataOutputStream = destFS.createFile(destPath)
134+
.overwrite(true)
135+
.recursive()
136+
.build();
137+
dest = new PrintStream(dataOutputStream);
138+
closeOutput = true;
139+
println("Saving output to %s", destPath);
140+
} else {
141+
dest = getOut();
142+
closeOutput = false;
143+
}
144+
final String separator = getOptional(SEPARATOR).orElse("\t");
145+
writer = new CsvVersionWriter(dest, closeOutput, separator);
133146
} else {
134-
dest = getOut();
135-
closeOutput = false;
147+
writer = new SummaryWriter();
136148
}
137-
try (CsvVersionWriter writer = new CsvVersionWriter(dest, closeOutput)) {
149+
long tombstones = 0;
150+
long fileTombstones = 0;
151+
long hidden = 0;
152+
long hiddenData = 0;
153+
long hiddenZeroByteFiles = 0;
154+
long dirMarkers = 0;
155+
long hiddenDirMarkers = 0;
156+
try {
138157

139158
boolean finished = false;
140159
while (!finished && objects.hasNext()) {
141160
final VersionListing page = objects.next();
142161
for (S3VersionSummary summary : page.getVersionSummaries()) {
143162
objectCount++;
144-
if (limit > 0 && objectCount >= limit) {
163+
if (limit > 0 && objectCount > limit) {
145164
finished = true;
146165
break;
166+
} else {
167+
final long size = summary.getSize();
168+
totalSize += size;
169+
writer.write(summary,
170+
fs.keyToQualifiedPath(summary.getKey()));
171+
final boolean isDirMarker = isDirMarker(summary);
172+
dirMarkers += result(isDirMarker);
173+
if (summary.isDeleteMarker()) {
174+
tombstones++;
175+
fileTombstones += result(!isDirMarker);
176+
177+
} else {
178+
if (!summary.isLatest()) {
179+
if (!isDirMarker) {
180+
hidden++;
181+
hiddenData += size;
182+
hiddenZeroByteFiles += result(size == 0);
183+
} else {
184+
hiddenDirMarkers++;
185+
}
186+
}
187+
}
147188
}
148-
size += summary.getSize();
149-
writer.write(summary);
150189
}
151190
}
191+
} finally {
192+
writer.close();
193+
152194
}
153-
println("found %d objects with total size %d bytes", objectCount, size);
154195

196+
println();
197+
println("Found %,d objects under %s with total size %,d bytes", objectCount, source, totalSize);
198+
println("Hidden file count %,d with hidden data size %,d bytes",
199+
hidden, hiddenData);
200+
println("Hidden zero-byte file count %,d", hiddenZeroByteFiles);
201+
println("Hidden directory markers %,d", hiddenDirMarkers);
202+
println("Tombstone entries %,d comprising %,d files and %,d dir markers",
203+
tombstones, fileTombstones, tombstones - fileTombstones);
204+
println();
155205

156206
} finally {
157207
maybeDumpStorageStatistics(fs);
@@ -160,16 +210,37 @@ public int run(String[] args) throws Exception {
160210
return 0;
161211
}
162212

213+
private int result(boolean b) {
214+
return b ? 1 : 0;
215+
}
216+
217+
private static class SummaryWriter implements Closeable {
218+
219+
void write(S3VersionSummary summary, Path path) throws IOException {
220+
221+
}
222+
@Override
223+
public void close() throws IOException {
224+
225+
}
226+
}
227+
228+
163229
/**
164230
* write to csv; pulled out to make writing to avro etc easier in future.
165231
*/
166-
private static final class CsvVersionWriter implements Closeable {
232+
private static final class CsvVersionWriter extends SummaryWriter {
233+
167234
private final SimpleCsvWriter csv;
235+
168236
// Format used for the human-readable last-modified column of the CSV.
// "HH" is the 24-hour clock. The earlier "hh" is the 12-hour clock and,
// with no AM/PM marker in the pattern, printed 03:00 and 15:00 identically.
// NOTE(review): the unquoted "Z" emits the RFC 822 zone offset between the
// date and the time; confirm whether a literal 'T' separator was intended.
private final DateFormat df = new SimpleDateFormat("yyyy-MM-ddZHH:mm:ss");
169237

170-
private CsvVersionWriter(final OutputStream out, final boolean closeOutput) throws IOException {
171-
csv = new SimpleCsvWriter(out, "\t", "\n", true, closeOutput);
238+
long index = 0;
239+
240+
private CsvVersionWriter(final OutputStream out, final boolean closeOutput, String separator) throws IOException {
241+
csv = new SimpleCsvWriter(out, separator, "\n", true, closeOutput);
172242
csv.columns(
243+
"index",
173244
"key",
174245
"path",
175246
"restore",
@@ -190,25 +261,34 @@ public void close() throws IOException {
190261
csv.close();
191262
}
192263

193-
void write(S3VersionSummary summary) throws IOException {
194-
String k = summary.getKey();
195-
csv.column(k);
196-
csv.column(keyToPath(k));
197-
csv.column(false);
198-
csv.column(summary.isLatest());
199-
csv.column(summary.getSize());
200-
csv.column(summary.isDeleteMarker());
201-
csv.column(S3ListingSupport.objectRepresentsDirectory(k, summary.getSize()));
264+
void write(S3VersionSummary summary, Path path) throws IOException {
265+
final boolean deleteMarker = summary.isDeleteMarker();
266+
final boolean dirMarker = isDirMarker(summary);
267+
csv.columnL(++index);
268+
csv.column(summary.getKey());
269+
csv.column(path);
270+
csv.columnB(!deleteMarker && !dirMarker);
271+
csv.columnB(summary.isLatest());
272+
csv.columnL(summary.getSize());
273+
csv.columnB(deleteMarker);
274+
csv.columnB(dirMarker);
202275
final Date lastModified = summary.getLastModified();
203276
csv.column(df.format(lastModified));
204-
csv.column(lastModified.getTime());
205-
csv.column(summary.getVersionId());
277+
csv.columnL(lastModified.getTime());
278+
final String versionId = summary.getVersionId();
279+
csv.column("null".equals(versionId) ? "" : versionId);
206280
csv.column(summary.getETag());
207281
csv.newline();
208282
}
209283

284+
private long getIndex() {
285+
return index;
286+
}
210287
}
211288

289+
private static boolean isDirMarker(final S3VersionSummary summary) {
290+
return S3ListingSupport.objectRepresentsDirectory(summary.getKey(), summary.getSize());
291+
}
212292

213293

214294
/**

src/main/java/org/apache/hadoop/fs/tools/csv/MkCSV.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -183,16 +183,16 @@ public int run(String[] args) throws Exception {
183183
String dataRow = blockData.get(r % blockCount);
184184
int length = Math.min(lastElt, elements);
185185
String data = dataRow.substring(length);
186-
writer.column(data.length());
186+
writer.columnL(data.length());
187187
// data CRC
188188
CRC32 crc = new CRC32();
189189
crc.update(data.getBytes(StandardCharsets.UTF_8));
190-
writer.column(crc.getValue());
190+
writer.columnL(crc.getValue());
191191
writer.column(data);
192192
// repeat the row ID
193193
writer.column(rowId);
194194
// full row checksum
195-
writer.column(writer.getRowCrc());
195+
writer.columnL(writer.getRowCrc());
196196
// end of row
197197
writer.column(END);
198198
writer.newline();

0 commit comments

Comments
 (0)