Skip to content

Commit 624780a

Browse files
committed
add bulkdelete command for bulk deletion.
* Only works with hadoop 3.4.1 or later releases with the bulkdelete API Review more bulkdelete settings in storediag performance
1 parent 6bc3267 commit 624780a

18 files changed

Lines changed: 544 additions & 19 deletions

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,12 @@ Retrieves metadata from an S3 Bucket (v2 SDK only) by probing the store.
123123

124124
See [bucketmetadata](src/main/site/bucketmetadata.md) for details.
125125

126+
## Command `bulkdelete`
127+
128+
Retrieves metadata from an S3 Bucket (v2 SDK only) by probing the store.
129+
130+
See [bucketmetadata](src/main/site/bulkdelete.md) for details.
131+
126132
## Command `cloudup` -upload and download files; optimised for cloud storage
127133

128134
See [cloudup](src/main/site/cloudup.md)

src/main/java/help.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ public static void main(String[] args) {
7575
println("========================");
7676
println("");
7777
printCommand("bucketmetadata", "Print bucket metadata from store");
78+
printCommand("bulkdelete", "bulk delete objects/files");
7879
printCommand("regions2", "emulate region lookup of AWS v2 SDK");
7980
println("");
8081
println("See https://github.com/steveloughran/cloudstore");

src/main/java/org/apache/hadoop/fs/store/CommonParameters.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ public final class CommonParameters {
6767

6868
public static final String CSVFILE = "csv";
6969

70+
public static final String DEBUG = "debug";
71+
72+
public static final String DEBUG_USAGE = "debug with extra logging";
7073
public static final String FLUSH = "flush";
7174

7275
public static final String HFLUSH = "hflush";
@@ -77,11 +80,10 @@ public final class CommonParameters {
7780

7881
public static final String OVERWRITE = "overwrite";
7982

80-
public static final String UPDATE = "update";
83+
public static final String PAGE = "page";
8184

82-
public static final String DEBUG = "debug";
85+
public static final String UPDATE = "update";
8386

84-
public static final String DEBUG_USAGE = "debug with extra logging";
8587

8688
private CommonParameters() {
8789
}

src/main/java/org/apache/hadoop/fs/store/StoreDurationInfo.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
* This allows it to be used in a try-with-resources clause, and have the
3030
* duration automatically logged.
3131
*
32-
* Base on the S3A one; adds an empty constructor which doesn't do
32+
* Based on the hadoop one; adds an empty constructor which doesn't do
3333
* any logging.
3434
*/
3535
public class StoreDurationInfo

src/main/java/org/apache/hadoop/fs/store/StoreEntryPoint.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -650,7 +650,7 @@ protected Configuration createPreconfiguredConfig()
650650

651651
maybeAddXMLFileOption(conf);
652652
maybePatchDefined(conf, DEFINE);
653-
conf.set(IOSTATISTICS_LOGGING_LEVEL, "info");
653+
// conf.set(IOSTATISTICS_LOGGING_LEVEL, "info");
654654

655655
return conf;
656656
}

src/main/java/org/apache/hadoop/fs/store/StoreUtils.java

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,13 @@
1818

1919
package org.apache.hadoop.fs.store;
2020

21+
import java.io.BufferedReader;
22+
import java.io.File;
23+
import java.io.FileReader;
2124
import java.io.IOException;
2225
import java.lang.reflect.Array;
26+
import java.util.ArrayList;
27+
import java.util.List;
2328
import java.util.Locale;
2429
import java.util.Map;
2530
import java.util.concurrent.ExecutionException;
@@ -198,7 +203,8 @@ public static String sanitize(final String value, boolean hide) {
198203
return String.format("\"%s\" [%d]", safe, len);
199204
}
200205

201-
public static class StringPair implements Map.Entry<String, String>{
206+
public static final class StringPair implements Map.Entry<String, String> {
207+
202208
private String key, value;
203209

204210
private StringPair(final String left, final String right) {
@@ -221,4 +227,23 @@ public String setValue(final String value) {
221227
}
222228
}
223229

230+
/**
231+
* Read lines, skipping those with # or blank.
232+
* (generated by copilot.)
233+
* @param file path to file
234+
* @return the list of lines
235+
* @throws IOException failure to read
236+
*/
237+
public static List<String> readLines(File file) throws IOException {
238+
List<String> lines = new ArrayList<>();
239+
try (BufferedReader br = new BufferedReader(new FileReader(file))) {
240+
String line;
241+
while ((line = br.readLine()) != null) {
242+
if (!line.trim().isEmpty() && !line.trim().startsWith("#")) {
243+
lines.add(line);
244+
}
245+
}
246+
}
247+
return lines;
248+
}
224249
}

src/main/java/org/apache/hadoop/fs/store/commands/DirectoryTree.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,7 @@ public int run(String[] args) throws Exception {
7777
List<String> paths = processArgs(args, 1, 1, USAGE);
7878
final Configuration conf = createPreconfiguredConfig();
7979

80-
int threads = getOptional(THREADS).map(Integer::valueOf).orElse(
81-
DEFAULT_THREADS);
80+
int threads = getIntOption(THREADS, DEFAULT_THREADS);
8281

8382
final Path source = new Path(paths.get(0));
8483
heading("Deleting __temporary directories under %s with thread count %d",

src/main/java/org/apache/hadoop/fs/store/commands/EnvEntry.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
/**
2222
* Environment/property entry, with
23-
* methods to convert to a given format,
23+
* methods to convert to a given format.
2424
*/
2525
public class EnvEntry {
2626
final String name;
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.fs.store.commands;

src/main/java/org/apache/hadoop/fs/store/diag/S3ADiagnosticsInfo.java

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,8 @@ public class S3ADiagnosticsInfo extends StoreDiagnosticsInfo {
180180

181181
public static final String BULK_DELETE_PAGE_SIZE = "fs.s3a.bulk.delete.page.size";
182182

183+
public static final int BULK_DELETE_PAGE_SIZE_DEFAULT = 250;
184+
183185
public static final String CONNECTION_SSL_ENABLED = "fs.s3a.connection.ssl.enabled";
184186

185187
public static final String SERVER_SIDE_ENCRYPTION_ALGORITHM =
@@ -278,6 +280,8 @@ public class S3ADiagnosticsInfo extends StoreDiagnosticsInfo {
278280

279281
public static final String CHANNEL_MODE = "fs.s3a.ssl.channel.mode";
280282

283+
public static final String MULTIOBJECTDELETE_ENABLE = "fs.s3a.multiobjectdelete.enable";
284+
281285
private static final Object[][] options = {
282286
/* Core auth */
283287
{ACCESS_KEY, true, true},
@@ -340,7 +344,7 @@ public class S3ADiagnosticsInfo extends StoreDiagnosticsInfo {
340344
{DISABLE_CACHE, false, false},
341345
{"fs.s3a.list.version", false, false},
342346
{"fs.s3a.max.total.tasks", false, false},
343-
{"fs.s3a.multiobjectdelete.enable", false, false},
347+
{MULTIOBJECTDELETE_ENABLE, false, false},
344348
{FS_S3A_MULTIPART_SIZE, false, false},
345349
{MULTIPART_UPLOADS_ENABLED, false, false},
346350
{MULTIPART_PURGE, false, false},
@@ -1370,7 +1374,7 @@ CONNECTION_TTL, ofMinutes(5),
13701374
requestTimeout,
13711375
true, "");
13721376

1373-
printout.subheading("Other options");
1377+
printout.subheading("Misc options");
13741378
printout.println();
13751379

13761380
int bucketProbe = conf.getInt(BUCKET_PROBE, 0);
@@ -1390,6 +1394,23 @@ CONNECTION_TTL, ofMinutes(5),
13901394

13911395
reviewReadPolicy(printout, conf);
13921396

1397+
printout.subheading("Bulk Delete behavior");
1398+
boolean multi = conf.getBoolean(MULTIOBJECTDELETE_ENABLE, true);
1399+
int pageSize = conf.getInt(BULK_DELETE_PAGE_SIZE, BULK_DELETE_PAGE_SIZE_DEFAULT);
1400+
printout.println("%s = %s", MULTIOBJECTDELETE_ENABLE, multi);
1401+
printout.println("%s = %d", BULK_DELETE_PAGE_SIZE, pageSize);
1402+
printout.println();
1403+
if (multi) {
1404+
printout.println("Multi object delete is enabled; page size is %d", pageSize);
1405+
hint(printout, pageSize > 500,
1406+
"The page size is > 500. This is dangerous on heavily loaded stores\n"
1407+
+ "See HADOOP-16823. Large DeleteObject requests are their own Thundering Herd");
1408+
hint(printout, pageSize < 100,
1409+
"The page size is <100. This will slow down renames, deletes and other bulk operations.");
1410+
} else {
1411+
printout.println("Multi object delete is disabled.");
1412+
printout.println("This should only be done when working with third party stores.");
1413+
}
13931414
}
13941415

13951416
/**
@@ -1400,7 +1421,7 @@ private void reviewReadPolicy(final Printout printout,
14001421
// look at seek policy and warn of risks
14011422
final String fadvise =
14021423
conf.getTrimmed(INPUT_FADVISE, INPUT_FADV_NORMAL);
1403-
printout.heading("Seek policy: %s", fadvise);
1424+
printout.subheading("Seek policy: %s", fadvise);
14041425
switch (fadvise) {
14051426
case INPUT_FADV_NORMAL:
14061427
case OptionSets.EnhancedOpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE:

0 commit comments

Comments
 (0)