Skip to content

Commit 3a52b88

Browse files
committed
ValidateTool: Implement multithreaded validation
1 parent f6c9c59 commit 3a52b88

File tree

1 file changed

+45
-16
lines changed

1 file changed

+45
-16
lines changed

src/org/netpreserve/jwarc/tools/ValidateTool.java

Lines changed: 45 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -9,12 +9,15 @@
99

1010
import java.io.IOException;
1111
import java.nio.ByteBuffer;
12+
import java.nio.file.Path;
1213
import java.nio.file.Paths;
1314
import java.security.DigestException;
1415
import java.security.MessageDigest;
1516
import java.security.NoSuchAlgorithmException;
17+
import java.util.ArrayList;
1618
import java.util.List;
1719
import java.util.Optional;
20+
import java.util.concurrent.ForkJoinPool;
1821
import java.util.concurrent.atomic.AtomicBoolean;
1922
import java.util.concurrent.atomic.AtomicLong;
2023
import java.util.function.Consumer;
@@ -253,6 +256,23 @@ record = reader.next().orElse(null);
253256
return warcValidates;
254257
}
255258

259+
/**
 * Validates a single WARC file identified by its path.
 *
 * Opens a {@code WarcReader} on the file, runs the reader-based
 * {@code validate(reader)} check, and reports the outcome on the console.
 * The reader is closed by the try-with-resources statement on every exit path.
 * An {@code IOException} is not propagated: it is reported on standard error
 * (message plus stack trace) and treated as a validation failure.
 *
 * NOTE(review): main passes this method as {@code validator::validate} to a
 * parallel stream, so it may run on several threads against the same
 * ValidateTool instance; console output for different files may interleave —
 * confirm that the instance state used by validate(reader) is safe to share.
 *
 * @param warcFile path of the WARC file to open and validate
 * @return {@code true} if {@code validate(reader)} succeeded; {@code false} if
 *         it failed or an {@code IOException} occurred
 */
private boolean validate(Path warcFile) {
260+
try (WarcReader reader = new WarcReader(warcFile)) {
261+
// Presumably enables block digest computation while reading so digests can
// be checked — confirm against WarcReader.
reader.calculateBlockDigest();
262+
if (verbose)
263+
System.out.println("Validating " + warcFile);
264+
if (!validate(reader)) {
265+
System.err.println("Failed to validate " + warcFile);
266+
return false;
267+
}
268+
return true;
269+
} catch (IOException e) {
270+
// An I/O failure counts as a failed validation for this file rather than
// aborting the caller.
System.err.println("Exception validating " + warcFile + ": " + e);
271+
e.printStackTrace();
272+
return false;
273+
}
274+
}
275+
256276
private static void usage(int exitValue) {
257277
System.err.println("");
258278
System.err.println("ValidateTool [-h] [-v] filename...");
@@ -270,15 +290,17 @@ private static void usage(int exitValue) {
270290
System.exit(exitValue);
271291
}
272292

273-
public static void main(String[] args) throws IOException {
293+
public static void main(String[] args) throws Exception {
274294
int res = 0;
275295
boolean verbose = false;
276296
boolean headerValidation = true;
277297
boolean forbidExtensions = false;
298+
int threads = Runtime.getRuntime().availableProcessors();
299+
List<Path> warcFiles = new ArrayList<>();
278300
if (args.length == 0)
279301
usage(0);
280-
for (String arg : args) {
281-
switch (arg) {
302+
for (int i = 0; i < args.length; i++) {
303+
switch (args[i]) {
282304
case "--no-header-validation":
283305
headerValidation = false;
284306
break;
@@ -289,26 +311,33 @@ public static void main(String[] args) throws IOException {
289311
case "--help":
290312
usage(0);
291313
break;
314+
case "-j":
315+
case "--threads":
316+
threads = Integer.parseInt(args[++i]);
317+
break;
292318
case "-v":
293319
case "--verbose":
294320
verbose = true;
295321
break;
296322
default:
297-
ValidateTool validator = new ValidateTool(verbose);
298-
if (headerValidation) {
299-
validator.headerValidator = HeaderValidator.warc_1_1(forbidExtensions);
300-
}
301-
try (WarcReader reader = new WarcReader(Paths.get(arg))) {
302-
reader.calculateBlockDigest();
303-
if (verbose)
304-
System.out.println("Validating " + arg);
305-
if (!validator.validate(reader)) {
306-
System.err.println("Failed to validate " + arg);
307-
res = 1;
308-
}
309-
}
323+
warcFiles.add(Paths.get(args[i]));
310324
}
311325
}
326+
327+
ValidateTool validator = new ValidateTool(verbose);
328+
if (headerValidation) {
329+
validator.headerValidator = HeaderValidator.warc_1_1(forbidExtensions);
330+
}
331+
332+
ForkJoinPool pool = new ForkJoinPool(threads);
333+
try {
334+
res = pool.submit(() -> warcFiles.parallelStream()
335+
.map(validator::validate)
336+
.anyMatch(valid -> !valid) ? 1 : 0).get();
337+
} finally {
338+
pool.shutdown();
339+
}
340+
312341
System.exit(res);
313342
}
314343

0 commit comments

Comments
 (0)