Skip to content

Commit 3c30f64

Browse files
committed
1. add strict mode
2. bump version to 0.1.3
1 parent 83865bf commit 3c30f64

File tree

9 files changed

+61
-34
lines changed

9 files changed

+61
-34
lines changed

README.md

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,16 @@ Remove text stamps of **any font**, **any encoding** and **any language** with p
2525

2626
## Usage
2727
```
28-
Usage:
28+
Usage:
2929
[OPTION] -i [INPUT PDF] -k [KEYWORDS...] (-o [OUTPUT PDF])
3030
[OPTION] -I [INPUT DIR] -k [KEYWORDS...] (-O [OUTPUT DIR])
3131
3232
Options:
33-
-d, --directly directly modify the input file(s), which makes option o/O unnecessary
33+
-d, --directly directly modify the input file(s); option o/O is
34+
unnecessary when this option is on
3435
-r, --recursive process files in the given dir recursively
36+
-s, --strict use strict mode: a text area is considered a watermark
37+
only if its content strictly equals one of the keywords
3538
```
3639

3740
## Get it now
@@ -48,10 +51,10 @@ Make sure you have `wget` installed.
4851
#### Run
4952
```shell
5053
# For single file processing
51-
➜ unstamp -i "C Recipes.pdf" -o "C Recipes.unstamped.pdf" -k www.allitebooks.com
54+
➜ unstamp -i "C Recipes.pdf" -o "C Recipes.unstamped.pdf" -k www.allitebooks.com -s
5255
➜ unstamp -i RoR.pdf -o RoR.unstamped.pdf -k 图灵社区会员
5356
# Or
54-
➜ unstamp -i "C Recipes.pdf" -d -k www.allitebooks.com
57+
➜ unstamp -i "C Recipes.pdf" -d -k www.allitebooks.com -s
5558
➜ unstamp -i RoR.pdf -d -k 图灵社区会员
5659

5760
# For batch processing of multiple files

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<modelVersion>4.0.0</modelVersion>
77
<groupId>com.amastigote</groupId>
88
<artifactId>unstamper</artifactId>
9-
<version>0.1.2</version>
9+
<version>0.1.3</version>
1010
<description>Text stamp remover for PDF files.</description>
1111
<name>pdf-unstamper</name>
1212
<url>https://github.com/hwding/pdf-unstamper</url>

script/install

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ user_bin=`echo ~`"/bin/"
88
locl_bin="/usr/local/bin/"
99
jar_name="pdf-unstamper.jar"
1010
exe_name="unstamp"
11-
_version="0.1.2"
11+
_version="0.1.3"
1212
jar_durl="https://github.com/hwding/pdf-unstamper/releases/download/$_version/$jar_name"
1313
wrapper="#!/bin/bash\njava -jar ${user_bin}${jar_name} \"\$@\"\n"
1414

src/com/amastigote/unstamper/Main.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
AUTH | hwding
3-
DATE | Sep 05 2017
3+
DATE | Sep 10 2017
44
DESC | text stamp remover for PDF files
55
66
GITH | github.com/hwding
@@ -37,7 +37,8 @@ public static void main(@NotNull String[] args) {
3737
System.exit(0);
3838
} else {
3939
TaskRunner.init(
40-
commandLine.getOptionValues('k'));
40+
commandLine.getOptionValues('k'),
41+
commandLine.hasOption('s'));
4142

4243
if (commandLine.hasOption('i') && (commandLine.hasOption('o') || commandLine.hasOption('d'))) {
4344
if (commandLine.hasOption('d'))

src/com/amastigote/unstamper/core/Processor.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
AUTH | hwding
3-
DATE | Sep 05 2017
3+
DATE | Sep 10 2017
44
DESC | text stamp remover for PDF files
55
66
GITH | github.com/hwding
@@ -27,7 +27,8 @@
2727
public class Processor {
2828
public static void process(
2929
@NotNull File file,
30-
@NotNull String[] strings) {
30+
@NotNull String[] strings,
31+
@NotNull boolean useStrict) {
3132
AtomicBoolean processAllOk = new AtomicBoolean(true);
3233
GeneralLogger.Processor.procInProgress(file.getName());
3334

@@ -61,7 +62,7 @@ public static void process(
6162
if (e instanceof COSString) {
6263
/* Ignore Any Exception During Parallel Processing */
6364
try {
64-
if (TextStampRecognizer.recognize(strings, ((COSString) e).getBytes(), pdFonts))
65+
if (TextStampRecognizer.recognize(strings, ((COSString) e).getBytes(), pdFonts, useStrict))
6566
((COSString) e).setValue(new byte[0]);
6667
} catch (Exception ignored) {
6768
}
src/com/amastigote/unstamper/core/TextStampRecognizer.java

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
AUTH | hwding
3-
DATE | Sep 05 2017
3+
DATE | Sep 10 2017
44
DESC | text stamp remover for PDF files
55
66
GITH | github.com/hwding
@@ -18,15 +18,16 @@ class TextStampRecognizer {
1818
private static boolean recognizeWithFont(
1919
@NotNull String[] keywords,
2020
@NotNull byte[] inputText,
21-
@NotNull Set<PDFont> pdFonts) {
22-
String bs = generateByteString(inputText);
21+
@NotNull Set<PDFont> pdFonts,
22+
@NotNull boolean useStrict) {
23+
String encodedInput = generateByteString(inputText);
2324
for (PDFont f : pdFonts) {
2425
if (f == null) continue;
2526
for (String k : keywords) {
2627
try {
27-
byte[] encodedKeywords = f.encode(k);
28-
if (bs.contains(generateByteString(encodedKeywords)))
29-
return true;
28+
byte[] encodedKeywordBytes = f.encode(k);
29+
final String encodedKeyword = generateByteString(encodedKeywordBytes);
30+
if (checkDuplicate(encodedInput, encodedKeyword, useStrict)) return true;
3031
} catch (IOException | IllegalArgumentException ignored) {
3132
}
3233
}
@@ -36,26 +37,38 @@ private static boolean recognizeWithFont(
3637

3738
private static boolean recognizePlain(
3839
@NotNull String[] keywords,
39-
@NotNull byte[] inputText
40+
@NotNull byte[] inputText,
41+
@NotNull boolean useStrict
4042
) {
41-
for (String k : keywords) {
42-
if (new String(inputText).contains(k)) return true;
43+
for (String k : keywords)
44+
if (checkDuplicate(new String(inputText), k, useStrict)) return true;
45+
return false;
46+
}
47+
48+
private static boolean checkDuplicate(
49+
@NotNull String input,
50+
@NotNull String keyword,
51+
@NotNull boolean useStrict) {
52+
if (useStrict) {
53+
if (input.equals(keyword)) return true;
54+
} else {
55+
if (input.contains(keyword)) return true;
4356
}
4457
return false;
4558
}
4659

4760
static boolean recognize(@NotNull String[] keywords,
4861
@NotNull byte[] inputText,
49-
@NotNull Set<PDFont> pdFonts) {
50-
return recognizePlain(keywords, inputText) ||
51-
recognizeWithFont(keywords, inputText, pdFonts);
62+
@NotNull Set<PDFont> pdFonts,
63+
@NotNull boolean useStrict) {
64+
return recognizePlain(keywords, inputText, useStrict) ||
65+
recognizeWithFont(keywords, inputText, pdFonts, useStrict);
5266
}
5367

5468
private static String generateByteString(@NotNull byte[] bytes) {
5569
StringBuilder stringBuilder = new StringBuilder();
56-
for (byte b : bytes) {
70+
for (byte b : bytes)
5771
stringBuilder.append(Byte.toString(b));
58-
}
5972
return stringBuilder.toString();
6073
}
6174
}

src/com/amastigote/unstamper/log/GeneralLogger.java

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
AUTH | hwding
3-
DATE | Sep 05 2017
3+
DATE | Sep 10 2017
44
DESC | text stamp remover for PDF files
55
66
GITH | github.com/hwding
@@ -12,13 +12,16 @@
1212
public class GeneralLogger {
1313
public static class Help {
1414
private static final String usage =
15-
"\nPDF-UnStamper ver. 0.1.2 by hwding@GitHub\n" +
15+
"\nPDF-UnStamper ver. 0.1.3 by hwding@GitHub\n" +
1616
"\nUsage:" +
1717
"\n [OPTION] -i [INPUT PDF] -k [KEYWORDS...] (-o [OUTPUT PDF])" +
1818
"\n [OPTION] -I [INPUT DIR] -k [KEYWORDS...] (-O [OUTPUT DIR])\n" +
1919
"\nOptions:" +
20-
"\n -d, --directly directly modify the input file(s), which makes option o/O unnecessary" +
21-
"\n -r, --recursive process files in the given dir recursively\n";
20+
"\n -d, --directly directly modify the input file(s), option o/O is\n" +
21+
" unnecessary when this option is on" +
22+
"\n -r, --recursive process files in the given dir recursively" +
23+
"\n -s, --strict use strict mode, a text area is considered as water mark\n" +
24+
" only if its content strictly equals one of the keywords\n";
2225

2326
public static void print() {
2427
System.out.println(usage);
@@ -61,7 +64,7 @@ public static void procInProgress(@NotNull String fn) {
6164
}
6265

6366
public static void procFinished() {
64-
System.out.println(" GOOD");
67+
System.out.println(" done");
6568
}
6669
}
6770
}

src/com/amastigote/unstamper/util/OptionManager.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
AUTH | hwding
3-
DATE | Aug 25 2017
3+
DATE | Sep 10 2017
44
DESC | text stamp remover for PDF files
55
66
GITH | github.com/hwding
@@ -18,6 +18,7 @@ public class OptionManager {
1818
private final static Option optionK = new Option("k", true, null);
1919
private final static Option optionD = new Option("d", "directly", false, null);
2020
private final static Option optionR = new Option("r", "recursive", false, null);
21+
private final static Option optionS = new Option("s", "strict", false, null);
2122

2223
public static Options buildOptions() {
2324
Options options = new Options();
@@ -34,6 +35,7 @@ public static Options buildOptions() {
3435
options.addOption(optionD);
3536
options.addOption(optionR);
3637
options.addOption(optionK);
38+
options.addOption(optionS);
3739
return options;
3840
}
3941
}

src/com/amastigote/unstamper/util/TaskRunner.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
AUTH | hwding
3-
DATE | Sep 05 2017
3+
DATE | Sep 10 2017
44
DESC | text stamp remover for PDF files
55
66
GITH | github.com/hwding
@@ -19,9 +19,13 @@
1919

2020
public class TaskRunner {
2121
private static String[] keywords;
22+
private static boolean useStrict;
2223

23-
public static void init(@NotNull String[] keywords) {
24+
public static void init(
25+
@NotNull String[] keywords,
26+
@NotNull boolean useStrict) {
2427
TaskRunner.keywords = keywords;
28+
TaskRunner.useStrict = useStrict;
2529
}
2630

2731
public static void procSingleFile(
@@ -42,7 +46,7 @@ public static void procSingleFile(
4246
}
4347

4448
private static void submitToProcessor(@NotNull File file) {
45-
Processor.process(file, keywords);
49+
Processor.process(file, keywords, useStrict);
4650
}
4751

4852
public static void procSingleFileDirectly(@NotNull String ifn) {

0 commit comments

Comments
 (0)