11/*
22 AUTH | hwding
3- DATE | Aug 27 2017
3+ DATE | Sep 04 2017
44 DESC | text stamp remover for PDF files
5566 GITH | github.com/hwding
77 */
88package com .amastigote .unstamper .core ;
99
1010import com .amastigote .unstamper .log .GeneralLogger ;
11- import org .apache .pdfbox .cos .COSName ;
1211import org .apache .pdfbox .cos .COSString ;
1312import org .apache .pdfbox .pdfparser .PDFStreamParser ;
1413import org .apache .pdfbox .pdfwriter .ContentStreamWriter ;
1918import java .io .File ;
2019import java .io .IOException ;
2120import java .io .OutputStream ;
22- import java .util .Collections ;
2321import java .util .HashSet ;
2422import java .util .List ;
2523import java .util .Set ;
26- import java .util .stream .Collectors ;
2724
2825public class Processor {
2926 public static void process (File file , String [] strings ) {
@@ -35,45 +32,34 @@ public static void process(File file, String[] strings) {
3532 PDDocument pdDocument = PDDocument .load (file );
3633 pdDocument .getPages ().forEach (pdPage -> {
3734 try {
38- /* START: loading font resources for further parsing */
35+ /* START: loading font resources from current page */
3936 PDFStreamParser pdfStreamParser = new PDFStreamParser (pdPage );
4037 pdfStreamParser .parse ();
4138
42- List <Object > objects =
43- Collections . synchronizedList ( pdfStreamParser . getTokens () );
39+ List <Object > objects = pdfStreamParser . getTokens ();
40+ Set < PDFont > pdFonts = new HashSet <>( );
4441
45- List <Object > cosNames =
46- objects .parallelStream ()
47- .filter (e -> e instanceof COSName )
48- .collect (Collectors .toList ());
49-
50- Set <PDFont > pdFonts =
51- Collections .synchronizedSet (new HashSet <>());
52-
53- cosNames .parallelStream ()
54- .forEach (e -> {
55- /* Ignore Any Exception During Parallel Processing */
56- try {
57- PDFont pdFont = pdPage .getResources ().getFont (((COSName ) e ));
58- if (pdFont != null )
59- pdFonts .add (pdFont );
60- } catch (Exception ignored ) {
61- }
62- });
42+ pdPage .getResources ().getFontNames ().forEach (e -> {
43+ /* Ignore any exception while loading a font; a font that fails to load is simply not added */
44+ try {
45+ PDFont pdFont = pdPage .getResources ().getFont (e );
46+ if (pdFont != null )
47+ pdFonts .add (pdFont );
48+ } catch (Exception ignored ) {
49+ }
50+ });
6351 /* END */
64- objects
65- .parallelStream ()
66- .forEach (e -> {
67- if (e instanceof COSString ) {
68- /* Ignore Any Exception During Parallel Processing */
69- try {
70- if (TextStampRecognizer .recognize (strings , ((COSString ) e ).getBytes (), pdFonts ))
71- ((COSString ) e ).setValue (new byte [0 ]);
72- } catch (Exception ignored ) {
73- }
74- }
75- }
76- );
52+
53+ objects .parallelStream ().forEach (e -> {
54+ if (e instanceof COSString ) {
55+ /* Ignore Any Exception During Parallel Processing */
56+ try {
57+ if (TextStampRecognizer .recognize (strings , ((COSString ) e ).getBytes (), pdFonts ))
58+ ((COSString ) e ).setValue (new byte [0 ]);
59+ } catch (Exception ignored ) {
60+ }
61+ }
62+ });
7763
7864 PDStream newContents = new PDStream (pdDocument );
7965 OutputStream out = newContents .createOutputStream ();
0 commit comments