Skip to content

Commit 187662c

Browse files
Sören Röttger
authored and committed
Merge branch 'release/1.0.12'
* release/1.0.12:
  - bumped version to 1.0.12
  - extends error message with response-content-type
  - fixes pdf-validation so that bom marker is detected and removed before validation
  - fixes pdf-validation so that bom marker is detected and removed before validation
  - fixes pdf-validation so that bom marker is detected and removed before validation
  - fixes pdf-validation so that bom marker is detected and removed before validation
  - Update spring boot dependency to patch vulnerable transitive dependencies
  - Version bump
  - Get classname for Logger dynamically to allow sub-classes to be respected in logging
  - next dev version 1.0.12-SNAPSHOT
  - next dev version 1.0.11-SNAPSHOT
2 parents 9a6e1bf + 44b2049 commit 187662c

File tree

11 files changed

+161
-80
lines changed

11 files changed

+161
-80
lines changed

pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@
55

66
<groupId>de.tk.opensource</groupId>
77
<artifactId>3rdparty-privacy-proxy</artifactId>
8-
<version>1.0.11</version>
8+
<version>1.0.12</version>
99
<packaging>jar</packaging>
1010
<name>3rd Party Privacy Proxy (Open Source)</name>
1111

1212
<parent>
1313
<groupId>org.springframework.boot</groupId>
1414
<artifactId>spring-boot-starter-parent</artifactId>
15-
<version>2.2.0.RELEASE</version>
15+
<version>2.4.2</version>
1616
</parent>
1717

1818
<description>

src/main/java/de/tk/opensource/privacyproxy/delivery/AssetDeliveryController.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
@RequestMapping(value = UrlPattern.Contexts.DELIVERY)
3030
public abstract class AssetDeliveryController {
3131

32-
private static final Logger LOGGER = LoggerFactory.getLogger(AssetDeliveryController.class);
32+
protected final Logger LOGGER = LoggerFactory.getLogger(getClass());
3333

3434
@Autowired
3535
private ResourceLoader resourceLoader;

src/main/java/de/tk/opensource/privacyproxy/retrieval/AssetRetrievalService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
*/
2323
public abstract class AssetRetrievalService implements InitializingBean {
2424

25-
private static final Logger LOGGER = LoggerFactory.getLogger(AssetRetrievalService.class);
25+
protected final Logger LOGGER = LoggerFactory.getLogger(getClass());
2626

2727
/**
2828
* Configure where the files will be stored on the file system.

src/main/java/de/tk/opensource/privacyproxy/retrieval/AssetRetryRetrievalService.java

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/*--- (C) 1999-2019 Techniker Krankenkasse ---*/
1+
/*--- (C) 1999-2021 Techniker Krankenkasse ---*/
22

33
package de.tk.opensource.privacyproxy.retrieval;
44

@@ -16,19 +16,22 @@
1616
import java.util.zip.ZipInputStream;
1717

1818
import org.apache.commons.io.IOUtils;
19+
import org.slf4j.Logger;
20+
import org.slf4j.LoggerFactory;
1921
import org.springframework.beans.factory.annotation.Value;
2022
import org.springframework.retry.annotation.Backoff;
2123
import org.springframework.retry.annotation.Retryable;
2224
import org.springframework.stereotype.Service;
2325

2426
import de.tk.opensource.privacyproxy.config.RetrievalEndpoint;
25-
import de.tk.opensource.privacyproxy.util.PDFCorruptedException;
2627
import de.tk.opensource.privacyproxy.util.PDFHelper;
2728
import de.tk.opensource.privacyproxy.util.ProxyHelper;
2829

2930
@Service
3031
public class AssetRetryRetrievalService {
3132

33+
private final Logger LOGGER = LoggerFactory.getLogger(getClass());
34+
3235
/**
3336
* Configure where the files will be stored on the file system.
3437
*/
@@ -46,9 +49,7 @@ public AssetRetryRetrievalService(ProxyHelper proxyHelper) {
4649
backoff = @Backoff(delay = 3000),
4750
maxAttempts = 4
4851
)
49-
void retrieveAsset(String provider, RetrievalEndpoint endpoint) throws IOException,
50-
PDFCorruptedException
51-
{
52+
void retrieveAsset(String provider, RetrievalEndpoint endpoint) throws IOException {
5253
final URL url = new URL(endpoint.getRemoteUrlWithCacheBuster());
5354
final URLConnection connection = url.openConnection(proxyHelper.selectProxy(url));
5455
connection.setRequestProperty("User-Agent", "3rd Party Privacy Proxy");
@@ -60,31 +61,7 @@ void retrieveAsset(String provider, RetrievalEndpoint endpoint) throws IOExcepti
6061
} else {
6162
try(final InputStream httpInputStream = connection.getInputStream()) {
6263
if (endpoint.getFilename().endsWith(".pdf")) {
63-
64-
// copy stream to check for pdf validity, afterwards use another new stream to handle download
65-
try(final ByteArrayOutputStream copiedStream = new ByteArrayOutputStream()) {
66-
IOUtils.copy(httpInputStream, copiedStream);
67-
if (!PDFHelper.isPdf(copiedStream.toByteArray())) {
68-
throw new PDFCorruptedException(
69-
String.format(
70-
"The requested resource %s wasn't a valid pdf file. Maybe the endpoint has an error "
71-
+ "and therefore the pdf content is the content of a maintenance site",
72-
endpoint.getRemoteUrl()
73-
)
74-
);
75-
} // Looks like we have a valid pdf. Download it...
76-
try(
77-
final ByteArrayInputStream inputStream =
78-
new ByteArrayInputStream(copiedStream.toByteArray())
79-
) {
80-
retrieveFileByChannel(
81-
provider,
82-
endpoint,
83-
inputStream,
84-
originalFileSize
85-
);
86-
}
87-
}
64+
retrievePdf(provider, endpoint, httpInputStream, originalFileSize, connection);
8865
} else {
8966
retrieveFileByChannel(provider, endpoint, httpInputStream, originalFileSize);
9067
}
@@ -131,6 +108,29 @@ void retrieveZip(
131108
}
132109
}
133110

111+
void retrievePdf(
112+
String provider,
113+
RetrievalEndpoint endpoint,
114+
InputStream httpInputStream,
115+
long originalFileSize,
116+
URLConnection connection
117+
) throws IOException
118+
{
119+
byte[] bytes = IOUtils.toByteArray(httpInputStream);
120+
if (PDFHelper.isPdf(bytes)) {
121+
ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(bytes);
122+
retrieveFileByChannel(provider, endpoint, byteArrayInputStream, originalFileSize);
123+
} else {
124+
LOGGER.error(
125+
"The requested resource {} wasn't a valid pdf file. Content-type was {}."
126+
+ " Maybe the endpoint has an error and therefore the pdf content is the content"
127+
+ " of a maintenance site",
128+
endpoint.getRemoteUrl(),
129+
connection.getContentType()
130+
);
131+
}
132+
}
133+
134134
void retrieveFileByChannel(
135135
final String provider,
136136
final RetrievalEndpoint endpoint,

src/main/java/de/tk/opensource/privacyproxy/routing/RoutingHandler.java

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,17 +47,16 @@
4747
* use blacklisting. If the service delivers a response, this also has to be kind of whitelisted.
4848
* Cookies will be set by this service and thus will always be 1st party! You have to implement your
4949
* own RoutingHandler per provider. E.g. you could write a RoutingProvider to proxy traffic to an
50-
* external Matomo instance.
51-
* REQUIREMENT: You have to be able to configure the 3rd Party JS to talk to this service URL
52-
* instead of their server directly. If they don't allow this without patching their code by
53-
* yourself, look for another service provider. There is no technical requirement for not allowing
54-
* this.
50+
* external Matomo instance. REQUIREMENT: You have to be able to configure the 3rd Party JS to talk
51+
* to this service URL instead of their server directly. If they don't allow this without patching
52+
* their code by yourself, look for another service provider. There is no technical requirement for
53+
* not allowing this.
5554
*/
5655
@Controller
5756
@RequestMapping(value = UrlPattern.Contexts.PROXY)
5857
public abstract class RoutingHandler {
5958

60-
private static final Logger LOGGER = LoggerFactory.getLogger(RoutingHandler.class);
59+
protected final Logger LOGGER = LoggerFactory.getLogger(getClass());
6160

6261
private static final String[] DEFAULT_RETURN_VALUE = new String[0];
6362

@@ -350,9 +349,10 @@ protected CookieNameMatchType getCookieNameMatchType() {
350349
* Transform the given query parameter before appending it to the request. The default
351350
* implementation applies percent-encoding to the value.
352351
*
353-
* @param name query parameter name
354-
* @param value query parameter value
355-
* @return encoded parameter
352+
* @param name query parameter name
353+
* @param value query parameter value
354+
*
355+
* @return encoded parameter
356356
*/
357357
protected String transformQueryParam(String name, String value) {
358358
return RequestUtils.urlencode(value);

src/main/java/de/tk/opensource/privacyproxy/util/PDFCorruptedException.java

Lines changed: 0 additions & 7 deletions
This file was deleted.
Lines changed: 79 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,83 @@
1+
/*--- (C) 1999-2019 Techniker Krankenkasse ---*/
2+
13
package de.tk.opensource.privacyproxy.util;
24

5+
import java.io.ByteArrayInputStream;
6+
import java.util.Arrays;
7+
38
public class PDFHelper {

    /** UTF-8 byte order mark (0xEF 0xBB 0xBF) expressed as unsigned byte values. */
    private static final int[] UTF8_BYTE_ORDER_MARK = { 239, 187, 191 };

    /** Number of trailing bytes that are inspected for the end-of-file marker. */
    private static final int EOF_SCAN_WINDOW = 8;

    private PDFHelper() {
    }

    /**
     * Check if a byte array is a PDF file. PDF files start with the magic number '%PDF-' and
     * end with '%%EOF'. A file that does not contain the PDF magic numbers is therefore not a
     * PDF file. A leading UTF-8 byte order mark is ignored.
     *
     * @param   data  file byte array, may be {@code null}
     *
     * @return  true if the byte array looks like a pdf, otherwise false ({@code null}, empty
     *          and too short arrays yield false)
     */
    public static boolean isPdf(byte[] data) {
        if (data == null) {
            return false;
        }
        data = removeBomMarker(data);
        if (
            data.length > 4
            && data[0] == 0x25 // %
            && data[1] == 0x50 // P
            && data[2] == 0x44 // D
            && data[3] == 0x46 // F
            && data[4] == 0x2D // -
        ) {
            int count = 0;

            // Check the last 8 bytes for %%EOF with optional white-space. The start is clamped
            // to 0 so that arrays shorter than the window cannot cause a negative index.
            int offset = Math.max(0, data.length - EOF_SCAN_WINDOW);
            while (offset < data.length) {

                // The checks are evaluated one after another on the same byte, so a single '%'
                // satisfies both '%' checks. This lenient matching is kept unchanged so that no
                // file accepted so far gets rejected.
                if (count == 0 && data[offset] == 0x25) {
                    count++; // %
                }
                if (count == 1 && data[offset] == 0x25) {
                    count++; // %
                }
                if (count == 2 && data[offset] == 0x45) {
                    count++; // E
                }
                if (count == 3 && data[offset] == 0x4F) {
                    count++; // O
                }
                if (count == 4 && data[offset] == 0x46) {
                    count++; // F
                }
                offset++;
            }
            return count == 5;
        }
        return false;
    }

    /**
     * Check if a UTF-8 bom marker is at the beginning of the file. If there is a bom marker, a
     * copy of the byte array without the marker is returned.
     *
     * @param   data  file byte array, may be {@code null}
     *
     * @return  file byte array without a leading bom marker; the given array itself (or
     *          {@code null}) if there is no marker to remove
     */
    private static byte[] removeBomMarker(byte[] data) {
        if (data == null || data.length < UTF8_BYTE_ORDER_MARK.length) {
            return data;
        }
        for (int index = 0; index < UTF8_BYTE_ORDER_MARK.length; ++index) {

            // Java bytes are signed, mask to compare against the unsigned marker values.
            if ((data[index] & 0xFF) != UTF8_BYTE_ORDER_MARK[index]) {
                return data;
            }
        }
        return Arrays.copyOfRange(data, UTF8_BYTE_ORDER_MARK.length, data.length);
    }
}
82+
83+
/*--- Formatiert nach TK Code Konventionen vom 05.03.2002 ---*/
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*--- (C) 1999-2019 Techniker Krankenkasse ---*/
2+
3+
package de.tk.opensource.privacyproxy.util;
4+
5+
import java.io.IOException;
6+
7+
import org.apache.commons.io.IOUtils;
8+
import org.junit.Test;
9+
import org.springframework.core.io.ClassPathResource;
10+
11+
import static org.junit.Assert.assertFalse;
12+
import static org.junit.Assert.assertTrue;
13+
14+
public class PDFHelperTest {
15+
@Test
16+
public void testPdfWithBomMarker() throws IOException {
17+
ClassPathResource classPathResource =
18+
new ClassPathResource("testPdfs/pdfWithBomMarker.pdf");
19+
byte[] bytes = IOUtils.toByteArray(classPathResource.getInputStream());
20+
assertTrue(PDFHelper.isPdf(bytes));
21+
}
22+
23+
@Test
24+
public void testStandardPdf() throws IOException {
25+
ClassPathResource classPathResource = new ClassPathResource("testPdfs/testPdf.pdf");
26+
byte[] bytes = IOUtils.toByteArray(classPathResource.getInputStream());
27+
assertTrue(PDFHelper.isPdf(bytes));
28+
}
29+
30+
@Test
31+
public void testCorruptPdf() throws IOException {
32+
ClassPathResource classPathResource =
33+
new ClassPathResource("testPdfs/shellBinaryAsPdf.pdf");
34+
byte[] bytes = IOUtils.toByteArray(classPathResource.getInputStream());
35+
assertFalse(PDFHelper.isPdf(bytes));
36+
}
37+
}
38+
39+
/*--- Formatiert nach TK Code Konventionen vom 05.03.2002 ---*/
7.47 KB
Binary file not shown.
25 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)