Skip to content

Commit 6a509e0

Browse files
duncdrumclaude
andcommitted
[ignore] Fix Codacy field-placement and setAccessible() findings
Move XSD11_DETECTION_CACHE/XSD11_DETECTION_CACHE_MAX_ENTRIES to the top of Jaxp, alongside its other fields, instead of declaring them mid-class next to the methods that use them. Make isXsd11Schema package-private instead of private, the same pattern already used for clearXsd11DetectionCache(), so JaxpSchemaLocationSecurityTest/JaxpXsd11DetectionCacheTest (both in the same package) can call it directly instead of via setAccessible(true). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> Also scopes XSD11_DETECTION_CACHE by the requesting Subject's name (a cache hit skips isXsd11Schema's permission-checked openStream() entirely, so without this a Subject lacking read permission on a schema resource could observe a boolean populated by a different Subject's earlier fetch), and replaces the hand-rolled synchronized LinkedHashMap-based LRU with a Caffeine cache (already a project dependency) for the same bound with less hand-written locking. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 708917a commit 6a509e0

3 files changed

Lines changed: 99 additions & 68 deletions

File tree

exist-core/src/main/java/org/exist/xquery/functions/validation/Jaxp.java

Lines changed: 53 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,14 @@
2929
import java.nio.file.Path;
3030
import java.util.ArrayList;
3131
import java.util.HashMap;
32-
import java.util.LinkedHashMap;
3332
import java.util.List;
3433
import java.util.Locale;
3534
import java.util.Map;
3635
import java.util.Objects;
3736

37+
import com.github.benmanes.caffeine.cache.Cache;
38+
import com.github.benmanes.caffeine.cache.Caffeine;
39+
3840
import javax.annotation.Nullable;
3941
import javax.xml.parsers.ParserConfigurationException;
4042
import javax.xml.parsers.SAXParser;
@@ -109,6 +111,33 @@ public class Jaxp extends BasicFunction {
109111
private static final String XSI_NS = "http://www.w3.org/2001/XMLSchema-instance";
110112
private static final String XSD_VERSIONING_NS = "http://www.w3.org/2007/XMLSchema-versioning";
111113

114+
/**
115+
* Bound on the size of {@link #XSD11_DETECTION_CACHE} (see there for what it caches).
116+
*/
117+
private static final int XSD11_DETECTION_CACHE_MAX_ENTRIES = 256;
118+
119+
/**
120+
* Cache key for {@link #XSD11_DETECTION_CACHE}: the requesting Subject's name plus the
121+
* resolved schema URI. Including the Subject prevents a Subject without read permission on
122+
* the schema resource from observing a boolean populated by a different (permitted)
123+
* Subject's earlier, permission-checked fetch -- a cache hit skips {@link
124+
* #isXsd11Schema(String, String, String)}'s {@code openStream()} entirely, so without this
125+
* the cache itself would bypass whatever permission check that open would otherwise perform.
126+
*/
127+
private record Xsd11DetectionCacheKey(String subjectName, String resolvedSchemaUri) {
128+
}
129+
130+
/**
131+
* Bounded (see {@link #XSD11_DETECTION_CACHE_MAX_ENTRIES}), LRU-evicted cache of
132+
* "does the schema at this resolved URI declare vc:minVersion 1.1?", so that validating many
133+
* documents against the same schema doesn't re-fetch and re-peek it every time. Cleared by
134+
* {@code validation:clear-grammar-cache()} (see {@link GrammarTooling}) alongside the Xerces
135+
* grammar pool, so operators have one function to clear every validation-related cache.
136+
*/
137+
private static final Cache<Xsd11DetectionCacheKey, Boolean> XSD11_DETECTION_CACHE = Caffeine.newBuilder()
138+
.maximumSize(XSD11_DETECTION_CACHE_MAX_ENTRIES)
139+
.build();
140+
112141
private static final String simpleFunctionTxt = """
113142
Validate document by parsing $instance. Optionally \
114143
grammar caching can be enabled. Supported grammars types \
@@ -445,12 +474,12 @@ private record ParseTarget(ContentHandler contenthandler, @Nullable MemTreeBuild
445474

446475
/**
447476
* Acquires a fresh, disposable {@link InputSource} for the instance and runs
448-
* {@link #detectXsd11ViaSchemaLocation(InputSource)} against it.
477+
* {@link #detectXsd11ViaSchemaLocation(String, InputSource)} against it.
449478
*/
450479
private boolean peekIsXsd11ViaSchemaLocation(final Sequence[] args) throws XPathException, IOException {
451480
final InputSource peekInstance = Shared.getInputSource(args[0].itemAt(0), context);
452481
try {
453-
return detectXsd11ViaSchemaLocation(peekInstance);
482+
return detectXsd11ViaSchemaLocation(context.getSubject().getName(), peekInstance);
454483
} finally {
455484
Shared.closeInputSource(peekInstance);
456485
}
@@ -531,9 +560,11 @@ private ParseTarget retryWithXsd11ValidatorIfNeeded(final Sequence[] args, final
531560
* retry-after-failure check in {@code eval()} remains the safety net for those cases
532561
* (catalog-mediated locations, an unresolvable hint, etc.).
533562
*
563+
* @param subjectName the requesting Subject's name, used to scope {@link #XSD11_DETECTION_CACHE}
564+
* (see there for why).
534565
* @param peekInstance a fresh, not-yet-consumed InputSource for the same instance document.
535566
*/
536-
private static boolean detectXsd11ViaSchemaLocation(final InputSource peekInstance) {
567+
private static boolean detectXsd11ViaSchemaLocation(final String subjectName, final InputSource peekInstance) {
537568
final Map<String, String> rootAttrs = peekRootAttributes(peekInstance);
538569
final String baseUri = peekInstance.getSystemId();
539570
if (rootAttrs == null || baseUri == null) {
@@ -555,7 +586,7 @@ private static boolean detectXsd11ViaSchemaLocation(final InputSource peekInstan
555586
}
556587

557588
for (final String location : candidateLocations) {
558-
if (isXsd11Schema(baseUri, location)) {
589+
if (isXsd11Schema(subjectName, baseUri, location)) {
559590
return true;
560591
}
561592
}
@@ -568,6 +599,8 @@ private static boolean detectXsd11ViaSchemaLocation(final InputSource peekInstan
568599
* and checks whether its root element declares {@code vc:minVersion} containing "1.1".
569600
* Returns {@code false} for any resolution/read failure -- this is a best-effort peek, not
570601
* a substitute for the real catalog-aware resolution the actual validation pass performs.
602+
* Package-private (not {@code private}) so {@code JaxpSchemaLocationSecurityTest}/
603+
* {@code JaxpXsd11DetectionCacheTest}, both in this package, can call it directly.
571604
*
572605
* <p>{@code location} is the literal, attacker/document-author-controlled value of the
573606
* instance's own {@code xsi:schemaLocation}/{@code noNamespaceSchemaLocation} hint -- if it's
@@ -593,8 +626,13 @@ private static boolean detectXsd11ViaSchemaLocation(final InputSource peekInstan
593626
* way to get a {@code file:} base URI here), which already requires the caller to have used
594627
* {@code util:} Java-interop functions to construct that object in the first place -- a
595628
* separate, pre-existing privilege boundary this peek doesn't change either way.</p>
629+
*
630+
* <p>{@code subjectName} scopes {@link #XSD11_DETECTION_CACHE}: a cache hit skips this
631+
* method's permission-checked {@code openStream()} entirely, so without scoping by Subject,
632+
* a Subject without read permission on the schema resource could observe a boolean populated
633+
* by a different (permitted) Subject's earlier fetch -- a cross-Subject information leak.</p>
596634
*/
597-
private static boolean isXsd11Schema(final String baseUri, final String location) {
635+
static boolean isXsd11Schema(final String subjectName, final String baseUri, final String location) {
598636
try {
599637
final URI baseUriNormalized = new URI(ResolverFactory.fixupExistCatalogUri(baseUri));
600638
final URI resolvedUri = baseUriNormalized.resolve(location);
@@ -606,7 +644,7 @@ private static boolean isXsd11Schema(final String baseUri, final String location
606644
return false;
607645
}
608646

609-
final String cacheKey = resolvedUri.toString();
647+
final Xsd11DetectionCacheKey cacheKey = new Xsd11DetectionCacheKey(subjectName, resolvedUri.toString());
610648
final Boolean cached = getCachedXsd11Detection(cacheKey);
611649
if (cached != null) {
612650
return cached;
@@ -636,44 +674,22 @@ private static boolean isXsd11Schema(final String baseUri, final String location
636674
}
637675
}
638676

639-
/**
640-
* Bounded (see {@link #XSD11_DETECTION_CACHE_MAX_ENTRIES}), LRU-evicted cache of
641-
* "does the schema at this resolved URI declare vc:minVersion 1.1?", so that validating many
642-
* documents against the same schema doesn't re-fetch and re-peek it every time. Cleared by
643-
* {@code validation:clear-grammar-cache()} (see {@link GrammarTooling}) alongside the Xerces
644-
* grammar pool, so operators have one function to clear every validation-related cache.
645-
*/
646-
private static final int XSD11_DETECTION_CACHE_MAX_ENTRIES = 256;
647-
648-
private static final Map<String, Boolean> XSD11_DETECTION_CACHE = new LinkedHashMap<>(16, 0.75f, true) {
649-
@Override
650-
protected boolean removeEldestEntry(final Map.Entry<String, Boolean> eldest) {
651-
return size() > XSD11_DETECTION_CACHE_MAX_ENTRIES;
652-
}
653-
};
654-
655677
@Nullable
656-
private static Boolean getCachedXsd11Detection(final String resolvedUri) {
657-
synchronized (XSD11_DETECTION_CACHE) {
658-
return XSD11_DETECTION_CACHE.get(resolvedUri);
659-
}
678+
private static Boolean getCachedXsd11Detection(final Xsd11DetectionCacheKey key) {
679+
return XSD11_DETECTION_CACHE.getIfPresent(key);
660680
}
661681

662-
private static void cacheXsd11Detection(final String resolvedUri, final boolean isXsd11) {
663-
synchronized (XSD11_DETECTION_CACHE) {
664-
XSD11_DETECTION_CACHE.put(resolvedUri, isXsd11);
665-
}
682+
private static void cacheXsd11Detection(final Xsd11DetectionCacheKey key, final boolean isXsd11) {
683+
XSD11_DETECTION_CACHE.put(key, isXsd11);
666684
}
667685

668686
/**
669-
* Discards all cached {@link #isXsd11Schema(String, String)} results. Package-private so
670-
* {@link GrammarTooling}'s {@code clear-grammar-cache()} can clear this alongside the Xerces
671-
* grammar pool.
687+
* Discards all cached {@link #isXsd11Schema(String, String, String)} results. Package-private
688+
* so {@link GrammarTooling}'s {@code clear-grammar-cache()} can clear this alongside the
689+
* Xerces grammar pool.
672690
*/
673691
static void clearXsd11DetectionCache() {
674-
synchronized (XSD11_DETECTION_CACHE) {
675-
XSD11_DETECTION_CACHE.clear();
676-
}
692+
XSD11_DETECTION_CACHE.invalidateAll();
677693
}
678694

679695
/**

exist-core/src/test/java/org/exist/xquery/functions/validation/JaxpSchemaLocationSecurityTest.java

Lines changed: 13 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
import org.junit.BeforeClass;
2626
import org.junit.Test;
2727

28-
import java.lang.reflect.Method;
2928
import java.nio.charset.StandardCharsets;
3029
import java.nio.file.Files;
3130
import java.nio.file.Path;
@@ -34,14 +33,14 @@
3433
import static org.junit.Assert.assertTrue;
3534

3635
/**
37-
* Targeted white-box test for {@code Jaxp.isXsd11Schema(String, String)}'s same-origin
36+
* Targeted white-box test for {@code Jaxp.isXsd11Schema(String, String, String)}'s same-origin
3837
* (scheme + authority) restriction -- the up-front XSD 1.1 detection peek must only ever follow
3938
* a {@code schemaLocation} hint that resolves to the exact same origin as the instance document's
4039
* own base URI, since the hint is document-author-controlled and the peek runs unconditionally,
4140
* before any catalog/permission check would otherwise govern the fetch.
4241
*
43-
* <p>Tests the method directly (via reflection, since it's {@code private static}) rather than
44-
* through the full {@code validation:jaxp()} pipeline: going through the pipeline would also
42+
* <p>Tests {@code Jaxp.isXsd11Schema} directly rather than through the full {@code
43+
* validation:jaxp()} pipeline: going through the pipeline would also
4544
* exercise the (separate, pre-existing, unrelated) default Xerces resolution that runs when the
4645
* peek declines and falls through -- which would independently attempt the same kind of fetch,
4746
* confounding any attempt to observe whether the peek itself made a network call.</p>
@@ -77,43 +76,37 @@ public static void teardown() throws Exception {
7776
}
7877

7978
@Test
80-
public void sameOriginRelativeXsd11SchemaIsDetected() throws Exception {
81-
assertTrue(invokeIsXsd11Schema(fileBaseUri, "schema11.xsd"));
79+
public void sameOriginRelativeXsd11SchemaIsDetected() {
80+
assertTrue(Jaxp.isXsd11Schema("test-subject", fileBaseUri, "schema11.xsd"));
8281
}
8382

8483
@Test
85-
public void sameOriginRelativeXsd10SchemaIsNotDetectedAsXsd11() throws Exception {
86-
assertFalse(invokeIsXsd11Schema(fileBaseUri, "schema10.xsd"));
84+
public void sameOriginRelativeXsd10SchemaIsNotDetectedAsXsd11() {
85+
assertFalse(Jaxp.isXsd11Schema("test-subject", fileBaseUri, "schema10.xsd"));
8786
}
8887

8988
@Test(timeout = 5000)
90-
public void crossOriginHttpLocationIsRefused() throws Exception {
89+
public void crossOriginHttpLocationIsRefused() {
9190
// A real instance would never have a `file://` base URI reachable from an unprivileged
9291
// caller (only Java-object-backed items do, which already requires elevated capability) --
9392
// this is just the most convenient same-scheme baseline to contrast against. The case that
9493
// matters is: whatever the base URI's origin, an absolute, different-origin location must
9594
// never be followed.
96-
assertFalse(invokeIsXsd11Schema(fileBaseUri, "http://203.0.113.1:1/evil.xsd"));
95+
assertFalse(Jaxp.isXsd11Schema("test-subject", fileBaseUri, "http://203.0.113.1:1/evil.xsd"));
9796
}
9897

9998
@Test(timeout = 5000)
100-
public void crossOriginHttpLocationIsRefusedForDatabaseBaseUri() throws Exception {
99+
public void crossOriginHttpLocationIsRefusedForDatabaseBaseUri() {
101100
// The realistic, unprivileged case: a document stored in the database (xmldb:// base URI)
102101
// with an absolute http:// schemaLocation hint pointing out to an attacker-controlled host.
103-
assertFalse(invokeIsXsd11Schema("xmldb://db/test/instance.xml", "http://203.0.113.1:1/evil.xsd"));
102+
assertFalse(Jaxp.isXsd11Schema("test-subject", "xmldb://db/test/instance.xml", "http://203.0.113.1:1/evil.xsd"));
104103
}
105104

106105
@Test(timeout = 5000)
107-
public void crossHostXmldbLocationIsRefused() throws Exception {
106+
public void crossHostXmldbLocationIsRefused() {
108107
// An absolute xmldb:// location naming a different host is not "same scheme" enough --
109108
// XmldbURL.isEmbedded() treats a non-empty host as a remote XML-RPC target
110109
// (EmbeddedURLConnection -> XmlrpcInputStream), so this must be refused too.
111-
assertFalse(invokeIsXsd11Schema("xmldb://db/test/instance.xml", "xmldb://203.0.113.1:1/db/evil.xsd"));
112-
}
113-
114-
private static boolean invokeIsXsd11Schema(final String baseUri, final String location) throws Exception {
115-
final Method method = Jaxp.class.getDeclaredMethod("isXsd11Schema", String.class, String.class);
116-
method.setAccessible(true);
117-
return (boolean) method.invoke(null, baseUri, location);
110+
assertFalse(Jaxp.isXsd11Schema("test-subject", "xmldb://db/test/instance.xml", "xmldb://203.0.113.1:1/db/evil.xsd"));
118111
}
119112
}

exist-core/src/test/java/org/exist/xquery/functions/validation/JaxpXsd11DetectionCacheTest.java

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import org.junit.After;
2525
import org.junit.Test;
2626

27-
import java.lang.reflect.Method;
2827
import java.nio.charset.StandardCharsets;
2928
import java.nio.file.Files;
3029
import java.nio.file.Path;
@@ -34,9 +33,11 @@
3433

3534
/**
3635
* Tests that {@code Jaxp.isXsd11Schema}'s result cache (a) actually caches -- a second call for
37-
* the same resolved schema URI doesn't re-read the (changed) underlying content -- and (b) is
38-
* fully cleared by {@code Jaxp.clearXsd11DetectionCache()}, the hook {@code
39-
* validation:clear-grammar-cache()} calls (see {@link GrammarTooling}).
36+
* the same Subject and resolved schema URI doesn't re-read the (changed) underlying content --
37+
* (b) is fully cleared by {@code Jaxp.clearXsd11DetectionCache()}, the hook {@code
38+
* validation:clear-grammar-cache()} calls (see {@link GrammarTooling}), and (c) is scoped per
39+
* Subject, so a different Subject querying the same resolved URI doesn't observe a cached answer
40+
* populated by someone else's fetch.
4041
*/
4142
public class JaxpXsd11DetectionCacheTest {
4243

@@ -64,30 +65,51 @@ public void cachesResultAcrossCallsAndClearCacheInvalidatesIt() throws Exception
6465
final String baseUri = instance.toUri().toString();
6566

6667
// First call: reads the real (XSD 1.1) content from disk.
67-
assertTrue(invokeIsXsd11Schema(baseUri, "schema.xsd"));
68+
assertTrue(Jaxp.isXsd11Schema("subject-a", baseUri, "schema.xsd"));
6869

6970
// Flip the on-disk content to XSD 1.0 without going through the cache -- if the second
7071
// call is actually served from cache, it must still report the stale (cached) "true",
7172
// not re-read this new content.
7273
Files.writeString(schema, XSD_1_0_SCHEMA, StandardCharsets.UTF_8);
7374
assertTrue("second call should be served from cache, not re-read the changed file",
74-
invokeIsXsd11Schema(baseUri, "schema.xsd"));
75+
Jaxp.isXsd11Schema("subject-a", baseUri, "schema.xsd"));
7576

7677
// Clearing the cache (what validation:clear-grammar-cache() does) must make the next
7778
// call re-read the file and observe the now-current (XSD 1.0) content.
7879
Jaxp.clearXsd11DetectionCache();
7980
assertFalse("after clearing the cache, the now-current XSD 1.0 content must be observed",
80-
invokeIsXsd11Schema(baseUri, "schema.xsd"));
81+
Jaxp.isXsd11Schema("subject-a", baseUri, "schema.xsd"));
8182
} finally {
8283
Files.deleteIfExists(tempDir.resolve("instance.xml"));
8384
Files.deleteIfExists(tempDir.resolve("schema.xsd"));
8485
Files.deleteIfExists(tempDir);
8586
}
8687
}
8788

88-
private static boolean invokeIsXsd11Schema(final String baseUri, final String location) throws Exception {
89-
final Method method = Jaxp.class.getDeclaredMethod("isXsd11Schema", String.class, String.class);
90-
method.setAccessible(true);
91-
return (boolean) method.invoke(null, baseUri, location);
89+
@Test
90+
public void cacheIsScopedPerSubject() throws Exception {
91+
final Path tempDir = Files.createTempDirectory("jaxp-xsd11-cache-subject-test");
92+
try {
93+
final Path instance = tempDir.resolve("instance.xml");
94+
Files.writeString(instance, "<root/>", StandardCharsets.UTF_8);
95+
final Path schema = tempDir.resolve("schema.xsd");
96+
Files.writeString(schema, XSD_1_1_SCHEMA, StandardCharsets.UTF_8);
97+
98+
final String baseUri = instance.toUri().toString();
99+
100+
// subject-a's call populates the cache for this resolved URI.
101+
assertTrue(Jaxp.isXsd11Schema("subject-a", baseUri, "schema.xsd"));
102+
103+
// Delete the underlying file so any call that actually has to read it (i.e. a cache
104+
// miss) fails/returns false -- if subject-b's call were wrongly served from
105+
// subject-a's cache entry, it would still report "true" despite never reading anything.
106+
Files.delete(schema);
107+
assertFalse("a different Subject must not observe a cache entry populated by another Subject's fetch",
108+
Jaxp.isXsd11Schema("subject-b", baseUri, "schema.xsd"));
109+
} finally {
110+
Files.deleteIfExists(tempDir.resolve("instance.xml"));
111+
Files.deleteIfExists(tempDir.resolve("schema.xsd"));
112+
Files.deleteIfExists(tempDir);
113+
}
92114
}
93115
}

0 commit comments

Comments
 (0)