2929import java .nio .file .Path ;
3030import java .util .ArrayList ;
3131import java .util .HashMap ;
32- import java .util .LinkedHashMap ;
3332import java .util .List ;
3433import java .util .Locale ;
3534import java .util .Map ;
3635import java .util .Objects ;
3736
37+ import com .github .benmanes .caffeine .cache .Cache ;
38+ import com .github .benmanes .caffeine .cache .Caffeine ;
39+
3840import javax .annotation .Nullable ;
3941import javax .xml .parsers .ParserConfigurationException ;
4042import javax .xml .parsers .SAXParser ;
@@ -109,6 +111,33 @@ public class Jaxp extends BasicFunction {
109111 private static final String XSI_NS = "http://www.w3.org/2001/XMLSchema-instance" ;
110112 private static final String XSD_VERSIONING_NS = "http://www.w3.org/2007/XMLSchema-versioning" ;
111113
114+ /**
115+ * Bound on the size of {@link #XSD11_DETECTION_CACHE} (see there for what it caches).
116+ */
117+ private static final int XSD11_DETECTION_CACHE_MAX_ENTRIES = 256 ;
118+
119+ /**
120+ * Cache key for {@link #XSD11_DETECTION_CACHE}: the requesting Subject's name plus the
121+ * resolved schema URI. Including the Subject prevents a Subject without read permission on
122+ * the schema resource from observing a boolean populated by a different (permitted)
123+ * Subject's earlier, permission-checked fetch -- a cache hit skips {@link
124+ * #isXsd11Schema(String, String, String)}'s {@code openStream()} entirely, so without this
125+ * the cache itself would bypass whatever permission check that open would otherwise perform.
126+ */
127+ private record Xsd11DetectionCacheKey (String subjectName , String resolvedSchemaUri ) {
128+ }
129+
130+ /**
131+ * Bounded (see {@link #XSD11_DETECTION_CACHE_MAX_ENTRIES}) cache, evicted by Caffeine's size-based policy (not strict LRU), of
132+ * "does the schema at this resolved URI declare vc:minVersion 1.1?", so that validating many
133+ * documents against the same schema doesn't re-fetch and re-peek it every time. Cleared by
134+ * {@code validation:clear-grammar-cache()} (see {@link GrammarTooling}) alongside the Xerces
135+ * grammar pool, so operators have one function to clear every validation-related cache.
136+ */
137+ private static final Cache <Xsd11DetectionCacheKey , Boolean > XSD11_DETECTION_CACHE = Caffeine .newBuilder ()
138+ .maximumSize (XSD11_DETECTION_CACHE_MAX_ENTRIES )
139+ .build ();
140+
112141 private static final String simpleFunctionTxt = """
113142 Validate document by parsing $instance. Optionally \
114143 grammar caching can be enabled. Supported grammars types \
@@ -443,12 +472,12 @@ private record ParseTarget(ContentHandler contenthandler, @Nullable MemTreeBuild
443472
444473 /**
445474 * Acquires a fresh, disposable {@link InputSource} for the instance and runs
446- * {@link #detectXsd11ViaSchemaLocation(InputSource)} against it.
475+ * {@link #detectXsd11ViaSchemaLocation(String, InputSource)} against it.
447476 */
448477 private boolean peekIsXsd11ViaSchemaLocation (final Sequence [] args ) throws XPathException , IOException {
449478 final InputSource peekInstance = Shared .getInputSource (args [0 ].itemAt (0 ), context );
450479 try {
451- return detectXsd11ViaSchemaLocation (peekInstance );
480+ return detectXsd11ViaSchemaLocation (context . getSubject (). getName (), peekInstance );
452481 } finally {
453482 Shared .closeInputSource (peekInstance );
454483 }
@@ -538,9 +567,11 @@ private ParseTarget retryWithXsd11ValidatorIfNeeded(final Sequence[] args, final
538567 * retry-after-failure check in {@code eval()} remains the safety net for those cases
539568 * (catalog-mediated locations, an unresolvable hint, etc.).
540569 *
570+ * @param subjectName the requesting Subject's name, used to scope {@link #XSD11_DETECTION_CACHE}
571+ * (see {@link Xsd11DetectionCacheKey} for why).
541572 * @param peekInstance a fresh, not-yet-consumed InputSource for the same instance document.
542573 */
543- private static boolean detectXsd11ViaSchemaLocation (final InputSource peekInstance ) {
574+ private static boolean detectXsd11ViaSchemaLocation (final String subjectName , final InputSource peekInstance ) {
544575 final Map <String , String > rootAttrs = peekRootAttributes (peekInstance );
545576 final String baseUri = peekInstance .getSystemId ();
546577 if (rootAttrs == null || baseUri == null ) {
@@ -562,7 +593,7 @@ private static boolean detectXsd11ViaSchemaLocation(final InputSource peekInstan
562593 }
563594
564595 for (final String location : candidateLocations ) {
565- if (isXsd11Schema (baseUri , location )) {
596+ if (isXsd11Schema (subjectName , baseUri , location )) {
566597 return true ;
567598 }
568599 }
@@ -575,6 +606,8 @@ private static boolean detectXsd11ViaSchemaLocation(final InputSource peekInstan
575606 * and checks whether its root element declares {@code vc:minVersion} containing "1.1".
576607 * Returns {@code false} for any resolution/read failure -- this is a best-effort peek, not
577608 * a substitute for the real catalog-aware resolution the actual validation pass performs.
609+ * Package-private (not {@code private}) so {@code JaxpSchemaLocationSecurityTest}/
610+ * {@code JaxpXsd11DetectionCacheTest}, both in this package, can call it directly.
578611 *
579612 * <p>{@code location} is the literal, attacker/document-author-controlled value of the
580613 * instance's own {@code xsi:schemaLocation}/{@code noNamespaceSchemaLocation} hint -- if it's
@@ -600,8 +633,13 @@ private static boolean detectXsd11ViaSchemaLocation(final InputSource peekInstan
600633 * way to get a {@code file:} base URI here), which already requires the caller to have used
601634 * {@code util:} Java-interop functions to construct that object in the first place -- a
602635 * separate, pre-existing privilege boundary this peek doesn't change either way.</p>
636+ *
637+ * <p>{@code subjectName} scopes {@link #XSD11_DETECTION_CACHE}: a cache hit skips this
638+ * method's permission-checked {@code openStream()} entirely, so without scoping by Subject,
639+ * a Subject without read permission on the schema resource could observe a boolean populated
640+ * by a different (permitted) Subject's earlier fetch -- a cross-Subject information leak.</p>
603641 */
604- private static boolean isXsd11Schema (final String baseUri , final String location ) {
642+ static boolean isXsd11Schema (final String subjectName , final String baseUri , final String location ) {
605643 try {
606644 final URI baseUriNormalized = new URI (ResolverFactory .fixupExistCatalogUri (baseUri ));
607645 final URI resolvedUri = baseUriNormalized .resolve (location );
@@ -613,7 +651,7 @@ private static boolean isXsd11Schema(final String baseUri, final String location
613651 return false ;
614652 }
615653
616- final String cacheKey = resolvedUri .toString ();
654+ final Xsd11DetectionCacheKey cacheKey = new Xsd11DetectionCacheKey ( subjectName , resolvedUri .toString () );
617655 final Boolean cached = getCachedXsd11Detection (cacheKey );
618656 if (cached != null ) {
619657 return cached ;
@@ -643,44 +681,22 @@ private static boolean isXsd11Schema(final String baseUri, final String location
643681 }
644682 }
645683
646- /**
647- * Bounded (see {@link #XSD11_DETECTION_CACHE_MAX_ENTRIES}), LRU-evicted cache of
648- * "does the schema at this resolved URI declare vc:minVersion 1.1?", so that validating many
649- * documents against the same schema doesn't re-fetch and re-peek it every time. Cleared by
650- * {@code validation:clear-grammar-cache()} (see {@link GrammarTooling}) alongside the Xerces
651- * grammar pool, so operators have one function to clear every validation-related cache.
652- */
653- private static final int XSD11_DETECTION_CACHE_MAX_ENTRIES = 256 ;
654-
655- private static final Map <String , Boolean > XSD11_DETECTION_CACHE = new LinkedHashMap <>(16 , 0.75f , true ) {
656- @ Override
657- protected boolean removeEldestEntry (final Map .Entry <String , Boolean > eldest ) {
658- return size () > XSD11_DETECTION_CACHE_MAX_ENTRIES ;
659- }
660- };
661-
662684 @ Nullable
663- private static Boolean getCachedXsd11Detection (final String resolvedUri ) {
664- synchronized (XSD11_DETECTION_CACHE ) {
665- return XSD11_DETECTION_CACHE .get (resolvedUri );
666- }
685+ private static Boolean getCachedXsd11Detection (final Xsd11DetectionCacheKey key ) {
686+ return XSD11_DETECTION_CACHE .getIfPresent (key );
667687 }
668688
669- private static void cacheXsd11Detection (final String resolvedUri , final boolean isXsd11 ) {
670- synchronized (XSD11_DETECTION_CACHE ) {
671- XSD11_DETECTION_CACHE .put (resolvedUri , isXsd11 );
672- }
689+ private static void cacheXsd11Detection (final Xsd11DetectionCacheKey key , final boolean isXsd11 ) {
690+ XSD11_DETECTION_CACHE .put (key , isXsd11 );
673691 }
674692
675693 /**
676- * Discards all cached {@link #isXsd11Schema(String, String)} results. Package-private so
677- * {@link GrammarTooling}'s {@code clear-grammar-cache()} can clear this alongside the Xerces
678- * grammar pool.
694+ * Discards all cached {@link #isXsd11Schema(String, String, String)} results. Package-private
695+ * so {@link GrammarTooling}'s {@code clear-grammar-cache()} can clear this alongside the
696+ * Xerces grammar pool.
679697 */
680698 static void clearXsd11DetectionCache () {
681- synchronized (XSD11_DETECTION_CACHE ) {
682- XSD11_DETECTION_CACHE .clear ();
683- }
699+ XSD11_DETECTION_CACHE .invalidateAll ();
684700 }
685701
686702 /**
0 commit comments