2929import java .nio .file .Path ;
3030import java .util .ArrayList ;
3131import java .util .HashMap ;
32- import java .util .LinkedHashMap ;
3332import java .util .List ;
3433import java .util .Locale ;
3534import java .util .Map ;
3635import java .util .Objects ;
3736
37+ import com .github .benmanes .caffeine .cache .Cache ;
38+ import com .github .benmanes .caffeine .cache .Caffeine ;
39+
3840import javax .annotation .Nullable ;
3941import javax .xml .parsers .ParserConfigurationException ;
4042import javax .xml .parsers .SAXParser ;
@@ -109,6 +111,33 @@ public class Jaxp extends BasicFunction {
109111 private static final String XSI_NS = "http://www.w3.org/2001/XMLSchema-instance" ;
110112 private static final String XSD_VERSIONING_NS = "http://www.w3.org/2007/XMLSchema-versioning" ;
111113
114+ /**
115+ * Upper bound on the number of entries held by {@link #XSD11_DETECTION_CACHE} (see there for what it caches); handed to that cache's builder as its {@code maximumSize}.
116+ */
117+ private static final int XSD11_DETECTION_CACHE_MAX_ENTRIES = 256 ;
118+
119+ /**
120+ * Cache key for {@link #XSD11_DETECTION_CACHE}: the requesting Subject's name plus the
121+ * resolved schema URI. Including the Subject prevents a Subject without read permission on
122+ * the schema resource from observing a boolean populated by a different (permitted)
123+ * Subject's earlier, permission-checked fetch -- a cache hit skips {@link
124+ * #isXsd11Schema(String, String, String)}'s {@code openStream()} entirely, so without this
125+ * the cache itself would bypass whatever permission check that open would otherwise perform.
 *
 * <p>Being a record, two keys are equal only when both components are equal, so the same
 * schema URI looked up by two different Subject names occupies two separate cache entries.</p>
 *
 * @param subjectName the name of the Subject on whose behalf the detection runs (the caller
 *     in this class passes {@code context.getSubject().getName()}).
 * @param resolvedSchemaUri the string form of the schema location after it has been resolved
 *     against the instance document's base URI.
126+ */
127+ private record Xsd11DetectionCacheKey (String subjectName , String resolvedSchemaUri ) {
128+ }
129+
130+ /**
131+ * Bounded (see {@link #XSD11_DETECTION_CACHE_MAX_ENTRIES}), LRU-evicted cache of
132+ * "does the schema at this resolved URI declare vc:minVersion 1.1?", so that validating many
133+ * documents against the same schema doesn't re-fetch and re-peek it every time. Cleared by
134+ * {@code validation:clear-grammar-cache()} (see {@link GrammarTooling}) alongside the Xerces
135+ * grammar pool, so operators have one function to clear every validation-related cache.
136+ */
137+ private static final Cache <Xsd11DetectionCacheKey , Boolean > XSD11_DETECTION_CACHE = Caffeine .newBuilder ()
138+ .maximumSize (XSD11_DETECTION_CACHE_MAX_ENTRIES )
139+ .build ();
140+
112141 private static final String simpleFunctionTxt = """
113142 Validate document by parsing $instance. Optionally \
114143 grammar caching can be enabled. Supported grammars types \
@@ -445,12 +474,12 @@ private record ParseTarget(ContentHandler contenthandler, @Nullable MemTreeBuild
445474
446475 /**
447476 * Acquires a fresh, disposable {@link InputSource} for the instance and runs
448- * {@link #detectXsd11ViaSchemaLocation(InputSource)} against it.
477+ * {@link #detectXsd11ViaSchemaLocation(String, InputSource)} against it.
449478 */
450479 private boolean peekIsXsd11ViaSchemaLocation (final Sequence [] args ) throws XPathException , IOException {
451480 final InputSource peekInstance = Shared .getInputSource (args [0 ].itemAt (0 ), context );
452481 try {
453- return detectXsd11ViaSchemaLocation (peekInstance );
482+ return detectXsd11ViaSchemaLocation (context . getSubject (). getName (), peekInstance );
454483 } finally {
455484 Shared .closeInputSource (peekInstance );
456485 }
@@ -531,9 +560,11 @@ private ParseTarget retryWithXsd11ValidatorIfNeeded(final Sequence[] args, final
531560 * retry-after-failure check in {@code eval()} remains the safety net for those cases
532561 * (catalog-mediated locations, an unresolvable hint, etc.).
533562 *
563+ * @param subjectName the requesting Subject's name, used to scope {@link #XSD11_DETECTION_CACHE}
564+ * (see there for why).
534565 * @param peekInstance a fresh, not-yet-consumed InputSource for the same instance document.
535566 */
536- private static boolean detectXsd11ViaSchemaLocation (final InputSource peekInstance ) {
567+ private static boolean detectXsd11ViaSchemaLocation (final String subjectName , final InputSource peekInstance ) {
537568 final Map <String , String > rootAttrs = peekRootAttributes (peekInstance );
538569 final String baseUri = peekInstance .getSystemId ();
539570 if (rootAttrs == null || baseUri == null ) {
@@ -555,7 +586,7 @@ private static boolean detectXsd11ViaSchemaLocation(final InputSource peekInstan
555586 }
556587
557588 for (final String location : candidateLocations ) {
558- if (isXsd11Schema (baseUri , location )) {
589+ if (isXsd11Schema (subjectName , baseUri , location )) {
559590 return true ;
560591 }
561592 }
@@ -568,6 +599,8 @@ private static boolean detectXsd11ViaSchemaLocation(final InputSource peekInstan
568599 * and checks whether its root element declares {@code vc:minVersion} containing "1.1".
569600 * Returns {@code false} for any resolution/read failure -- this is a best-effort peek, not
570601 * a substitute for the real catalog-aware resolution the actual validation pass performs.
602+ * Package-private (not {@code private}) so {@code JaxpSchemaLocationSecurityTest}/
603+ * {@code JaxpXsd11DetectionCacheTest}, both in this package, can call it directly.
571604 *
572605 * <p>{@code location} is the literal, attacker/document-author-controlled value of the
573606 * instance's own {@code xsi:schemaLocation}/{@code noNamespaceSchemaLocation} hint -- if it's
@@ -593,8 +626,13 @@ private static boolean detectXsd11ViaSchemaLocation(final InputSource peekInstan
593626 * way to get a {@code file:} base URI here), which already requires the caller to have used
594627 * {@code util:} Java-interop functions to construct that object in the first place -- a
595628 * separate, pre-existing privilege boundary this peek doesn't change either way.</p>
629+ *
630+ * <p>{@code subjectName} scopes {@link #XSD11_DETECTION_CACHE}: a cache hit skips this
631+ * method's permission-checked {@code openStream()} entirely, so without scoping by Subject,
632+ * a Subject without read permission on the schema resource could observe a boolean populated
633+ * by a different (permitted) Subject's earlier fetch -- a cross-Subject information leak.</p>
596634 */
597- private static boolean isXsd11Schema (final String baseUri , final String location ) {
635+ static boolean isXsd11Schema (final String subjectName , final String baseUri , final String location ) {
598636 try {
599637 final URI baseUriNormalized = new URI (ResolverFactory .fixupExistCatalogUri (baseUri ));
600638 final URI resolvedUri = baseUriNormalized .resolve (location );
@@ -606,7 +644,7 @@ private static boolean isXsd11Schema(final String baseUri, final String location
606644 return false ;
607645 }
608646
609- final String cacheKey = resolvedUri .toString ();
647+ final Xsd11DetectionCacheKey cacheKey = new Xsd11DetectionCacheKey ( subjectName , resolvedUri .toString () );
610648 final Boolean cached = getCachedXsd11Detection (cacheKey );
611649 if (cached != null ) {
612650 return cached ;
@@ -636,44 +674,22 @@ private static boolean isXsd11Schema(final String baseUri, final String location
636674 }
637675 }
638676
639- /**
640- * Bounded (see {@link #XSD11_DETECTION_CACHE_MAX_ENTRIES}), LRU-evicted cache of
641- * "does the schema at this resolved URI declare vc:minVersion 1.1?", so that validating many
642- * documents against the same schema doesn't re-fetch and re-peek it every time. Cleared by
643- * {@code validation:clear-grammar-cache()} (see {@link GrammarTooling}) alongside the Xerces
644- * grammar pool, so operators have one function to clear every validation-related cache.
645- */
646- private static final int XSD11_DETECTION_CACHE_MAX_ENTRIES = 256 ;
647-
648- private static final Map <String , Boolean > XSD11_DETECTION_CACHE = new LinkedHashMap <>(16 , 0.75f , true ) {
649- @ Override
650- protected boolean removeEldestEntry (final Map .Entry <String , Boolean > eldest ) {
651- return size () > XSD11_DETECTION_CACHE_MAX_ENTRIES ;
652- }
653- };
654-
/**
 * Looks up a previously recorded XSD 1.1 detection result in {@code XSD11_DETECTION_CACHE}
 * without computing or loading anything on a miss.
 *
 * @param key the Subject name / resolved schema URI pair to look up.
 * @return the cached detection result, or {@code null} when nothing is cached for
 *     {@code key}; the caller treats {@code null} as "not yet determined" and goes on to
 *     inspect the schema itself.
 */
655677 @ Nullable
656- private static Boolean getCachedXsd11Detection (final String resolvedUri ) {
657- synchronized (XSD11_DETECTION_CACHE ) {
658- return XSD11_DETECTION_CACHE .get (resolvedUri );
659- }
678+ private static Boolean getCachedXsd11Detection (final Xsd11DetectionCacheKey key ) {
679+ return XSD11_DETECTION_CACHE .getIfPresent (key );
660680 }
661681
/**
 * Records a detection result in {@code XSD11_DETECTION_CACHE} under the given key.
 *
 * @param key the Subject name / resolved schema URI pair the result belongs to.
 * @param isXsd11 the detection outcome to remember for {@code key} (boxed to a
 *     {@code Boolean} when stored).
 */
662- private static void cacheXsd11Detection (final String resolvedUri , final boolean isXsd11 ) {
663- synchronized (XSD11_DETECTION_CACHE ) {
664- XSD11_DETECTION_CACHE .put (resolvedUri , isXsd11 );
665- }
682+ private static void cacheXsd11Detection (final Xsd11DetectionCacheKey key , final boolean isXsd11 ) {
683+ XSD11_DETECTION_CACHE .put (key , isXsd11 );
666684 }
667685
668686 /**
669- * Discards all cached {@link #isXsd11Schema(String, String)} results. Package-private so
670- * {@link GrammarTooling}'s {@code clear-grammar-cache()} can clear this alongside the Xerces
671- * grammar pool.
687+ * Discards all cached {@link #isXsd11Schema(String, String, String )} results -- the whole
 * cache is invalidated, so entries recorded for every Subject are dropped, not only those of
 * the Subject that triggered the clear. Package-private
688+ * so {@link GrammarTooling}'s {@code clear-grammar-cache()} can clear this alongside the
689+ * Xerces grammar pool.
672690 */
673691 static void clearXsd11DetectionCache () {
674- synchronized (XSD11_DETECTION_CACHE ) {
675- XSD11_DETECTION_CACHE .clear ();
676- }
692+ XSD11_DETECTION_CACHE .invalidateAll ();
677693 }
678694
679695 /**
0 commit comments