3131import java .util .Optional ;
3232import java .util .Properties ;
3333
34+ import javax .annotation .Nullable ;
3435import javax .xml .parsers .ParserConfigurationException ;
3536import javax .xml .parsers .SAXParser ;
3637import javax .xml .parsers .SAXParserFactory ;
4041import javax .xml .transform .TransformerConfigurationException ;
4142import javax .xml .transform .TransformerException ;
4243import javax .xml .transform .TransformerFactory ;
44+ import javax .xml .transform .sax .SAXResult ;
45+ import javax .xml .transform .sax .SAXSource ;
4346import javax .xml .transform .stream .StreamResult ;
4447import javax .xml .transform .stream .StreamSource ;
48+ import javax .xml .validation .Schema ;
49+ import javax .xml .validation .SchemaFactory ;
50+ import javax .xml .validation .Validator ;
4551
4652import com .evolvedbinary .j8fu .tuple .Tuple2 ;
4753
7783import org .exist .xquery .value .BooleanValue ;
7884import org .exist .xquery .value .FunctionParameterSequenceType ;
7985import org .exist .xquery .value .FunctionReturnSequenceType ;
86+ import org .exist .xquery .value .Item ;
8087import org .exist .xquery .value .Sequence ;
8188import org .exist .xquery .value .SequenceType ;
8289import org .exist .xquery .value .Type ;
8390import org .exist .xquery .value .ValueSequence ;
8491
92+ import org .xml .sax .Attributes ;
8593import org .xml .sax .ContentHandler ;
8694import org .xml .sax .InputSource ;
8795import org .xml .sax .SAXException ;
8896import org .xml .sax .SAXNotRecognizedException ;
8997import org .xml .sax .SAXNotSupportedException ;
9098import org .xml .sax .XMLReader ;
99+ import org .xml .sax .helpers .DefaultHandler ;
91100import org .xmlresolver .Resolver ;
92101
93102import static com .evolvedbinary .j8fu .tuple .Tuple .Tuple ;
@@ -207,6 +216,7 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro
207216 }
208217
209218 InputSource instance = null ;
219+ Resolver catalogResolver = null ;
210220 try {
211221 report .start ();
212222
@@ -229,6 +239,7 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro
229239 LOG .debug ("Using system catalog." );
230240 final Configuration config = brokerPool .getConfiguration ();
231241 final Resolver resolver = (Resolver ) config .getProperty (XMLReaderObjectFactory .CATALOG_RESOLVER );
242+ catalogResolver = resolver ;
232243 XercesXmlResolverAdapter .setXmlReaderEntityResolver (xmlReader , resolver );
233244
234245 } else {
@@ -270,6 +281,7 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro
270281 catalogs .add (Tuple (catalogUrl , maybeInputSource ));
271282 }
272283 final Resolver resolver = ResolverFactory .newResolver (catalogs );
284+ catalogResolver = resolver ;
273285 XercesXmlResolverAdapter .setXmlReaderEntityResolver (xmlReader , resolver );
274286
275287 } else {
@@ -292,6 +304,29 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro
292304 xmlReader .parse (instance );
293305 LOG .debug ("Stopped parsing document" );
294306
307+ /* The bundled Xerces XSD 1.1 support is only wired into the JAXP
308+ SchemaFactory/Validator API, not into this dynamic-discovery
309+ SAXParser pipeline (see plans/catalog-dtd.plan.md). When that
310+ shows up as "no declaration found" for the root element and the
311+ instance actually references a schema, retry once with the
312+ XSD-1.1-capable Validator before giving up. DTD-only documents
313+ never produce this cvc-* signature, so they never retry. */
314+ if (!report .isValid () && isMissingElementDeclaration (report )
315+ && hasSchemaLocationHint (args [0 ].itemAt (0 ))) {
316+ LOG .debug ("Retrying validation with XSD 1.1 validator after cvc-elt.1.a" );
317+ report .clear ();
318+
319+ final Validator validator = newXsd11Validator (catalogResolver );
320+ validator .setErrorHandler (report );
321+
322+ final InputSource retryInstance = Shared .getInputSource (args [0 ].itemAt (0 ), context );
323+ try {
324+ validator .validate (new SAXSource (retryInstance ), new SAXResult (contenthandler ));
325+ } finally {
326+ Shared .closeInputSource (retryInstance );
327+ }
328+ }
329+
295330 // Distill namespace from document
296331 if (contenthandler instanceof ValidationContentHandler handler ) {
297332 report .setNamespaceUri (
@@ -379,13 +414,92 @@ private void setXmlReaderFeature(XMLReader xmlReader, String featureName, boolea
379414
380415 try {
381416 xmlReader .setFeature (featureName , value );
382-
417+
383418 } catch (final SAXNotRecognizedException | SAXNotSupportedException ex ) {
384419 LOG .error (ex .getMessage ());
385420
386421 }
387422 }
388423
/** Schema-language identifier passed to {@code SchemaFactory.newInstance} when building the XSD 1.1 fallback validator. */
424+ private static final String XSD_1_1_NS = "http://www.w3.org/XML/XMLSchema/v1.1" ;
/** Namespace URI under which the root element's {@code schemaLocation} / {@code noNamespaceSchemaLocation} attributes are looked up. */
425+ private static final String XSI_NS = "http://www.w3.org/2001/XMLSchema-instance" ;
426+
427+ /**
428+ * @return true if any reported error is the "no global declaration for
429+ * the root element" signature ({@code cvc-elt.1.a}) produced when this
430+ * Xerces fork's dynamic-discovery pipeline meets an XSD 1.1-only schema.
431+ */
432+ private static boolean isMissingElementDeclaration (final ValidationReport report ) {
433+ return report .getValidationReportItemList ().stream ()
434+ .anyMatch (item -> item .getMessage () != null && item .getMessage ().startsWith ("cvc-elt.1.a:" ));
435+ }
436+
437+ /**
438+ * Cheaply peeks at the root element's attributes to check whether the
439+ * instance references a schema via {@code xsi:schemaLocation} /
440+ * {@code xsi:noNamespaceSchemaLocation}, without validating it. Used to
441+ * decide whether the XSD 1.1 fallback retry could possibly help (a
442+ * DTD-only document never produces the cvc-elt.1.a signature in the
443+ * first place, but this guards against retrying on documents that
444+ * reference no schema at all).
445+ */
446+ private boolean hasSchemaLocationHint (final Item item ) throws XPathException , IOException {
447+ final InputSource probe = Shared .getInputSource (item , context );
448+ try {
449+ final SAXParserFactory factory = SAXParserFactory .newInstance ();
450+ factory .setNamespaceAware (true );
451+ final XMLReader reader = factory .newSAXParser ().getXMLReader ();
452+
453+ final boolean [] found = {false };
454+ reader .setContentHandler (new DefaultHandler () {
455+ @ Override
456+ public void startElement (final String uri , final String localName , final String qName , final Attributes attributes ) throws SAXException {
457+ found [0 ] = attributes .getValue (XSI_NS , "schemaLocation" ) != null
458+ || attributes .getValue (XSI_NS , "noNamespaceSchemaLocation" ) != null ;
459+ throw StopAfterRootElement .INSTANCE ;
460+ }
461+ });
462+
463+ try {
464+ reader .parse (probe );
465+ } catch (final StopAfterRootElement stop ) {
466+ // expected: we only need the root element's attributes
467+ }
468+ return found [0 ];
469+
470+ } catch (final ParserConfigurationException | SAXException ex ) {
471+ throw new IOException (ex .getMessage (), ex );
472+ } finally {
473+ Shared .closeInputSource (probe );
474+ }
475+ }
476+
477+ /**
478+ * Sentinel used to abort {@link #hasSchemaLocationHint(Item)}'s probe
479+ * parse immediately after the root element's attributes are seen.
480+ */
481+ private static final class StopAfterRootElement extends SAXException {
482+ private static final StopAfterRootElement INSTANCE = new StopAfterRootElement ();
483+ }
484+
485+ /**
486+ * @param resolver catalog resolver to use for schema/entity resolution, or null.
487+ * @return a {@link Validator} for the only XSD 1.1-capable pipeline this
488+ * Xerces fork supports: {@link SchemaFactory}/{@link Schema} with no
489+ * pre-supplied schema documents, so it dynamically discovers the schema
490+ * from the instance's own schemaLocation hint, mirroring how the default
491+ * SAXParser pipeline behaves for XSD 1.0.
492+ */
493+ private Validator newXsd11Validator (@ Nullable final Resolver resolver ) throws SAXException {
494+ final SchemaFactory schemaFactory = SchemaFactory .newInstance (XSD_1_1_NS );
495+ final Schema schema = schemaFactory .newSchema ();
496+ final Validator validator = schema .newValidator ();
497+ if (resolver != null ) {
498+ validator .setResourceResolver (resolver );
499+ }
500+ return validator ;
501+ }
502+
389503 // TODO(AR) remove this when PR https://github.com/xmlresolver/xmlresolver/pull/98 is merged
390504 private String serializeDocument (final XmldbURI documentUri ) throws SAXException , IOException {
391505 try (final LockedDocument lockedDocument = context .getBroker ().getXMLResource (documentUri , Lock .LockMode .READ_LOCK )) {
0 commit comments