Skip to content

Commit 8413f89

Browse files
committed
[bugfix] Support XSD 1.1 in validation:jaxp()'s dynamic discovery
The bundled Xerces XSD 1.1 fork only wires 1.1 support into the JAXP SchemaFactory/Validator API, not into the validating-SAXParser pipeline that `validation:jaxp()` uses for dynamic, schemaLocation-hint-driven schema discovery. Any XSD declaring `vc:minVersion="1.1"` (e.g. one using xs:assert) failed with `"cvc-elt.1.a: Cannot find the declaration of element"` even though the same schema validates fine via validation:jaxv() with the language explicitly set to v1.1. Add a narrow fallback: when the default pipeline reports `cvc-elt.1.a` and the instance references a schema via `xsi:schemaLocation` or `xsi:noNamespaceSchemaLocation`, retry once with a `SchemaFactory/Validator` built for v1.1. DTD-only documents never produce this error signature, so the default pipeline (and its DTD/grammar-pool behavior) is untouched for them. Surfaced during other work; see eXist-db#6002.
1 parent 96883fc commit 8413f89

3 files changed

Lines changed: 222 additions & 1 deletion

File tree

exist-core/src/main/java/org/exist/validation/ValidationReport.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,16 @@ public List<ValidationReportItem> getValidationReportItemList() {
135135
return validationReport;
136136
}
137137

138+
/**
139+
* Discard previously recorded errors/warnings so the report can be
140+
* reused for a second validation pass (e.g. retrying with a different
141+
* validator), while keeping start/duration/namespace tracking intact.
142+
*/
143+
public void clear() {
144+
validationReport.clear();
145+
lastItem = null;
146+
}
147+
138148
public List<String> getTextValidationReport() {
139149

140150
final List<String> textReport = new ArrayList<>();

exist-core/src/main/java/org/exist/xquery/functions/validation/Jaxp.java

Lines changed: 115 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import java.util.Optional;
3232
import java.util.Properties;
3333

34+
import javax.annotation.Nullable;
3435
import javax.xml.parsers.ParserConfigurationException;
3536
import javax.xml.parsers.SAXParser;
3637
import javax.xml.parsers.SAXParserFactory;
@@ -40,8 +41,13 @@
4041
import javax.xml.transform.TransformerConfigurationException;
4142
import javax.xml.transform.TransformerException;
4243
import javax.xml.transform.TransformerFactory;
44+
import javax.xml.transform.sax.SAXResult;
45+
import javax.xml.transform.sax.SAXSource;
4346
import javax.xml.transform.stream.StreamResult;
4447
import javax.xml.transform.stream.StreamSource;
48+
import javax.xml.validation.Schema;
49+
import javax.xml.validation.SchemaFactory;
50+
import javax.xml.validation.Validator;
4551

4652
import com.evolvedbinary.j8fu.tuple.Tuple2;
4753

@@ -77,17 +83,20 @@
7783
import org.exist.xquery.value.BooleanValue;
7884
import org.exist.xquery.value.FunctionParameterSequenceType;
7985
import org.exist.xquery.value.FunctionReturnSequenceType;
86+
import org.exist.xquery.value.Item;
8087
import org.exist.xquery.value.Sequence;
8188
import org.exist.xquery.value.SequenceType;
8289
import org.exist.xquery.value.Type;
8390
import org.exist.xquery.value.ValueSequence;
8491

92+
import org.xml.sax.Attributes;
8593
import org.xml.sax.ContentHandler;
8694
import org.xml.sax.InputSource;
8795
import org.xml.sax.SAXException;
8896
import org.xml.sax.SAXNotRecognizedException;
8997
import org.xml.sax.SAXNotSupportedException;
9098
import org.xml.sax.XMLReader;
99+
import org.xml.sax.helpers.DefaultHandler;
91100
import org.xmlresolver.Resolver;
92101

93102
import static com.evolvedbinary.j8fu.tuple.Tuple.Tuple;
@@ -207,6 +216,7 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro
207216
}
208217

209218
InputSource instance = null;
219+
Resolver catalogResolver = null;
210220
try {
211221
report.start();
212222

@@ -229,6 +239,7 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro
229239
LOG.debug("Using system catalog.");
230240
final Configuration config = brokerPool.getConfiguration();
231241
final Resolver resolver = (Resolver) config.getProperty(XMLReaderObjectFactory.CATALOG_RESOLVER);
242+
catalogResolver = resolver;
232243
XercesXmlResolverAdapter.setXmlReaderEntityResolver(xmlReader, resolver);
233244

234245
} else {
@@ -270,6 +281,7 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro
270281
catalogs.add(Tuple(catalogUrl, maybeInputSource));
271282
}
272283
final Resolver resolver = ResolverFactory.newResolver(catalogs);
284+
catalogResolver = resolver;
273285
XercesXmlResolverAdapter.setXmlReaderEntityResolver(xmlReader, resolver);
274286

275287
} else {
@@ -292,6 +304,29 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro
292304
xmlReader.parse(instance);
293305
LOG.debug("Stopped parsing document");
294306

307+
/* The bundled Xerces XSD 1.1 support is only wired into the JAXP
308+
SchemaFactory/Validator API, not into this dynamic-discovery
309+
SAXParser pipeline (see plans/catalog-dtd.plan.md). When that
310+
shows up as "no declaration found" for the root element and the
311+
instance actually references a schema, retry once with the
312+
XSD-1.1-capable Validator before giving up. DTD-only documents
313+
never produce this cvc-* signature, so they never retry. */
314+
if (!report.isValid() && isMissingElementDeclaration(report)
315+
&& hasSchemaLocationHint(args[0].itemAt(0))) {
316+
LOG.debug("Retrying validation with XSD 1.1 validator after cvc-elt.1.a");
317+
report.clear();
318+
319+
final Validator validator = newXsd11Validator(catalogResolver);
320+
validator.setErrorHandler(report);
321+
322+
final InputSource retryInstance = Shared.getInputSource(args[0].itemAt(0), context);
323+
try {
324+
validator.validate(new SAXSource(retryInstance), new SAXResult(contenthandler));
325+
} finally {
326+
Shared.closeInputSource(retryInstance);
327+
}
328+
}
329+
295330
// Distill namespace from document
296331
if (contenthandler instanceof ValidationContentHandler handler) {
297332
report.setNamespaceUri(
@@ -379,13 +414,92 @@ private void setXmlReaderFeature(XMLReader xmlReader, String featureName, boolea
379414

380415
try {
381416
xmlReader.setFeature(featureName, value);
382-
417+
383418
} catch (final SAXNotRecognizedException | SAXNotSupportedException ex) {
384419
LOG.error(ex.getMessage());
385420

386421
}
387422
}
388423

424+
/** Schema-language URI handed to {@link SchemaFactory#newInstance(String)} to obtain the XSD 1.1 factory. */
private static final String XSD_1_1_NS = "http://www.w3.org/XML/XMLSchema/v1.1";
425+
/** Namespace URI under which the {@code schemaLocation} / {@code noNamespaceSchemaLocation} attributes are looked up. */
private static final String XSI_NS = "http://www.w3.org/2001/XMLSchema-instance";
426+
427+
/**
428+
* @return true if any reported error is the "no global declaration for
429+
* the root element" signature ({@code cvc-elt.1.a}) produced when this
430+
* Xerces fork's dynamic-discovery pipeline meets an XSD 1.1-only schema.
431+
*/
432+
private static boolean isMissingElementDeclaration(final ValidationReport report) {
433+
return report.getValidationReportItemList().stream()
434+
.anyMatch(item -> item.getMessage() != null && item.getMessage().startsWith("cvc-elt.1.a:"));
435+
}
436+
437+
/**
438+
* Cheaply peeks at the root element's attributes to check whether the
439+
* instance references a schema via {@code xsi:schemaLocation} /
440+
* {@code xsi:noNamespaceSchemaLocation}, without validating it. Used to
441+
* decide whether the XSD 1.1 fallback retry could possibly help (a
442+
* DTD-only document never produces the cvc-elt.1.a signature in the
443+
* first place, but this guards against retrying on documents that
444+
* reference no schema at all).
445+
*/
446+
private boolean hasSchemaLocationHint(final Item item) throws XPathException, IOException {
447+
final InputSource probe = Shared.getInputSource(item, context);
448+
try {
449+
final SAXParserFactory factory = SAXParserFactory.newInstance();
450+
factory.setNamespaceAware(true);
451+
final XMLReader reader = factory.newSAXParser().getXMLReader();
452+
453+
final boolean[] found = {false};
454+
reader.setContentHandler(new DefaultHandler() {
455+
@Override
456+
public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) throws SAXException {
457+
found[0] = attributes.getValue(XSI_NS, "schemaLocation") != null
458+
|| attributes.getValue(XSI_NS, "noNamespaceSchemaLocation") != null;
459+
throw StopAfterRootElement.INSTANCE;
460+
}
461+
});
462+
463+
try {
464+
reader.parse(probe);
465+
} catch (final StopAfterRootElement stop) {
466+
// expected: we only need the root element's attributes
467+
}
468+
return found[0];
469+
470+
} catch (final ParserConfigurationException | SAXException ex) {
471+
throw new IOException(ex.getMessage(), ex);
472+
} finally {
473+
Shared.closeInputSource(probe);
474+
}
475+
}
476+
477+
/**
478+
* Sentinel used to abort {@link #hasSchemaLocationHint(Item)}'s probe
479+
* parse immediately after the root element's attributes are seen.
480+
*/
481+
private static final class StopAfterRootElement extends SAXException {
482+
private static final StopAfterRootElement INSTANCE = new StopAfterRootElement();
483+
}
484+
485+
/**
486+
* @param resolver catalog resolver to use for schema/entity resolution, or null.
487+
* @return a {@link Validator} for the only XSD 1.1-capable pipeline this
488+
* Xerces fork supports: {@link SchemaFactory}/{@link Schema} with no
489+
* pre-supplied schema documents, so it dynamically discovers the schema
490+
* from the instance's own schemaLocation hint, mirroring how the default
491+
* SAXParser pipeline behaves for XSD 1.0.
492+
*/
493+
private Validator newXsd11Validator(@Nullable final Resolver resolver) throws SAXException {
494+
final SchemaFactory schemaFactory = SchemaFactory.newInstance(XSD_1_1_NS);
495+
final Schema schema = schemaFactory.newSchema();
496+
final Validator validator = schema.newValidator();
497+
if (resolver != null) {
498+
validator.setResourceResolver(resolver);
499+
}
500+
return validator;
501+
}
502+
389503
// TODO(AR) remove this when PR https://github.com/xmlresolver/xmlresolver/pull/98 is merged
390504
private String serializeDocument(final XmldbURI documentUri) throws SAXException, IOException {
391505
try (final LockedDocument lockedDocument = context.getBroker().getXMLResource(documentUri, Lock.LockMode.READ_LOCK)) {
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
(:
2+
: eXist-db Open Source Native XML Database
3+
: Copyright (C) 2001 The eXist-db Authors
4+
:
5+
: info@exist-db.org
6+
: http://www.exist-db.org
7+
:
8+
: This library is free software; you can redistribute it and/or
9+
: modify it under the terms of the GNU Lesser General Public
10+
: License as published by the Free Software Foundation; either
11+
: version 2.1 of the License, or (at your option) any later version.
12+
:
13+
: This library is distributed in the hope that it will be useful,
14+
: but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16+
: Lesser General Public License for more details.
17+
:
18+
: You should have received a copy of the GNU Lesser General Public
19+
: License along with this library; if not, write to the Free Software
20+
: Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21+
:)
22+
xquery version "3.1";
23+
24+
(:~
25+
: validation:jaxp() validates by dynamically discovering the grammar from
26+
: the instance document's own xsi:(no)NamespaceSchemaLocation hint, unlike
27+
: validation:jaxv() (see jaxv.xql) which takes the grammar as an explicit
28+
: argument. That dynamic-discovery path needs a real stored document (for a
29+
: real base URI to resolve the relative schemaLocation hint against), so
30+
: unlike jaxv.xql this module stores fixtures rather than using in-memory
31+
: node constructors.
32+
:)
33+
module namespace jaxp ="http://exist-db.org/xquery/test/validation/jaxp";
34+
35+
declare namespace test="http://exist-db.org/xquery/xqsuite";
36+
37+
declare variable $jaxp:COLLECTION_NAME := "validation-jaxp-test";
38+
declare variable $jaxp:COLLECTION := "/db/" || $jaxp:COLLECTION_NAME;
39+
40+
(: No-namespace XSD 1.1 schema -- xs:assert does not exist in XSD 1.0, so a
41+
processor that silently falls back to 1.0 grammar parsing fails to load
42+
this schema at all, rather than just failing the assertion. :)
43+
declare variable $jaxp:XSD11 :=
44+
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
45+
xmlns:vc="http://www.w3.org/2007/XMLSchema-versioning" vc:minVersion="1.1">
46+
<xs:element name="root">
47+
<xs:complexType>
48+
<xs:sequence>
49+
<xs:element name="value1" type="xs:integer"/>
50+
<xs:element name="value2" type="xs:integer"/>
51+
</xs:sequence>
52+
<xs:assert test="value2 gt value1"/>
53+
</xs:complexType>
54+
</xs:element>
55+
</xs:schema>;
56+
57+
declare variable $jaxp:VALID_XML :=
58+
<root xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="schema.xsd">
59+
<value1>20</value1>
60+
<value2>30</value2>
61+
</root>;
62+
63+
declare variable $jaxp:INVALID_XML :=
64+
<root xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="schema.xsd">
65+
<value1>30</value1>
66+
<value2>20</value2>
67+
</root>;
68+
69+
(:~
 : Set-up function of this suite: creates the test collection below /db
 : and stores the XSD 1.1 schema together with the valid and the invalid
 : instance document in it. Both instances refer to the schema through the
 : relative hint "schema.xsd", so all three resources share one collection.
 :)
declare
70+
%test:setUp
71+
function jaxp:setup() {
72+
xmldb:create-collection("/db", $jaxp:COLLECTION_NAME),
73+
xmldb:store($jaxp:COLLECTION, "schema.xsd", $jaxp:XSD11),
74+
xmldb:store($jaxp:COLLECTION, "valid.xml", $jaxp:VALID_XML),
75+
xmldb:store($jaxp:COLLECTION, "invalid.xml", $jaxp:INVALID_XML)
76+
};
77+
78+
(:~
 : Tear-down function of this suite: removes the test collection, and with
 : it the fixtures stored by jaxp:setup().
 :)
declare
79+
%test:tearDown
80+
function jaxp:cleanup() {
81+
xmldb:remove($jaxp:COLLECTION)
82+
};
83+
84+
(: validation:jaxp() must dynamically discover and load an XSD 1.1 schema
85+
via the instance's own schemaLocation hint, the same as validation:jaxv()
86+
already does when given the v1.1 schema-language URI explicitly. :)
87+
declare
    %test:assertEquals("valid")
function jaxp:xsd11_valid() {
    (: validate the stored instance that satisfies the xs:assert and return the report status :)
    let $instance := doc($jaxp:COLLECTION || "/valid.xml")
    let $report := validation:jaxp-report($instance, false())
    return
        data($report//status)
};
92+
93+
declare
    %test:assertEquals("cvc-assertion: Assertion evaluation ('value2 gt value1') for element 'root' on schema type '#AnonType_root' did not succeed. ")
function jaxp:xsd11_invalid() {
    (: validate the stored instance that violates the xs:assert and return the reported message(s) :)
    let $instance := doc($jaxp:COLLECTION || "/invalid.xml")
    let $report := validation:jaxp-report($instance, false())
    return
        data($report//message)
};

0 commit comments

Comments
 (0)