Skip to content

Commit 87ea5d1

Browse files
committed
[feature] Add catalog support to validation:jaxv()
`validation:jaxp()` already accepts an explicit catalog for schema and entity resolution; `jaxv()` had none, so `xs:import/xs:include` inside its explicitly-supplied grammars could only resolve via relative schemaLocation paths. Add a 4th, optional catalogs argument to `jaxv()/jaxv-report()`, wired via `SchemaFactory#setResourceResolver()` so resolution happens at schema-compile time.
1 parent eceee27 commit 87ea5d1

3 files changed

Lines changed: 273 additions & 16 deletions

File tree

exist-core/src/main/java/org/exist/xquery/functions/validation/Jaxv.java

Lines changed: 186 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,18 +21,36 @@
2121
*/
2222
package org.exist.xquery.functions.validation;
2323

24+
import java.io.IOException;
2425
import java.net.MalformedURLException;
26+
import java.util.ArrayList;
27+
import java.util.List;
28+
import java.util.Optional;
29+
import java.util.Properties;
2530

2631
import javax.xml.XMLConstants;
32+
import javax.xml.transform.OutputKeys;
2733
import javax.xml.transform.stream.StreamSource;
2834
import javax.xml.validation.Schema;
2935
import javax.xml.validation.SchemaFactory;
3036
import javax.xml.validation.Validator;
3137

38+
import com.evolvedbinary.j8fu.tuple.Tuple2;
39+
3240
import org.exist.dom.QName;
3341
import org.exist.dom.memtree.MemTreeBuilder;
3442
import org.exist.dom.memtree.NodeImpl;
43+
import org.exist.dom.persistent.DocumentImpl;
44+
import org.exist.dom.persistent.LockedDocument;
45+
import org.exist.resolver.ResolverFactory;
46+
import org.exist.security.PermissionDeniedException;
47+
import org.exist.storage.BrokerPool;
48+
import org.exist.storage.lock.Lock;
49+
import org.exist.storage.serializers.Serializer;
50+
import org.exist.util.Configuration;
51+
import org.exist.util.XMLReaderObjectFactory;
3552
import org.exist.validation.ValidationReport;
53+
import org.exist.xmldb.XmldbURI;
3654
import org.exist.xquery.BasicFunction;
3755
import org.exist.xquery.Cardinality;
3856
import org.exist.xquery.FunctionSignature;
@@ -46,6 +64,13 @@
4664
import org.exist.xquery.value.Type;
4765
import org.exist.xquery.value.ValueSequence;
4866

67+
import org.xml.sax.InputSource;
68+
import org.xmlresolver.Resolver;
69+
import org.xmlresolver.utils.SaxProducer;
70+
71+
import static com.evolvedbinary.j8fu.tuple.Tuple.Tuple;
72+
import static java.nio.charset.StandardCharsets.UTF_8;
73+
4974
/**
5075
* XQuery function for validation of XML instance documents
5176
* using grammars like XSDs and DTDs.
@@ -73,9 +98,14 @@ public class Jaxv extends BasicFunction {
7398
"The namespace URI to designate a schema language. Depending on the " +
7499
"jaxv.SchemaFactory implementation the following values are valid:" +
75100
"(XSD 1.0) http://www.w3.org/2001/XMLSchema http://www.w3.org/XML/XMLSchema/v1.0, " +
76-
"(XSD 1.1) http://www.w3.org/XML/XMLSchema/v1.1, " +
101+
"(XSD 1.1) http://www.w3.org/XML/XMLSchema/v1.1, " +
77102
"(RELAX NG 1.0) http://relaxng.org/ns/structure/1.0";
78103

104+
private static final String catalogTxt = "The catalogs referenced as xs:anyURI's. An empty " +
105+
"sequence uses the system catalog. A directory-search catalog (a collection URI ending " +
106+
"in '/') is not supported here, as javax.xml.validation.Validator has no equivalent of " +
107+
"the SAX entity resolver used for that case in validation:jaxp().";
108+
79109
// Setup function signature
80110
public final static FunctionSignature[] signatures = {
81111

@@ -107,8 +137,24 @@ public class Jaxv extends BasicFunction {
107137
new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE,
108138
Shared.simplereportText)
109139
),
110-
111-
140+
141+
new FunctionSignature(
142+
new QName("jaxv", ValidationModule.NAMESPACE_URI, ValidationModule.PREFIX),
143+
extendedFunctionTxt+" Optionally an XML catalog can be specified for schema/entity resolution.",
144+
new SequenceType[]{
145+
new FunctionParameterSequenceType("instance", Type.ITEM, Cardinality.EXACTLY_ONE,
146+
instanceText),
147+
new FunctionParameterSequenceType("grammars", Type.ITEM, Cardinality.ONE_OR_MORE,
148+
grammarText),
149+
new FunctionParameterSequenceType("language", Type.STRING, Cardinality.EXACTLY_ONE,
150+
languageText),
151+
new FunctionParameterSequenceType("catalogs", Type.ITEM, Cardinality.ZERO_OR_MORE,
152+
catalogTxt),
153+
},
154+
new FunctionReturnSequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE,
155+
Shared.simplereportText)
156+
),
157+
112158
new FunctionSignature(
113159
new QName("jaxv-report", ValidationModule.NAMESPACE_URI, ValidationModule.PREFIX),
114160
extendedFunctionTxt+" An XML report is returned.",
@@ -121,7 +167,7 @@ public class Jaxv extends BasicFunction {
121167
new FunctionReturnSequenceType(Type.NODE, Cardinality.EXACTLY_ONE,
122168
Shared.xmlreportText)
123169
),
124-
170+
125171
new FunctionSignature(
126172
new QName("jaxv-report", ValidationModule.NAMESPACE_URI, ValidationModule.PREFIX),
127173
extendedFunctionTxt+" An XML report is returned.",
@@ -135,20 +181,40 @@ public class Jaxv extends BasicFunction {
135181
},
136182
new FunctionReturnSequenceType(Type.NODE, Cardinality.EXACTLY_ONE,
137183
Shared.xmlreportText)
184+
),
185+
186+
new FunctionSignature(
187+
new QName("jaxv-report", ValidationModule.NAMESPACE_URI, ValidationModule.PREFIX),
188+
extendedFunctionTxt+" An XML report is returned. Optionally an XML catalog can be specified for schema/entity resolution.",
189+
new SequenceType[]{
190+
new FunctionParameterSequenceType("instance", Type.ITEM, Cardinality.EXACTLY_ONE,
191+
instanceText),
192+
new FunctionParameterSequenceType("grammars", Type.ITEM, Cardinality.ONE_OR_MORE,
193+
grammarText),
194+
new FunctionParameterSequenceType("language", Type.STRING, Cardinality.EXACTLY_ONE,
195+
languageText),
196+
new FunctionParameterSequenceType("catalogs", Type.ITEM, Cardinality.ZERO_OR_MORE,
197+
catalogTxt),
198+
},
199+
new FunctionReturnSequenceType(Type.NODE, Cardinality.EXACTLY_ONE,
200+
Shared.xmlreportText)
138201
)
139-
202+
140203
};
141204

142205

206+
private final BrokerPool brokerPool;
207+
143208
/**
 * Creates a function instance bound to one of the declared signatures.
 *
 * <p>The {@link BrokerPool} is obtained once, from the broker of the supplied
 * context, and kept in a final field; {@code eval} reads the database
 * configuration from it when the system catalog is requested.</p>
 *
 * @param context   the XQuery context this function is evaluated in.
 * @param signature the signature this instance was created for.
 */
public Jaxv(final XQueryContext context, final FunctionSignature signature) {
    super(context, signature);
    this.brokerPool = context.getBroker().getBrokerPool();
}
146212

147213

148214
public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
149215

150216
// Check input parameters
151-
if (args.length != 2 && args.length != 3) {
217+
if (args.length != 2 && args.length != 3 && args.length != 4) {
152218
return Sequence.EMPTY_SEQUENCE;
153219
}
154220

@@ -160,13 +226,13 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce
160226

161227
try {
162228
report.start();
163-
229+
164230
// Get inputstream for instance document
165231
instance=Shared.getStreamSource(args[0].itemAt(0), context);
166232

167233
// Validate using resource specified in second parameter
168234
grammars = Shared.getStreamSource(args[1], context);
169-
235+
170236
// Check input
171237
for (final StreamSource grammar : grammars) {
172238
final String grammarUrl = grammar.getSystemId();
@@ -177,23 +243,81 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce
177243
}
178244

179245
// Fetch third argument if available, and override default value
180-
if (args.length == 3) {
246+
if (args.length >= 3) {
181247
schemaLang = args[2].getStringValue();
182248
}
183-
249+
184250
// Get language specific factory
185251
SchemaFactory factory = null;
186252
try {
187253
factory = SchemaFactory.newInstance(schemaLang);
188-
254+
189255
} catch (final IllegalArgumentException ex) {
190256
final String msg = "Schema language '" + schemaLang + "' is not supported. " + ex.getMessage();
191257
LOG.error(msg);
192258
throw new XPathException(this, msg);
193259
}
194-
195-
196-
// Create grammar
260+
261+
262+
// Handle catalog (fourth argument). Must be set on the SchemaFactory, and
263+
// BEFORE newSchema() is called: that's when xs:import/xs:include resolution
264+
// happens (schema compilation), not at validate() time. javax.xml.validation
265+
// only accepts an LSResourceResolver, so unlike validation:jaxp() the
266+
// directory-search/collection case (a catalog URL ending in '/') has no
267+
// equivalent here -- org.xmlresolver.Resolver implements LSResourceResolver
268+
// directly, but SearchResourceResolver only implements the Xerces-specific
269+
// XMLEntityResolver (XNI) interface.
270+
if (args.length == 4) {
271+
if (args[3].isEmpty()) {
272+
// Use system catalog
273+
LOG.debug("Using system catalog.");
274+
final Configuration config = brokerPool.getConfiguration();
275+
final Resolver resolver = (Resolver) config.getProperty(XMLReaderObjectFactory.CATALOG_RESOLVER);
276+
factory.setResourceResolver(resolver);
277+
278+
} else {
279+
final String[] catalogUrls = Shared.getUrls(args[3]);
280+
final String singleUrl = catalogUrls[0];
281+
282+
if (singleUrl.endsWith("/")) {
283+
LOG.warn("Directory-search catalogs ('{}') are not supported by validation:jaxv() -- " +
284+
"schema/entity resolution will proceed without a catalog.", singleUrl);
285+
286+
} else if (singleUrl.endsWith(".xml")) {
287+
LOG.debug("Using catalogs {}", getStrings(catalogUrls));
288+
289+
final List<Tuple2<String, Optional<SaxProducer>>> catalogs = new ArrayList<>();
290+
for (String catalogUrl : catalogUrls) {
291+
292+
/* NOTE(AR): Catalog URL if stored in database must start with
293+
URI Scheme xmldb:// so that the XML Resolver can use
294+
org.exist.protocolhandler.protocols.xmldb.Handler
295+
to resolve any relative URI resources from the database.
296+
*/
297+
final Optional<SaxProducer> maybeSaxProducer;
298+
if (catalogUrl.startsWith("xmldb:exist://")) {
299+
catalogUrl = ResolverFactory.fixupExistCatalogUri(catalogUrl);
300+
maybeSaxProducer = Optional.of(catalogSaxProducer(XmldbURI.create(catalogUrl)));
301+
} else if (catalogUrl.startsWith("/db")) {
302+
catalogUrl = ResolverFactory.fixupExistCatalogUri(catalogUrl);
303+
maybeSaxProducer = Optional.of(catalogSaxProducer(XmldbURI.create(catalogUrl)));
304+
} else {
305+
maybeSaxProducer = Optional.empty();
306+
}
307+
308+
catalogs.add(Tuple(catalogUrl, maybeSaxProducer));
309+
}
310+
final Resolver resolver = ResolverFactory.newResolverFromSax(catalogs);
311+
factory.setResourceResolver(resolver);
312+
313+
} else {
314+
LOG.error("Catalog URLs should end on / or .xml");
315+
}
316+
}
317+
}
318+
319+
// Create grammar -- xs:import/xs:include resolution (via the resolver set
320+
// above, if any) happens here, during schema compilation.
197321
final Schema schema = factory.newSchema(grammars);
198322

199323
// Setup validator
@@ -234,8 +358,54 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce
234358
} finally {
235359
context.popDocumentContext();
236360
}
237-
}
361+
}
238362
}
239363

240-
364+
private static String getStrings(String[] data) {
365+
final StringBuilder sb = new StringBuilder();
366+
for (final String field : data) {
367+
sb.append(field);
368+
sb.append(" ");
369+
}
370+
return sb.toString();
371+
}
372+
373+
/**
374+
* Builds a {@link SaxProducer} that streams the SAX events of the catalog document stored
375+
* at {@code documentUri} directly to whatever {@link org.xml.sax.ContentHandler} the catalog
376+
* loader supplies, avoiding having to first serialize the document to a {@link String} and
377+
* have the catalog loader re-parse it from an {@link InputSource}.
378+
*
379+
* <p>The xmlresolver {@code ValidatingXmlLoader} invokes {@link SaxProducer#produce} twice
380+
* (once to validate the catalog against the OASIS XML Catalog RNG schema, once to actually
381+
* load the entries), so each invocation re-acquires the document lock and re-serializes.</p>
382+
*
383+
* @param documentUri the URI of the catalog document stored in the database.
384+
* @return a producer that re-serializes the document's SAX events on each invocation.
385+
*/
386+
private SaxProducer catalogSaxProducer(final XmldbURI documentUri) {
387+
return (contentHandler, dtdHandler, errorHandler) -> {
388+
try (final LockedDocument lockedDocument = context.getBroker().getXMLResource(documentUri, Lock.LockMode.READ_LOCK)) {
389+
if (lockedDocument == null) {
390+
throw new IOException("No such document: " + documentUri);
391+
}
392+
393+
final DocumentImpl doc = lockedDocument.getDocument();
394+
395+
final Properties outputProperties = new Properties();
396+
outputProperties.setProperty(OutputKeys.METHOD, "XML");
397+
outputProperties.setProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
398+
outputProperties.setProperty(OutputKeys.INDENT, "no");
399+
outputProperties.setProperty(OutputKeys.ENCODING, UTF_8.name());
400+
401+
final Serializer serializer = context.getBroker().getSerializer();
402+
serializer.reset();
403+
serializer.setProperties(outputProperties);
404+
serializer.setSAXHandlers(contentHandler, null);
405+
serializer.toSAX(doc);
406+
} catch (final PermissionDeniedException e) {
407+
throw new IOException(e.getMessage(), e);
408+
}
409+
};
410+
}
241411
}

exist-core/src/main/java/org/exist/xquery/functions/validation/ValidationModule.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ public class ValidationModule extends AbstractInternalModule {
5252
new FunctionDef(Jaxv.signatures[1], Jaxv.class),
5353
new FunctionDef(Jaxv.signatures[2], Jaxv.class),
5454
new FunctionDef(Jaxv.signatures[3], Jaxv.class),
55+
new FunctionDef(Jaxv.signatures[4], Jaxv.class),
56+
new FunctionDef(Jaxv.signatures[5], Jaxv.class),
5557

5658
new FunctionDef(Jing.signatures[0], Jing.class),
5759
new FunctionDef(Jing.signatures[1], Jing.class),

0 commit comments

Comments
 (0)