2121 */
2222package org .exist .xquery .functions .validation ;
2323
24+ import java .io .IOException ;
2425import java .net .MalformedURLException ;
26+ import java .util .ArrayList ;
27+ import java .util .List ;
28+ import java .util .Optional ;
29+ import java .util .Properties ;
2530
2631import javax .xml .XMLConstants ;
32+ import javax .xml .transform .OutputKeys ;
2733import javax .xml .transform .stream .StreamSource ;
2834import javax .xml .validation .Schema ;
2935import javax .xml .validation .SchemaFactory ;
3036import javax .xml .validation .Validator ;
3137
38+ import com .evolvedbinary .j8fu .tuple .Tuple2 ;
39+
3240import org .exist .dom .QName ;
3341import org .exist .dom .memtree .MemTreeBuilder ;
3442import org .exist .dom .memtree .NodeImpl ;
43+ import org .exist .dom .persistent .DocumentImpl ;
44+ import org .exist .dom .persistent .LockedDocument ;
45+ import org .exist .resolver .ResolverFactory ;
46+ import org .exist .security .PermissionDeniedException ;
47+ import org .exist .storage .BrokerPool ;
48+ import org .exist .storage .lock .Lock ;
49+ import org .exist .storage .serializers .Serializer ;
50+ import org .exist .util .Configuration ;
51+ import org .exist .util .XMLReaderObjectFactory ;
3552import org .exist .validation .ValidationReport ;
53+ import org .exist .xmldb .XmldbURI ;
3654import org .exist .xquery .BasicFunction ;
3755import org .exist .xquery .Cardinality ;
3856import org .exist .xquery .FunctionSignature ;
4664import org .exist .xquery .value .Type ;
4765import org .exist .xquery .value .ValueSequence ;
4866
67+ import org .xml .sax .InputSource ;
68+ import org .xmlresolver .Resolver ;
69+ import org .xmlresolver .utils .SaxProducer ;
70+
71+ import static com .evolvedbinary .j8fu .tuple .Tuple .Tuple ;
72+ import static java .nio .charset .StandardCharsets .UTF_8 ;
73+
4974/**
5075 * xQuery function for validation of XML instance documents
5176 * using grammars like XSDs and DTDs.
@@ -73,9 +98,14 @@ public class Jaxv extends BasicFunction {
7398 "The namespace URI to designate a schema language. Depending on the " +
7499 "jaxv.SchemaFactory implementation the following values are valid:" +
75100 "(XSD 1.0) http://www.w3.org/2001/XMLSchema http://www.w3.org/XML/XMLSchema/v1.0, " +
76- "(XSD 1.1) http://www.w3.org/XML/XMLSchema/v1.1, " +
101+ "(XSD 1.1) http://www.w3.org/XML/XMLSchema/v1.1, " +
77102 "(RELAX NG 1.0) http://relaxng.org/ns/structure/1.0" ;
78103
104+ private static final String catalogTxt = "The catalogs referenced as xs:anyURI's. An empty " +
105+ "sequence uses the system catalog. A directory-search catalog (a collection URI ending " +
106+ "in '/') is not supported here, as javax.xml.validation.Validator has no equivalent of " +
107+ "the SAX entity resolver used for that case in validation:jaxp()." ;
108+
79109 // Setup function signature
80110 public final static FunctionSignature [] signatures = {
81111
@@ -107,8 +137,24 @@ public class Jaxv extends BasicFunction {
107137 new FunctionReturnSequenceType (Type .BOOLEAN , Cardinality .EXACTLY_ONE ,
108138 Shared .simplereportText )
109139 ),
110-
111-
140+
141+ new FunctionSignature (
142+ new QName ("jaxv" , ValidationModule .NAMESPACE_URI , ValidationModule .PREFIX ),
143+ extendedFunctionTxt +" Optionally an XML catalog can be specified for schema/entity resolution." ,
144+ new SequenceType []{
145+ new FunctionParameterSequenceType ("instance" , Type .ITEM , Cardinality .EXACTLY_ONE ,
146+ instanceText ),
147+ new FunctionParameterSequenceType ("grammars" , Type .ITEM , Cardinality .ONE_OR_MORE ,
148+ grammarText ),
149+ new FunctionParameterSequenceType ("language" , Type .STRING , Cardinality .EXACTLY_ONE ,
150+ languageText ),
151+ new FunctionParameterSequenceType ("catalogs" , Type .ITEM , Cardinality .ZERO_OR_MORE ,
152+ catalogTxt ),
153+ },
154+ new FunctionReturnSequenceType (Type .BOOLEAN , Cardinality .EXACTLY_ONE ,
155+ Shared .simplereportText )
156+ ),
157+
112158 new FunctionSignature (
113159 new QName ("jaxv-report" , ValidationModule .NAMESPACE_URI , ValidationModule .PREFIX ),
114160 extendedFunctionTxt +" An XML report is returned." ,
@@ -121,7 +167,7 @@ public class Jaxv extends BasicFunction {
121167 new FunctionReturnSequenceType (Type .NODE , Cardinality .EXACTLY_ONE ,
122168 Shared .xmlreportText )
123169 ),
124-
170+
125171 new FunctionSignature (
126172 new QName ("jaxv-report" , ValidationModule .NAMESPACE_URI , ValidationModule .PREFIX ),
127173 extendedFunctionTxt +" An XML report is returned." ,
@@ -135,20 +181,40 @@ public class Jaxv extends BasicFunction {
135181 },
136182 new FunctionReturnSequenceType (Type .NODE , Cardinality .EXACTLY_ONE ,
137183 Shared .xmlreportText )
184+ ),
185+
186+ new FunctionSignature (
187+ new QName ("jaxv-report" , ValidationModule .NAMESPACE_URI , ValidationModule .PREFIX ),
188+ extendedFunctionTxt +" An XML report is returned. Optionally an XML catalog can be specified for schema/entity resolution." ,
189+ new SequenceType []{
190+ new FunctionParameterSequenceType ("instance" , Type .ITEM , Cardinality .EXACTLY_ONE ,
191+ instanceText ),
192+ new FunctionParameterSequenceType ("grammars" , Type .ITEM , Cardinality .ONE_OR_MORE ,
193+ grammarText ),
194+ new FunctionParameterSequenceType ("language" , Type .STRING , Cardinality .EXACTLY_ONE ,
195+ languageText ),
196+ new FunctionParameterSequenceType ("catalogs" , Type .ITEM , Cardinality .ZERO_OR_MORE ,
197+ catalogTxt ),
198+ },
199+ new FunctionReturnSequenceType (Type .NODE , Cardinality .EXACTLY_ONE ,
200+ Shared .xmlreportText )
138201 )
139-
202+
140203 };
141204
142205
206+ private final BrokerPool brokerPool ;
207+
/**
 * Creates the function instance for one of the declared signatures and
 * remembers the broker pool of the broker currently attached to the
 * query context.
 *
 * @param context   the XQuery context this function is evaluated in.
 * @param signature the signature this instance was created for.
 */
public Jaxv(XQueryContext context, FunctionSignature signature) {
    super(context, signature);
    // Looked up once at construction time and kept for the lifetime of the instance.
    this.brokerPool = context.getBroker().getBrokerPool();
}
146212
147213
148214 public Sequence eval (Sequence [] args , Sequence contextSequence ) throws XPathException {
149215
150216 // Check input parameters
151- if (args .length != 2 && args .length != 3 ) {
217+ if (args .length != 2 && args .length != 3 && args . length != 4 ) {
152218 return Sequence .EMPTY_SEQUENCE ;
153219 }
154220
@@ -160,13 +226,13 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce
160226
161227 try {
162228 report .start ();
163-
229+
164230 // Get inputstream for instance document
165231 instance =Shared .getStreamSource (args [0 ].itemAt (0 ), context );
166232
167233 // Validate using resource specified in second parameter
168234 grammars = Shared .getStreamSource (args [1 ], context );
169-
235+
170236 // Check input
171237 for (final StreamSource grammar : grammars ) {
172238 final String grammarUrl = grammar .getSystemId ();
@@ -177,23 +243,81 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce
177243 }
178244
179245 // Fetch third argument if available, and override default value
180- if (args .length = = 3 ) {
246+ if (args .length > = 3 ) {
181247 schemaLang = args [2 ].getStringValue ();
182248 }
183-
249+
184250 // Get language specific factory
185251 SchemaFactory factory = null ;
186252 try {
187253 factory = SchemaFactory .newInstance (schemaLang );
188-
254+
189255 } catch (final IllegalArgumentException ex ) {
190256 final String msg = "Schema language '" + schemaLang + "' is not supported. " + ex .getMessage ();
191257 LOG .error (msg );
192258 throw new XPathException (this , msg );
193259 }
194-
195-
196- // Create grammar
260+
261+
262+ // Handle catalog (fourth argument). Must be set on the SchemaFactory, and
263+ // BEFORE newSchema() is called: that's when xs:import/xs:include resolution
264+ // happens (schema compilation), not at validate() time. javax.xml.validation
265+ // only accepts an LSResourceResolver, so unlike validation:jaxp() the
266+ // directory-search/collection case (a catalog URL ending in '/') has no
267+ // equivalent here -- org.xmlresolver.Resolver implements LSResourceResolver
268+ // directly, but SearchResourceResolver only implements the Xerces-specific
269+ // XMLEntityResolver (XNI) interface.
270+ if (args .length == 4 ) {
271+ if (args [3 ].isEmpty ()) {
272+ // Use system catalog
273+ LOG .debug ("Using system catalog." );
274+ final Configuration config = brokerPool .getConfiguration ();
275+ final Resolver resolver = (Resolver ) config .getProperty (XMLReaderObjectFactory .CATALOG_RESOLVER );
276+ factory .setResourceResolver (resolver );
277+
278+ } else {
279+ final String [] catalogUrls = Shared .getUrls (args [3 ]);
280+ final String singleUrl = catalogUrls [0 ];
281+
282+ if (singleUrl .endsWith ("/" )) {
283+ LOG .warn ("Directory-search catalogs ('{}') are not supported by validation:jaxv() -- " +
284+ "schema/entity resolution will proceed without a catalog." , singleUrl );
285+
286+ } else if (singleUrl .endsWith (".xml" )) {
287+ LOG .debug ("Using catalogs {}" , getStrings (catalogUrls ));
288+
289+ final List <Tuple2 <String , Optional <SaxProducer >>> catalogs = new ArrayList <>();
290+ for (String catalogUrl : catalogUrls ) {
291+
292+ /* NOTE(AR): Catalog URL if stored in database must start with
293+ URI Scheme xmldb:// so that the XML Resolver can use
294+ org.exist.protocolhandler.protocols.xmldb.Handler
295+ to resolve any relative URI resources from the database.
296+ */
297+ final Optional <SaxProducer > maybeSaxProducer ;
298+ if (catalogUrl .startsWith ("xmldb:exist://" )) {
299+ catalogUrl = ResolverFactory .fixupExistCatalogUri (catalogUrl );
300+ maybeSaxProducer = Optional .of (catalogSaxProducer (XmldbURI .create (catalogUrl )));
301+ } else if (catalogUrl .startsWith ("/db" )) {
302+ catalogUrl = ResolverFactory .fixupExistCatalogUri (catalogUrl );
303+ maybeSaxProducer = Optional .of (catalogSaxProducer (XmldbURI .create (catalogUrl )));
304+ } else {
305+ maybeSaxProducer = Optional .empty ();
306+ }
307+
308+ catalogs .add (Tuple (catalogUrl , maybeSaxProducer ));
309+ }
310+ final Resolver resolver = ResolverFactory .newResolverFromSax (catalogs );
311+ factory .setResourceResolver (resolver );
312+
313+ } else {
314+ LOG .error ("Catalog URLs should end on / or .xml" );
315+ }
316+ }
317+ }
318+
319+ // Create grammar -- xs:import/xs:include resolution (via the resolver set
320+ // above, if any) happens here, during schema compilation.
197321 final Schema schema = factory .newSchema (grammars );
198322
199323 // Setup validator
@@ -234,8 +358,54 @@ public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathExce
234358 } finally {
235359 context .popDocumentContext ();
236360 }
237- }
361+ }
238362 }
239363
240-
364+ private static String getStrings (String [] data ) {
365+ final StringBuilder sb = new StringBuilder ();
366+ for (final String field : data ) {
367+ sb .append (field );
368+ sb .append (" " );
369+ }
370+ return sb .toString ();
371+ }
372+
373+ /**
374+ * Builds a {@link SaxProducer} that streams the SAX events of the catalog document stored
375+ * at {@code documentUri} directly to whatever {@link org.xml.sax.ContentHandler} the catalog
376+ * loader supplies, avoiding having to first serialize the document to a {@link String} and
377+ * have the catalog loader re-parse it from an {@link InputSource}.
378+ *
379+ * <p>The xmlresolver {@code ValidatingXmlLoader} invokes {@link SaxProducer#produce} twice
380+ * (once to validate the catalog against the OASIS XML Catalog RNG schema, once to actually
381+ * load the entries), so each invocation re-acquires the document lock and re-serializes.</p>
382+ *
383+ * @param documentUri the URI of the catalog document stored in the database.
384+ * @return a producer that re-serializes the document's SAX events on each invocation.
385+ */
386+ private SaxProducer catalogSaxProducer (final XmldbURI documentUri ) {
387+ return (contentHandler , dtdHandler , errorHandler ) -> {
388+ try (final LockedDocument lockedDocument = context .getBroker ().getXMLResource (documentUri , Lock .LockMode .READ_LOCK )) {
389+ if (lockedDocument == null ) {
390+ throw new IOException ("No such document: " + documentUri );
391+ }
392+
393+ final DocumentImpl doc = lockedDocument .getDocument ();
394+
395+ final Properties outputProperties = new Properties ();
396+ outputProperties .setProperty (OutputKeys .METHOD , "XML" );
397+ outputProperties .setProperty (OutputKeys .OMIT_XML_DECLARATION , "yes" );
398+ outputProperties .setProperty (OutputKeys .INDENT , "no" );
399+ outputProperties .setProperty (OutputKeys .ENCODING , UTF_8 .name ());
400+
401+ final Serializer serializer = context .getBroker ().getSerializer ();
402+ serializer .reset ();
403+ serializer .setProperties (outputProperties );
404+ serializer .setSAXHandlers (contentHandler , null );
405+ serializer .toSAX (doc );
406+ } catch (final PermissionDeniedException e ) {
407+ throw new IOException (e .getMessage (), e );
408+ }
409+ };
410+ }
241411}
0 commit comments