Skip to content

Commit e26e439

Browse files
committed
[bugfix] Detect XSD 1.1 up front; support directory-search catalogs
`jaxp()` now peeks at the schema's `vc:minVersion` before parsing and picks the right pipeline directly, instead of always failing once and retrying. The peek only follows same-origin (scheme+authority) locations, since the `schemaLocation` hint is document-controlled and unrestricted resolution would let any caller make the server fetch arbitrary files or URLs. `SearchResourceResolver` now also implements `LSResourceResolver`, so directory-search catalogs work with the XSD 1.1 validator and with `jaxv()`'s 4th argument too. Broadens the retry guard to fire whenever a catalog is configured, since catalogs can resolve by namespace alone with no `schemaLocation` hint present.
1 parent f142963 commit e26e439

5 files changed

Lines changed: 512 additions & 62 deletions

File tree

exist-core/src/main/java/org/exist/validation/resolver/SearchResourceResolver.java

Lines changed: 155 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,17 @@
3333
import org.exist.storage.BrokerPool;
3434
import org.exist.validation.internal.DatabaseResources;
3535
import org.exist.xmldb.XmldbURI;
36+
import org.w3c.dom.ls.LSInput;
37+
import org.w3c.dom.ls.LSResourceResolver;
3638
import org.xml.sax.InputSource;
3739
import org.xml.sax.SAXException;
3840
import org.xmlresolver.Resolver;
3941
import org.xmlresolver.utils.SaxProducer;
4042

43+
import javax.annotation.Nullable;
4144
import java.io.IOException;
4245
import java.io.InputStream;
46+
import java.io.Reader;
4347
import java.net.URI;
4448
import java.net.URISyntaxException;
4549
import java.util.List;
@@ -52,9 +56,18 @@
5256
* Resolve a resource by searching in the database. Schemas are queried
5357
* directly, DTD are searched in catalog files.
5458
*
59+
* <p>Implements both {@link XMLEntityResolver} (Xerces' XNI interface, used by the default
60+
* SAX-parser-based dynamic-discovery pipeline) and {@link LSResourceResolver} ({@code
61+
* javax.xml.validation}'s interface, used by the XSD-1.1-capable {@link javax.xml.validation.Validator}
62+
* pipeline and by {@code validation:jaxv()}'s {@link javax.xml.validation.SchemaFactory}) so that
63+
* directory-search catalogs work the same way regardless of which pipeline ends up validating.
64+
* {@link LSResourceResolver} is XSD-only (no DTD/catalog equivalent), so {@link #resolveResource}
65+
* only ever performs the XML-Schema-by-namespace search, mirroring {@link #resolveEntity}'s
66+
* namespace branch.</p>
67+
*
5568
* @author Dannes Wessels (dizzzz@exist-db.org)
5669
*/
57-
public class SearchResourceResolver implements XMLEntityResolver {
70+
public class SearchResourceResolver implements XMLEntityResolver, LSResourceResolver {
5871
private static final Logger LOG = LogManager.getLogger(SearchResourceResolver.class);
5972

6073
private final String collectionPath;
@@ -161,6 +174,147 @@ public XMLInputSource resolveEntity(final XMLResourceIdentifier xri) throws XNIE
161174
return xis;
162175
}
163176

177+
/**
178+
* Resolves an {@code xs:import}/{@code xs:include}, or the instance's own root schema
179+
* reference during dynamic discovery (confirmed by experiment: {@code javax.xml.validation}'s
180+
* dynamic-discovery {@link javax.xml.validation.Validator} consults the configured
181+
* {@link LSResourceResolver} for the root schema location too, not just nested imports), by
182+
* searching for an XSD declaring {@code namespaceURI} under {@code collectionPath} -- the same
183+
* lookup {@link #resolveEntity}'s namespace branch performs for the XNI pipeline.
184+
*
185+
* <p>{@code systemId}/{@code baseURI} are intentionally ignored: unlike {@link #resolveEntity},
186+
* there is no DTD/catalog case to consider here (LSResourceResolver is XSD-only), and the
187+
* directory-search contract is "find an XSD by namespace", not "fetch whatever URI is named" --
188+
* the result always comes from the permission-checked {@code findXSD} search, never from a
189+
* caller/document-supplied location.</p>
190+
*/
191+
@Override
192+
@Nullable
193+
public LSInput resolveResource(final String type, @Nullable final String namespaceURI,
194+
@Nullable final String publicId, @Nullable final String systemId, @Nullable final String baseURI) {
195+
if (namespaceURI == null) {
196+
return null;
197+
}
198+
199+
if (LOG.isDebugEnabled()) {
200+
LOG.debug("Searching namespace '{}' in database from {}... (LSResourceResolver)", namespaceURI, collectionPath);
201+
}
202+
203+
final DatabaseResources databaseResources = new DatabaseResources(brokerPool);
204+
String resourcePath = databaseResources.findXSD(collectionPath, namespaceURI, subject);
205+
if (resourcePath == null) {
206+
return null;
207+
}
208+
resourcePath = ResolverFactory.fixupExistCatalogUri(resourcePath);
209+
210+
try {
211+
final InputStream is = URI.create(resourcePath).toURL().openStream();
212+
return new DatabaseLSInput(publicId, systemId, baseURI, is);
213+
} catch (final IOException e) {
214+
LOG.error("Could not open resolved schema resource '{}': {}", resourcePath, e.getMessage());
215+
return null;
216+
}
217+
}
218+
219+
/**
220+
* Minimal {@link LSInput} wrapping an already-opened {@link InputStream} -- there is no
221+
* JDK-stock implementation of this interface available to depend on.
222+
*/
223+
private static final class DatabaseLSInput implements LSInput {
224+
private final String publicId;
225+
private final String systemId;
226+
private final String baseURI;
227+
private InputStream byteStream;
228+
229+
DatabaseLSInput(@Nullable final String publicId, @Nullable final String systemId,
230+
@Nullable final String baseURI, final InputStream byteStream) {
231+
this.publicId = publicId;
232+
this.systemId = systemId;
233+
this.baseURI = baseURI;
234+
this.byteStream = byteStream;
235+
}
236+
237+
@Override
238+
public Reader getCharacterStream() {
239+
return null;
240+
}
241+
242+
@Override
243+
public void setCharacterStream(final Reader characterStream) {
244+
// not used: this implementation only ever supplies a byte stream
245+
}
246+
247+
@Override
248+
public InputStream getByteStream() {
249+
return byteStream;
250+
}
251+
252+
@Override
253+
public void setByteStream(final InputStream byteStream) {
254+
this.byteStream = byteStream;
255+
}
256+
257+
@Override
258+
public String getStringData() {
259+
return null;
260+
}
261+
262+
@Override
263+
public void setStringData(final String stringData) {
264+
// not used: this implementation only ever supplies a byte stream
265+
}
266+
267+
@Override
268+
public String getSystemId() {
269+
return systemId;
270+
}
271+
272+
@Override
273+
public void setSystemId(final String systemId) {
274+
// immutable: this instance is only ever built once per resolveResource() call
275+
}
276+
277+
@Override
278+
public String getPublicId() {
279+
return publicId;
280+
}
281+
282+
@Override
283+
public void setPublicId(final String publicId) {
284+
// immutable: this instance is only ever built once per resolveResource() call
285+
}
286+
287+
@Override
288+
public String getBaseURI() {
289+
return baseURI;
290+
}
291+
292+
@Override
293+
public void setBaseURI(final String baseURI) {
294+
// immutable: this instance is only ever built once per resolveResource() call
295+
}
296+
297+
@Override
298+
public String getEncoding() {
299+
return null;
300+
}
301+
302+
@Override
303+
public void setEncoding(final String encoding) {
304+
// not used: the schema document carries its own encoding declaration, if any
305+
}
306+
307+
@Override
308+
public boolean getCertifiedText() {
309+
return false;
310+
}
311+
312+
@Override
313+
public void setCertifiedText(final boolean certifiedText) {
314+
// not used
315+
}
316+
}
317+
164318
private String getXriDetails(final XMLResourceIdentifier xrid) {
165319
return "PublicId='%s' BaseSystemId='%s' ExpandedSystemId='%s' LiteralSystemId='%s' Namespace='%s' ".formatted(
166320
xrid.getPublicId(), xrid.getBaseSystemId(), xrid.getExpandedSystemId(), xrid.getLiteralSystemId(), xrid.getNamespace());

0 commit comments

Comments
 (0)