Skip to content

Commit e66da0b

Browse files
joewiz and claude committed
[feature] Decode-then-encode collection() and xmldb: read paths too (decision 2, all surfaces)
Extends the doc() decode-then-encode canonicalization to the remaining read surfaces so a literal '%' (and any decoded/descriptor-derived name) resolves consistently everywhere, not just in fn:doc (#6463, decisions 1+2+5):

- fn:collection (ExtCollection.eval): canonicalize a bare db-path before asUri, so collection("/db/x/50%done") reaches the 50%25done key. Previously, asUri's new URI(path) rejected a literal '%' as a malformed escape. A scheme-ful URI (xmldb:, file:, ...) is left untouched so its scheme/authority are not rewritten.
- xmldb: collection reads (XMLDBAbstractCollectionManipulator.getLocalCollection): upgrade the earlier escape=true normalization to decode-then-encode, so xmldb:collection-available / get-child-resources / get-child-collections resolve a literal-% collection by its decoded name, matching fn:doc / fn:collection. decodeForURI is the exact inverse of encodeForURILenient, so each is idempotent on an already-encoded path.

XmldbCollectionAddressByDecodedNameTest gains literalPercentCollectionResolvesByDecodedName (xmldb:collection-available and fn:collection over a literal-% collection). XPathQueryTest (150) stays green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 2fba91b commit e66da0b

3 files changed

Lines changed: 51 additions & 10 deletions

File tree

exist-core/src/main/java/org/exist/xquery/functions/fn/ExtCollection.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.exist.storage.lock.ManagedDocumentLock;
3535
import org.exist.util.LockException;
3636
import org.exist.xmldb.XmldbURI;
37+
import org.exist.xquery.util.URIUtils;
3738
import org.exist.xquery.*;
3839
import org.exist.xquery.functions.xmldb.XMLDBModule;
3940
import org.exist.xquery.value.*;
@@ -81,7 +82,15 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro
8182
if (args.length == 0 || args[0].isEmpty()) {
8283
collectionUri = null;
8384
} else {
84-
collectionUri = asUri(args[0].itemAt(0).getStringValue());
85+
final String raw = args[0].itemAt(0).getStringValue();
86+
// Resource-naming contract (eXist-db/exist#6463, decisions 1 + 2 + 5): canonicalize a bare
87+
// db-path to its stored key by decode-then-encode, so collection("/db/x/café-col") and a
88+
// literal '%' resolve, mirroring fn:doc. A scheme-ful URI (xmldb:, file:, ...) is left
89+
// untouched so its scheme/authority are not rewritten.
90+
final String effective = raw.startsWith(XmldbURI.ROOT_COLLECTION)
91+
? URIUtils.encodePathForURILenient(URIUtils.decodePathForURI(raw))
92+
: raw;
93+
collectionUri = asUri(effective);
8594
}
8695

8796
return getCollectionItems(new URI[] { collectionUri });

exist-core/src/main/java/org/exist/xquery/functions/xmldb/XMLDBAbstractCollectionManipulator.java

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -75,15 +75,16 @@ public static LocalCollection getLocalCollection(final Expression callingExpress
7575
try {
7676
return new InTxnLocalCollection(context.getSubject(), context.getBroker().getBrokerPool(), null, execAndAddErrorIfMissing(callingExpression, () -> {
7777
try {
78-
// Resource-naming contract (eXist-db/exist#6463, decision 5): resolve the collection
79-
// path with escape=true -- the same codec XmldbURI.create / xmldb:store apply when a
80-
// collection is written -- so a caller's decoded or descriptor-derived literal path
81-
// (e.g. "/db/system/repo/badver-${app.version}") resolves the percent-encoded key that
82-
// was actually stored ("badver-$%7Bapp.version%7D"). AnyURIValue.toXmldbURI() resolved
83-
// with escape=false, so an awkward name either missed or threw on a raw illegal char.
84-
// escape=true leaves a literal '%' alone, so it is idempotent on an already-encoded
85-
// path (such as the internal collection URI passed by the node branch of eval()).
86-
return XmldbURI.xmldbUriFor(name, true);
78+
// Resource-naming contract (eXist-db/exist#6463, decisions 1 + 2 + 5): canonicalize
79+
// the collection path to its stored key by decode-then-encode -- the same mapping
80+
// fn:doc / fn:collection apply -- so a caller's decoded or descriptor-derived literal
81+
// path (e.g. "/db/system/repo/badver-${app.version}") resolves the percent-encoded key
82+
// that was actually stored ("badver-$%7Bapp.version%7D"), and a literal '%' resolves by
83+
// its decoded form. AnyURIValue.toXmldbURI() resolved with escape=false, so an awkward
84+
// name either missed or threw on a raw illegal char. decodeForURI is the exact inverse
85+
// of encodeForURILenient, so this is idempotent on an already-encoded path (such as the
86+
// internal collection URI passed by the node branch of eval()).
87+
return XmldbURI.xmldbUriFor(URIUtils.encodePathForURILenient(URIUtils.decodePathForURI(name)), false);
8788
} catch (final URISyntaxException e) {
8889
throw new XPathException(callingExpression, org.exist.xquery.ErrorCodes.FORG0001,
8990
"failed to convert '" + name + "' into an XmldbURI: " + e.getMessage(), e);

exist-core/src/test/java/org/exist/xquery/functions/xmldb/XmldbCollectionAddressByDecodedNameTest.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,37 @@ public void getChildResourcesResolvesDecodedLiteralPath() throws XMLDBException
9191
assertEquals("1", rs.getResource(0).getContent());
9292
}
9393

94+
/**
95+
* Decision 2, read side, across the xmldb: and fn:collection surfaces: a collection whose name
96+
* literally contains '%' is stored bijectively (the '%' is escaped to %25) and is resolvable by
97+
* its decoded literal name via xmldb:collection-available and fn:collection -- the same
98+
* decode-then-encode canonicalization fn:doc applies. (The '%' here is not a valid escape, so
99+
* decoding is a no-op and the decoded form round-trips to its own stored key.)
100+
*/
101+
@Test
102+
public void literalPercentCollectionResolvesByDecodedName() throws XMLDBException {
103+
final String name = "pctcol-50%done";
104+
existEmbeddedServer.executeQuery(
105+
"declare variable $n external; xmldb:create-collection('/db', $n)",
106+
Map.of("n", new StringValue(name)));
107+
108+
// bijective lenient store escapes the literal '%' to %25 (no collision)
109+
assertTrue("precondition: the name must encode to a %25 stored key",
110+
URIUtils.encodeForURILenient(name).contains("%25"));
111+
112+
// xmldb:collection-available by the decoded literal name
113+
assertEquals("xmldb:collection-available by decoded literal-% name", "true", available("/db/" + name));
114+
115+
// fn:collection over the decoded literal path resolves the same collection
116+
existEmbeddedServer.executeQuery(
117+
"declare variable $c external; xmldb:store($c, 'd.xml', document { <d/> })",
118+
Map.of("c", new StringValue("/db/" + name)));
119+
final ResourceSet count = existEmbeddedServer.executeQuery(
120+
"declare variable $c external; count(collection($c))",
121+
Map.of("c", new StringValue("/db/" + name)));
122+
assertEquals("collection() by decoded literal-% path", "1", count.getResource(0).getContent());
123+
}
124+
94125
private static String available(final String path) throws XMLDBException {
95126
final ResourceSet rs = existEmbeddedServer.executeQuery(
96127
"declare variable $p external; xmldb:collection-available($p)", Map.of("p", new StringValue(path)));

0 commit comments

Comments
 (0)