diff --git a/exist-core/src/main/java/org/exist/xquery/functions/xmldb/XMLDBURIFunctions.java b/exist-core/src/main/java/org/exist/xquery/functions/xmldb/XMLDBURIFunctions.java
index 23aa8c1f01b..c9de6bdf10c 100644
--- a/exist-core/src/main/java/org/exist/xquery/functions/xmldb/XMLDBURIFunctions.java
+++ b/exist-core/src/main/java/org/exist/xquery/functions/xmldb/XMLDBURIFunctions.java
@@ -98,7 +98,8 @@ public Sequence eval(Sequence[] args, Sequence contextSequence)
} else if(isCalledAs("encode-uri")) {
return new AnyURIValue(this, URIUtils.encodeXmldbUriFor(args[0].getStringValue()));
} else {
- return new StringValue(this, URIUtils.urlDecodeUtf8(args[0].getStringValue()));
+ // RFC 3986 percent-decoding: '+' is a literal plus, not a space (#1824, #44)
+ return new StringValue(this, URIUtils.decodeForURI(args[0].getStringValue()));
}
} catch(final URISyntaxException e) {
logger.error(e.getMessage(), e);
diff --git a/exist-core/src/main/java/org/exist/xquery/util/URIUtils.java b/exist-core/src/main/java/org/exist/xquery/util/URIUtils.java
index b87ec3f25d8..b9c0c34609d 100644
--- a/exist-core/src/main/java/org/exist/xquery/util/URIUtils.java
+++ b/exist-core/src/main/java/org/exist/xquery/util/URIUtils.java
@@ -245,7 +245,77 @@ public static String encodeForURI(final String pathComponent) {
return new String(buf.buf, 0, buf.count);
}
-
+
+ /**
+  * Percent-decodes a URI path component; the inverse of {@link #encodeForURI(String)}.
+  *
+  * <p>Every well-formed {@code %XX} escape (a {@code '%'} followed by two ASCII hex digits, in
+  * either case) contributes one byte, and a run of adjacent escapes is decoded together as a
+  * single UTF-8 byte sequence. Every other character is copied to the result unchanged.
+  *
+  * <p>In particular {@code '+'} stays a literal plus sign, as RFC 3986 prescribes.
+  * {@link #urlDecodeUtf8(String)} wraps {@link java.net.URLDecoder}, which applies
+  * application/x-www-form-urlencoded rules and would turn the plus into a space; that broke
+  * round-tripping of names through the xmldb URI functions (eXist-db/exist#1824, #44).
+  *
+  * <p>Malformed input is tolerated rather than rejected: a {@code '%'} that is not followed by
+  * two hex digits is copied through verbatim. Escaped bytes that do not form valid UTF-8 are
+  * not preserved, however; they are decoded with
+  * {@link java.io.ByteArrayOutputStream#toString(java.nio.charset.Charset)}, which substitutes
+  * the charset's replacement character (U+FFFD) for them.
+  *
+  * <p>This is deliberately a standalone decoder rather than a call to
+  * {@link java.net.URI#getPath()}. {@code java.net.URI} rejects, with a
+  * {@code URISyntaxException}, strings that {@code xmldb:decode}/{@code xmldb:decode-uri} must
+  * accept (a literal space, a trailing or malformed {@code '%'}, curly braces), and it silently
+  * cuts the path off at {@code '?'} and {@code '#'} because it parses the remainder as a query
+  * or fragment. See {@code URIUtilsTest}.
+  *
+  * @param uriComponent the percent-encoded path component to decode; must not be {@code null}.
+  *
+  * @return the decoded path component; the very same string when it contains no {@code '%'}.
+  *
+  * @throws NullPointerException if {@code uriComponent} is {@code null}.
+  */
+ public static String decodeForURI(final String uriComponent) {
+     final int firstPercent = uriComponent.indexOf('%');
+     if (firstPercent < 0) {
+         // no '%' anywhere, so the input is already its own decoded form
+         return uriComponent;
+     }
+
+     final int length = uriComponent.length();
+     final StringBuilder decoded = new StringBuilder(length);
+     // raw bytes of the current run of adjacent escapes; a multi-byte UTF-8 character spans
+     // several escapes, so the run has to be converted to text as a whole
+     final java.io.ByteArrayOutputStream escapedRun = new java.io.ByteArrayOutputStream();
+
+     // everything in front of the first '%' is literal text
+     decoded.append(uriComponent, 0, firstPercent);
+
+     int pos = firstPercent;
+     while (pos < length) {
+         // consume the longest run of well-formed escapes starting at pos
+         while (pos + 2 < length
+                 && uriComponent.charAt(pos) == '%'
+                 && isHexDigit(uriComponent.charAt(pos + 1))
+                 && isHexDigit(uriComponent.charAt(pos + 2))) {
+             final int high = hexValue(uriComponent.charAt(pos + 1));
+             final int low = hexValue(uriComponent.charAt(pos + 2));
+             escapedRun.write((high << 4) | low);
+             pos += 3;
+         }
+
+         // the run, if any, has ended: turn its bytes into characters
+         if (escapedRun.size() > 0) {
+             decoded.append(escapedRun.toString(UTF_8));
+             escapedRun.reset();
+         }
+
+         // whatever sits at pos now is not the start of a well-formed escape: keep it as-is
+         if (pos < length) {
+             decoded.append(uriComponent.charAt(pos));
+             pos++;
+         }
+     }
+     return decoded.toString();
+ }
+
+ /**
+  * Tells whether {@code c} is an ASCII hexadecimal digit ({@code 0-9}, {@code A-F} or
+  * {@code a-f}).
+  */
+ private static boolean isHexDigit(final char c) {
+     final boolean decimal = '0' <= c && c <= '9';
+     final boolean upperCase = 'A' <= c && c <= 'F';
+     final boolean lowerCase = 'a' <= c && c <= 'f';
+     return decimal || upperCase || lowerCase;
+ }
+
+ /**
+  * Returns the numeric value (0-15) of a hexadecimal digit. The result is only meaningful
+  * for characters accepted by {@link #isHexDigit(char)}.
+  */
+ private static int hexValue(final char c) {
+     if ('0' <= c && c <= '9') {
+         return c - '0';
+     }
+     // letters: anything outside 'A'..'F' is treated as lower case
+     final char base = ('A' <= c && c <= 'F') ? 'A' : 'a';
+     return c - base + 10;
+ }
+
public static String iriToURI(String uriPart) {
String result = urlEncodeUtf8(uriPart);
result = result.replaceAll("%23", "#");
diff --git a/exist-core/src/test/java/org/exist/xquery/util/URIUtilsTest.java b/exist-core/src/test/java/org/exist/xquery/util/URIUtilsTest.java
index 7086a8c7080..6077ff3390e 100644
--- a/exist-core/src/test/java/org/exist/xquery/util/URIUtilsTest.java
+++ b/exist-core/src/test/java/org/exist/xquery/util/URIUtilsTest.java
@@ -192,4 +192,99 @@ void encodeForURIPathComponentUtf8() {
encoded = URIUtils.encodeForURI("\uD802\uDD07");
assertEquals("%F0%90%A4%87", encoded);
}
+
+ /**
+  * decodeForURI must treat '+' as a literal plus sign, not a space. This is the regression
+  * from eXist-db/exist#1824 and #44, where URLDecoder's form-encoding rules turned '+' into ' '.
+  *
+  * The input without any '%' never reaches the decoding loop, so the mixed inputs below are
+  * needed to show that a bare '+' also survives when real decoding takes place.
+  */
+ @Test
+ void decodeForURIPlusIsLiteral() {
+     // a percent-encoded plus decodes back to a plus
+     assertEquals("1+2", URIUtils.decodeForURI("1%2B2"));
+
+     // a bare '+' (nothing percent-encoded) is returned literally
+     assertEquals("a+b", URIUtils.decodeForURI("a+b"));
+
+     // a bare '+' next to a real escape stays a plus while the escape is decoded
+     assertEquals("a+b c", URIUtils.decodeForURI("a+b%20c"));
+
+     // bare and percent-encoded plus signs in the same input
+     assertEquals("1+2+3", URIUtils.decodeForURI("1+2%2B3"));
+ }
+
+ /**
+  * Escaped spaces and escaped percent signs decode to their literal characters, and an escaped
+  * percent sign is decoded exactly once.
+  */
+ @Test
+ void decodeForURISpaceAndPercent() {
+     // each row is { encoded input, expected decoded output }
+     final String[][] cases = {
+         // space
+         { "hello%20world", "hello world" },
+         // percent sign
+         { "99%25", "99%" },
+         // a literal "%2F" in a name encodes to "%252F" and must decode back to "%2F"
+         { "%252F", "%2F" },
+         // double percent sign
+         { "99%25%25100", "99%%100" }
+     };
+     for (final String[] testCase : cases) {
+         assertEquals(testCase[1], URIUtils.decodeForURI(testCase[0]));
+     }
+ }
+
+ /**
+  * Letters, digits and the characters - . _ ~ carry no escapes and must pass through untouched.
+  */
+ @Test
+ void decodeForURIUnreservedUnchanged() {
+     final String unreserved = "ABCabc019-._~";
+     final String decoded = URIUtils.decodeForURI(unreserved);
+     assertEquals(unreserved, decoded);
+ }
+
+ /**
+  * Adjacent escapes are decoded together as one UTF-8 sequence, for 2, 3 and 4 byte characters.
+  */
+ @Test
+ void decodeForURIUtf8() {
+     // 2 byte character - yen sign
+     final String yenSign = URIUtils.decodeForURI("%C2%A5");
+     assertEquals("\u00A5", yenSign);
+
+     // 3 byte character - samaritan letter tsasdiy
+     final String tsasdiy = URIUtils.decodeForURI("%E0%A0%91");
+     assertEquals("\u0811", tsasdiy);
+
+     // 4 byte character - phoenician letter het (a surrogate pair in Java)
+     final String het = URIUtils.decodeForURI("%F0%90%A4%87");
+     assertEquals("\uD802\uDD07", het);
+ }
+
+ /**
+  * Encoding a name with encodeForURI and decoding the result with decodeForURI must give the
+  * name back unchanged. The xmldb URI functions rely on this. The samples cover unreserved
+  * characters, reserved and sub-delimiter characters, percent signs, and non-ASCII text.
+  */
+ @Test
+ void decodeForURIRoundTripsEncodeForURI() {
+     final String[] samples = {
+         "plain", "dash-case", "file.ext", "snake_case", "~home",
+         "hello world", "1+2", "99%", "%2F", "99%%100",
+         "a:b", "x/y", "Goodbye?", "#comment", "[predicate", "predicate]", "adam@work",
+         "Hello!", "$100", "Jack&Jill", "it's", "(comment", "comment)", "1*2", "x,y", "a;b", "n=1",
+         "caf\u00E9", "\u041F\u0440\u0438\u0432\u0435\u0442", "\u6587\u66F8", "\u00A5", "\u0811", "\uD802\uDD07"
+     };
+     for (int idx = 0; idx < samples.length; idx++) {
+         final String original = samples[idx];
+         final String encoded = URIUtils.encodeForURI(original);
+         final String roundTripped = URIUtils.decodeForURI(encoded);
+         assertEquals(original, roundTripped, "encode/decode round-trip failed for: " + original);
+     }
+ }
+
+ /**
+  * decodeForURI must not throw on, and must not truncate, input that is not the output of
+  * encodeForURI: xmldb:decode/xmldb:decode-uri accept arbitrary user strings. The first group
+  * of cases are ones that {@code new java.net.URI(s).getPath()} mishandles (it throws
+  * URISyntaxException, or silently drops everything from a '?' or '#' onward), which is why
+  * decodeForURI is a standalone decoder. The second group pins the boundaries of escape
+  * recognition: escapes at the very end of the input, dangling '%' signs, and hex digit case.
+  */
+ @Test
+ void decodeForURIRobustOnMalformedAndReservedInput() {
+     // a lone '%' not followed by two hex digits is preserved verbatim (URI: throws)
+     assertEquals("100%", URIUtils.decodeForURI("100%"));
+
+     // a truncated escape is preserved verbatim (URI: throws)
+     assertEquals("a%2", URIUtils.decodeForURI("a%2"));
+
+     // a '%' followed by non-hex is preserved verbatim (URI: throws)
+     assertEquals("a%ZZb", URIUtils.decodeForURI("a%ZZb"));
+
+     // a literal space is left as-is (URI: throws on an unencoded space)
+     assertEquals("a b", URIUtils.decodeForURI("a b"));
+
+     // '?' and '#' are ordinary characters here, not query/fragment delimiters (URI: truncates to "a")
+     assertEquals("a?b", URIUtils.decodeForURI("a?b"));
+     assertEquals("a#b", URIUtils.decodeForURI("a#b"));
+
+     // braces are ordinary characters (URI: throws)
+     assertEquals("a{b}c", URIUtils.decodeForURI("a{b}c"));
+
+     // valid escapes still decode even when mixed with characters URI would reject
+     assertEquals("a b?c", URIUtils.decodeForURI("a%20b?c"));
+
+     // boundary: the shortest malformed input, a '%' on its own
+     assertEquals("%", URIUtils.decodeForURI("%"));
+
+     // boundary: a complete escape occupying the last three characters is still decoded
+     assertEquals("a+", URIUtils.decodeForURI("a%2B"));
+
+     // boundary: a valid escape immediately followed by a dangling '%'
+     assertEquals("A%", URIUtils.decodeForURI("%41%"));
+
+     // a stray '%' directly in front of a valid escape must not swallow that escape
+     assertEquals("%A", URIUtils.decodeForURI("%%41"));
+
+     // hex digits are accepted in lower case as well as upper case
+     assertEquals("+/", URIUtils.decodeForURI("%2b%2f"));
+ }
}
diff --git a/exist-core/src/test/xquery/xmldb/uri-encoding-tests.xql b/exist-core/src/test/xquery/xmldb/uri-encoding-tests.xql
new file mode 100644
index 00000000000..aa9e8438074
--- /dev/null
+++ b/exist-core/src/test/xquery/xmldb/uri-encoding-tests.xql
@@ -0,0 +1,62 @@
+(:
+ : eXist-db Open Source Native XML Database
+ : Copyright (C) 2001 The eXist-db Authors
+ :
+ : info@exist-db.org
+ : http://www.exist-db.org
+ :
+ : This library is free software; you can redistribute it and/or
+ : modify it under the terms of the GNU Lesser General Public
+ : License as published by the Free Software Foundation; either
+ : version 2.1 of the License, or (at your option) any later version.
+ :
+ : This library is distributed in the hope that it will be useful,
+ : but WITHOUT ANY WARRANTY; without even the implied warranty of
+ : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ : Lesser General Public License for more details.
+ :
+ : You should have received a copy of the GNU Lesser General Public
+ : License along with this library; if not, write to the Free Software
+ : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ :)
+xquery version "3.1";
+
+module namespace t="http://exist-db.org/testsuite/xmldb-uri-encoding";
+
+declare namespace test="http://exist-db.org/xquery/xqsuite";
+
+(:~
+ : Regression tests for xmldb:decode / xmldb:decode-uri percent-decoding.
+ : eXist-db/exist#1824 and #44: a '+' must be decoded as a literal plus sign (RFC 3986),
+ : not turned into a space (which is application/x-www-form-urlencoded behavior).
+ :)
+
+declare
+    %test:assertEquals("a+b")
+function t:decode-uri-plus-is-literal-when-encoded() {
+    (: %2B is the percent-encoded form of '+' :)
+    let $encoded := xs:anyURI("a%2Bb")
+    return
+        xmldb:decode-uri($encoded)
+};
+
+declare
+    %test:assertEquals("a+b")
+function t:decode-uri-bare-plus-is-literal() {
+    (: an unencoded '+' must come back as a plus sign, not as a space :)
+    let $bare-plus := xs:anyURI("a+b")
+    return
+        xmldb:decode-uri($bare-plus)
+};
+
+declare
+    %test:assertEquals("a+b")
+function t:decode-plus-is-literal-when-encoded() {
+    (: same check as for xmldb:decode-uri, but through the string-typed xmldb:decode :)
+    let $encoded := "a%2Bb"
+    return
+        xmldb:decode($encoded)
+};
+
+declare
+    %test:assertEquals("a b")
+function t:decode-uri-percent-20-is-space() {
+    (: %20 is the percent-encoded form of a space :)
+    let $encoded := xs:anyURI("a%20b")
+    return
+        xmldb:decode-uri($encoded)
+};
+
+declare
+    %test:assertEquals("My Report (2024)+final.xml")
+function t:decode-uri-mixed() {
+    (: a realistic resource name: escaped spaces, parentheses and a plus sign together :)
+    let $encoded := xs:anyURI("My%20Report%20%282024%29%2Bfinal.xml")
+    return
+        xmldb:decode-uri($encoded)
+};