From 6a0732bd9df64ede3fd5e3f264891674d799d687 Mon Sep 17 00:00:00 2001 From: Adam Anderson Date: Fri, 20 Oct 2023 12:00:30 -0500 Subject: [PATCH] [CXF-8947] - Avoid expensive regex operations in Rfc3986UriValidator if URI.getHost() returns a host name Signed-off-by: Adam Anderson --- .../cxf/jaxrs/impl/Rfc3986UriValidator.java | 13 ++++++-- .../org/apache/cxf/jaxrs/utils/HttpUtils.java | 31 +++++++++++-------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/rt/frontend/jaxrs/src/main/java/org/apache/cxf/jaxrs/impl/Rfc3986UriValidator.java b/rt/frontend/jaxrs/src/main/java/org/apache/cxf/jaxrs/impl/Rfc3986UriValidator.java index a7b27ffb435..af4a42943ad 100644 --- a/rt/frontend/jaxrs/src/main/java/org/apache/cxf/jaxrs/impl/Rfc3986UriValidator.java +++ b/rt/frontend/jaxrs/src/main/java/org/apache/cxf/jaxrs/impl/Rfc3986UriValidator.java @@ -39,7 +39,7 @@ final class Rfc3986UriValidator { private static final String LAST = "#(.*)"; - private static final Pattern HTTP_URL = Pattern.compile("^" + SCHEME + private static final Pattern HTTP_URL = Pattern.compile("^" + SCHEME + "(//(" + USERINFO + "@)?" + HOST + ")?" + PATH + "(\\?" + QUERY + ")?" + "(" + LAST + ")?"); @@ -47,13 +47,20 @@ private Rfc3986UriValidator() { } /** - * Validate the HTTP URL according to https://datatracker.ietf.org/doc/html/rfc3986#appendix-B + * Validate the HTTP URL according to https://datatracker.ietf.org/doc/html/rfc3986#appendix-B * @param uri HTTP schemed URI to validate * @return "true" if URI matches RFC-3986 validation rules, "false" otherwise */ public static boolean validate(final URI uri) { // Only validate the HTTP(s) URIs - if (HttpUtils.isHttpScheme(uri.getScheme())) { + if (HttpUtils.isHttpScheme(uri.getScheme())) { + // If URI.getHost() returns a host name, validate it and + // skip the expensive regular expression logic. + final String uriHost = uri.getHost(); + if (uriHost != null) { + return !StringUtils.isEmpty(uriHost); + } + final Matcher matcher = HTTP_URL.matcher(uri.toString()); if (matcher.matches()) { final String host = matcher.group(5); diff --git a/rt/frontend/jaxrs/src/main/java/org/apache/cxf/jaxrs/utils/HttpUtils.java b/rt/frontend/jaxrs/src/main/java/org/apache/cxf/jaxrs/utils/HttpUtils.java index e08e0b94e89..f76049323dc 100644 --- a/rt/frontend/jaxrs/src/main/java/org/apache/cxf/jaxrs/utils/HttpUtils.java +++ b/rt/frontend/jaxrs/src/main/java/org/apache/cxf/jaxrs/utils/HttpUtils.java @@ -93,13 +93,18 @@ public final class HttpUtils { // there are more of such characters, ex, '*' but '*' is not affected by UrlEncode private static final String PATH_RESERVED_CHARACTERS = "=@/:!$&\'(),;~"; private static final String QUERY_RESERVED_CHARACTERS = "?/,"; - + private static final Set KNOWN_HTTP_VERBS_WITH_NO_REQUEST_CONTENT = new HashSet<>(Arrays.asList(new String[]{"GET", "HEAD", "OPTIONS", "TRACE"})); private static final Set KNOWN_HTTP_VERBS_WITH_NO_RESPONSE_CONTENT = new HashSet<>(Arrays.asList(new String[]{"HEAD", "OPTIONS"})); - - private static final Pattern HTTP_SCHEME_PATTERN = Pattern.compile("^(?i)(http|https)$"); + + private static final Set HTTP_SCHEMES = new HashSet<>(); + + static { + HTTP_SCHEMES.add("http"); + HTTP_SCHEMES.add("https"); + } private HttpUtils() { } @@ -372,7 +377,7 @@ public static URI toAbsoluteUri(String relativePath, Message message) { (HttpServletRequest)message.get(AbstractHTTPDestination.HTTP_REQUEST)); return URI.create(base + relativePath); } - + public static void setHttpRequestURI(Message message, String uriTemplate) { HttpServletRequest request = (HttpServletRequest)message.get(AbstractHTTPDestination.HTTP_REQUEST); @@ -479,8 +484,8 @@ public static String getBaseAddress(Message m) { URI uri = new URI(endpointAddress); String path = uri.getRawPath(); String scheme = uri.getScheme(); - // RFC-3986: the scheme and host are case-insensitive and therefore should - // be normalized to lowercase. + // RFC-3986: the scheme and host are case-insensitive and therefore should + // be normalized to lowercase. if (scheme != null && !scheme.toLowerCase().startsWith(HttpUtils.HTTP_SCHEME) && HttpUtils.isHttpRequest(m)) { path = HttpUtils.toAbsoluteUri(path, m).getRawPath(); @@ -493,7 +498,7 @@ public static String getBaseAddress(Message m) { public static String getEndpointUri(Message m) { final Object servletRequest = m.get(AbstractHTTPDestination.HTTP_REQUEST); - + if (servletRequest != null) { final Object property = ((jakarta.servlet.http.HttpServletRequest)servletRequest) .getAttribute("org.apache.cxf.transport.endpoint.uri"); @@ -501,7 +506,7 @@ public static String getEndpointUri(Message m) { return property.toString(); } } - + return getEndpointAddress(m); } @@ -618,7 +623,7 @@ public static String getEncoding(MediaType mt, String defaultEncoding) { public static String getMediaTypeCharsetParameter(MediaType mt) { String charset = mt.getParameters().get(CHARSET_PARAMETER); - if (charset != null && charset.startsWith(DOUBLE_QUOTE) + if (charset != null && charset.startsWith(DOUBLE_QUOTE) && charset.endsWith(DOUBLE_QUOTE) && charset.length() > 1) { charset = charset.substring(1, charset.length() - 1); } @@ -699,7 +704,7 @@ public static boolean isPayloadEmpty(MultivaluedMap headers) { return false; } - + public static T createServletResourceValue(Message m, Class clazz) { Object value = null; @@ -721,12 +726,12 @@ public static T createServletResourceValue(Message m, Class clazz) { public static boolean isMethodWithNoRequestContent(String method) { return KNOWN_HTTP_VERBS_WITH_NO_REQUEST_CONTENT.contains(method); } - + public static boolean isMethodWithNoResponseContent(String method) { return KNOWN_HTTP_VERBS_WITH_NO_RESPONSE_CONTENT.contains(method); } - + public static boolean isHttpScheme(final String scheme) { - return scheme != null && HTTP_SCHEME_PATTERN.matcher(scheme).matches(); + return scheme != null && HTTP_SCHEMES.contains(scheme.toLowerCase()); } }