apache · theshoeshiner · Aug 15, 2023 · Aug 21, 2023 · Aug 21, 2023 · Aug 21, 2023
diff --git a/src/main/java/org/apache/commons/text/StringTokenizer.java b/src/main/java/org/apache/commons/text/StringTokenizer.java
@@ -239,6 +239,9 @@ public static StringTokenizer getTSVInstance(final String input) {
     /** Whether to ignore empty tokens. */
     private boolean ignoreEmptyTokens = true;
 
+    /** Whether to omit delimiter matches from output. */
+    private boolean omitDelimiterMatches = true;
+
     /**
      * Constructs a tokenizer splitting on space, tab, newline and form feed as per StringTokenizer, but with no text to
      * tokenize.
@@ -751,8 +754,11 @@ private int readNextToken(final char[] srcChars, int start, final int len, final
         // handle empty token
         final int delimLen = getDelimiterMatcher().isMatch(srcChars, start, start, len);
         if (delimLen > 0) {
-            addToken(tokenList, StringUtils.EMPTY);
-            return start + delimLen;
+            //empty token is not possible if we are including delimiters in token
+            if (omitDelimiterMatches) {
+                addToken(tokenList, StringUtils.EMPTY);
+                return start + delimLen;
+            }
         }
 
         // handle found token
@@ -826,7 +832,14 @@ private int readWithQuotes(final char[] srcChars, final int start, final int len
                 if (delimLen > 0) {
                     // return condition when end of token found
                     addToken(tokenList, workArea.substring(0, trimStart));
-                    return pos + delimLen;
+                    if (omitDelimiterMatches) {
+                        return pos + delimLen;
+                    } else {
+                        //increment position only if we found a new delimiter
+                        if (pos > start) {
+                            return pos;
+                        }
+                    }
                 }
 
                 // check for quote, and thus back into quoting mode
@@ -1021,6 +1034,17 @@ public StringTokenizer setIgnoreEmptyTokens(final boolean ignoreEmptyTokens) {
         return this;
     }
 
+    /**
+     * Sets whether the tokenizer should omit the delimiter matches from the output tokens. Default is {@code true}.
+     * <p>
+     * When {@code false}, the tokenizer stops at a matched delimiter without skipping past it, so that the match
+     * can be included in the following token.
+     * </p>
+     *
+     * @param omitDelimiterMatches {@code true} to omit delimiter matches, {@code false} to keep them in the tokens
+     * @return this, to enable chaining
+     */
+    public StringTokenizer setOmitDelimiterMatches(final boolean omitDelimiterMatches) {
+        this.omitDelimiterMatches = omitDelimiterMatches;
+        return this;
+    }
+
     /**
      * Sets the quote character to use.
      * <p>

diff --git a/src/main/java/org/apache/commons/text/TokenFormatter.java b/src/main/java/org/apache/commons/text/TokenFormatter.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+/**
+ * Formats a single token for inclusion in a combined output string.
+ * <p>
+ * {@link TokenStringifier} calls a formatter once for every token, and once more for every position between two
+ * tokens, appending each non-null result to its output.
+ * </p>
+ */
+public interface TokenFormatter {
+    /**
+     * Formats the given token.
+     *
+     * @param prior the characters of the preceding token as supplied by the caller; may be {@code null}
+     * @param tokenIndex the zero-based position of {@code token} in the sequence being formatted
+     * @param token the characters of the token being formatted
+     * @return the text to emit; {@link TokenStringifier} appends nothing when this is {@code null}
+     */
+    String format(char[] prior, int tokenIndex, char[] token);
+}
diff --git a/src/main/java/org/apache/commons/text/TokenFormatterFactory.java b/src/main/java/org/apache/commons/text/TokenFormatterFactory.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * Provides ready-made {@link TokenFormatter} implementations and factory methods for obtaining them.
+ */
+public class TokenFormatterFactory {
+
+    /**
+     * Token formatter that returns the token as is.
+     */
+    public static class NoOpFormatter implements TokenFormatter {
+
+        /**
+         * Returns the characters of {@code token} as a string; the other arguments are not used.
+         *
+         * @param prior not used
+         * @param tokenIndex not used
+         * @param token the token characters
+         * @return a string holding the characters of {@code token}
+         */
+        @Override
+        public String format(final char[] prior, final int tokenIndex, final char[] token) {
+            return new String(token);
+        }
+
+    }
+
+    /**
+     * Token formatter that always returns a constant string, and optionally checks the passed in token
+     * for the constant and throws an error when found.
+     */
+    public static class ConstantTokenFormatter implements TokenFormatter {
+
+        /**
+         * The constant to return.
+         */
+        private final char[] constant;
+
+        /**
+         * Whether or not to throw an exception if the constant is found.
+         */
+        private boolean failOnConstantFound = true;
+
+        /**
+         * Constructs a formatter for a single-character constant that fails when a token contains it.
+         *
+         * @param constant the character to return
+         */
+        public ConstantTokenFormatter(final char constant) {
+            this(new char[] {constant}, true);
+        }
+
+        /**
+         * Constructs a formatter for a single-character constant.
+         *
+         * @param constant the character to return
+         * @param failOnConstantFound whether to throw when a token contains the constant
+         */
+        public ConstantTokenFormatter(final char constant, final boolean failOnConstantFound) {
+            this(new char[] {constant}, failOnConstantFound);
+        }
+
+        /**
+         * Constructs a formatter for a string constant that fails when a token contains it.
+         *
+         * @param constant the string to return
+         */
+        public ConstantTokenFormatter(final String constant) {
+            this(constant, true);
+        }
+
+        /**
+         * Constructs a formatter for a string constant.
+         *
+         * @param constant the string to return
+         * @param failOnConstantFound whether to throw when a token contains the constant
+         */
+        public ConstantTokenFormatter(final String constant, final boolean failOnConstantFound) {
+            this(constant.toCharArray(), failOnConstantFound);
+        }
+
+        /**
+         * Constructs a formatter for a constant given as characters.
+         *
+         * @param constant the characters to return; the array is copied
+         * @param failOnConstantFound whether to throw when a token contains the constant
+         */
+        public ConstantTokenFormatter(final char[] constant, final boolean failOnConstantFound) {
+            // Copy so that later changes to the caller's array cannot alter this formatter.
+            this.constant = constant.clone();
+            this.failOnConstantFound = failOnConstantFound;
+        }
+
+        /**
+         * Returns the constant, first rejecting the token if checking is enabled and the token contains the constant.
+         *
+         * @param prior not used
+         * @param tokenIndex the index of the token, used only in the exception message
+         * @param token the token characters to check
+         * @return the constant as a string
+         * @throws IllegalArgumentException if checking is enabled and {@code token} contains the constant; the
+         *         message reports the index within the token at which the constant starts
+         */
+        @Override
+        public String format(final char[] prior, final int tokenIndex, final char[] token) {
+            // An empty constant never counts as "found".
+            if (failOnConstantFound && constant.length > 0) {
+                // Only start positions where the whole constant still fits can match; stopping there also
+                // keeps the comparison from reading past the end of the token.
+                final int lastStart = token.length - constant.length;
+                for (int start = 0; start <= lastStart; start++) {
+                    if (matchesConstantAt(token, start)) {
+                        throw new IllegalArgumentException("Token " + tokenIndex + " contains illegal character '"
+                                + new String(constant) + "' at index " + start);
+                    }
+                }
+            }
+
+            return new String(constant);
+        }
+
+        /**
+         * Tests whether the constant occurs in {@code token} beginning at {@code start}.
+         *
+         * @param token the token characters
+         * @param start the start index; {@code start + constant.length} must not exceed {@code token.length}
+         * @return {@code true} if every character of the constant matches
+         */
+        private boolean matchesConstantAt(final char[] token, final int start) {
+            for (int j = 0; j < constant.length; j++) {
+                if (token[start + j] != constant[j]) {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        /**
+         * Set whether to check the token for the constant.
+         * @param checkTokenForConstant whether to check.
+         */
+        public void setFailOnConstantFound(final boolean checkTokenForConstant) {
+            this.failOnConstantFound = checkTokenForConstant;
+        }
+
+    }
+
+    /**
+     * Reusable NoOpFormatter instance.
+     */
+    private static final NoOpFormatter NOOP_FORMATTER = new NoOpFormatter();
+
+    /**
+     * Reusable Empty String formatter instance.
+     */
+    private static final ConstantTokenFormatter EMPTY_STRING_FORMATTER =
+            new ConstantTokenFormatter(StringUtils.EMPTY, false);
+
+    /**
+     * Gets the shared formatter that returns each token unchanged.
+     *
+     * @return the shared no-op formatter
+     */
+    public static NoOpFormatter noOpFormatter() {
+        return NOOP_FORMATTER;
+    }
+
+    /**
+     * Creates a formatter that returns the given constant.
+     *
+     * @param constant the characters to return
+     * @param failOnConstant whether the formatter throws when a token contains the constant
+     * @return a new constant formatter
+     */
+    public static ConstantTokenFormatter constantFormatter(final char[] constant, final boolean failOnConstant) {
+        return new ConstantTokenFormatter(constant, failOnConstant);
+    }
+
+    /**
+     * Creates a formatter that returns the given single character.
+     *
+     * @param constant the character to return
+     * @param failOnConstant whether the formatter throws when a token contains the character
+     * @return a new constant formatter
+     */
+    public static ConstantTokenFormatter constantFormatter(final char constant, final boolean failOnConstant) {
+        return new ConstantTokenFormatter(constant, failOnConstant);
+    }
+
+    /**
+     * Gets the shared formatter that returns the empty string for every token.
+     *
+     * @return the shared empty-string formatter
+     */
+    public static ConstantTokenFormatter emptyFormatter() {
+        return EMPTY_STRING_FORMATTER;
+    }
+}
diff --git a/src/main/java/org/apache/commons/text/TokenStringifier.java b/src/main/java/org/apache/commons/text/TokenStringifier.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+/**
+ * Takes a collection of String tokens and combines them into a single String.
+ * <p>
+ * This class functions as the inverse of {@link org.apache.commons.text.StringTokenizer}. All tokens are formatted
+ * by a {@link TokenFormatter}, which allows fine-grained control over the final output.
+ * </p>
+ */
+public class TokenStringifier {
+
+    /**
+     * The formatter for the delimiter.
+     */
+    private final TokenFormatter delimiterFormatter;
+
+    /**
+     * The formatter for the tokens.
+     */
+    private final TokenFormatter tokenFormatter;
+
+    /**
+     * Builder used to hold formatted tokens.
+     */
+    private StringBuilder builder;
+
+    /**
+     * The final string.
+     */
+    private String string;
+
+    /**
+     * The tokens to turn into a String.
+     */
+    private Iterable<String> tokens;
+
+    /**
+     * Constructs a stringifier that emits tokens unchanged with nothing between them.
+     */
+    public TokenStringifier() {
+        // The delimiter formatter receives the current token, so a no-op formatter here would echo
+        // every token after the first a second time; the empty formatter emits no delimiter at all.
+        this(TokenFormatterFactory.emptyFormatter(), TokenFormatterFactory.noOpFormatter());
+    }
+
+    /**
+     * Constructs a stringifier using the given formatters.
+     *
+     * @param delimiterFormatter produces the text placed before every token except the first
+     * @param tokenFormatter produces the text emitted for each token
+     */
+    public TokenStringifier(final TokenFormatter delimiterFormatter, final TokenFormatter tokenFormatter) {
+        super();
+        this.delimiterFormatter = delimiterFormatter;
+        this.tokenFormatter = tokenFormatter;
+    }
+
+    /**
+     * Replaces the tokens to combine and discards any previously built string.
+     *
+     * @param tokens the tokens to turn into a string
+     */
+    public void reset(final Iterable<String> tokens) {
+        this.tokens = tokens;
+        this.string = null;
+        this.builder = null;
+    }
+
+    /**
+     * Formats all tokens in iteration order and stores the combined result.
+     *
+     * @throws IllegalStateException if no tokens have been supplied
+     */
+    private void stringify() {
+        if (tokens == null) {
+            throw new IllegalStateException("No tokens to stringify; call reset(Iterable) first");
+        }
+        builder = new StringBuilder();
+        char[] priorToken = null;
+        int i = 0;
+        for (final String token : tokens) {
+            final char[] tokenChars = token.toCharArray();
+            if (i > 0) {
+                appendIfPresent(delimiterFormatter.format(priorToken, i, tokenChars));
+            }
+            appendIfPresent(tokenFormatter.format(priorToken, i, tokenChars));
+            // Remember this token so the next iteration's formatters see it as the prior token.
+            priorToken = tokenChars;
+            i++;
+        }
+        string = builder.toString();
+    }
+
+    /**
+     * Appends formatter output to the builder, treating {@code null} as "emit nothing".
+     *
+     * @param text the formatter output, may be {@code null}
+     */
+    private void appendIfPresent(final String text) {
+        if (text != null) {
+            builder.append(text);
+        }
+    }
+
+    /**
+     * Gets the combined string, building it on first use after construction or {@link #reset(Iterable)}.
+     *
+     * @return the combined string
+     * @throws IllegalStateException if no tokens have been supplied
+     */
+    public String getString() {
+        if (string == null) {
+            stringify();
+        }
+        return string;
+    }
+}
diff --git a/src/main/java/org/apache/commons/text/cases/CamelCase.java b/src/main/java/org/apache/commons/text/cases/CamelCase.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.cases;
+
+/**
+ * Case implementation that parses and formats strings of the form 'myCamelCase'.
+ * <p>
+ * CamelCase is a case where tokens are delimited by upper case Unicode characters. The very first
+ * token should begin with a lower case character, and any subsequent tokens begin with an
+ * upper case character. All remaining characters will be lower case or non cased.
+ * </p>
+ */
+public final class CamelCase extends UpperCaseDelimitedCase {
+
+    /** Shared, reusable instance of this case. */
+    public static final CamelCase INSTANCE = new CamelCase();
+
+    /**
+     * Creates the camel case instance. The constructor is private; within this class it is only invoked
+     * to initialise {@link #INSTANCE}. The {@code true} flag is handed to the superclass constructor.
+     */
+    private CamelCase() {
+        super(true);
+    }
+
+}