xuemingshen-oracle
diff --git a/‎make/jdk/src/classes/build/tools/generatecharacter/CaseFolding.java‎
Lines changed: 79 additions & 23 deletions b/‎make/jdk/src/classes/build/tools/generatecharacter/CaseFolding.java‎
Lines changed: 79 additions & 23 deletions
diff --git a/‎make/modules/java.base/gensrc/GensrcCharacterData.gmk‎
Lines changed: 17 additions & 0 deletions b/‎make/modules/java.base/gensrc/GensrcCharacterData.gmk‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎make/modules/java.base/gensrc/GensrcRegex.gmk‎
Lines changed: 0 additions & 17 deletions b/‎make/modules/java.base/gensrc/GensrcRegex.gmk‎
Lines changed: 0 additions & 17 deletions
diff --git a/‎src/java.base/share/classes/java/lang/String.java‎
Lines changed: 146 additions & 0 deletions b/‎src/java.base/share/classes/java/lang/String.java‎
Lines changed: 146 additions & 0 deletions
diff --git a/‎src/java.base/share/classes/java/lang/StringCaseFoldedCharIterator.java‎
Lines changed: 60 additions & 0 deletions b/‎src/java.base/share/classes/java/lang/StringCaseFoldedCharIterator.java‎
Lines changed: 60 additions & 0 deletions
@@ -22,15 +22,14 @@
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */
-
 package build.tools.generatecharacter;
 
-import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Paths;
 import java.nio.file.StandardOpenOption;
+import java.util.Arrays;
 import java.util.stream.Collectors;
-import java.util.stream.Stream;
+import java.util.stream.IntStream;
 
 public class CaseFolding {
 
@@ -42,32 +41,89 @@ public static void main(String[] args) throws Throwable {
         var templateFile = Paths.get(args[0]);
         var caseFoldingTxt = Paths.get(args[1]);
         var genSrcFile = Paths.get(args[2]);
-        var supportedTypes = "^.*; [CTS]; .*$";
+
+        // java.lang
+        var supportedTypes = "^.*; [CF]; .*$";  // full/1:M case folding
         var caseFoldingEntries = Files.lines(caseFoldingTxt)
-            .filter(line -> !line.startsWith("#") && line.matches(supportedTypes))
-            .map(line -> {
-                String[] cols = line.split("; ");
-                return new String[] {cols[0], cols[1], cols[2]};
-            })
-            .filter(cols -> {
-                //  the folding case doesn't map back to the original char.
-                var cp1 = Integer.parseInt(cols[0], 16);
-                var cp2 = Integer.parseInt(cols[2], 16);
-                return Character.toUpperCase(cp2) != cp1 && Character.toLowerCase(cp2) != cp1;
-            })
-            .map(cols -> String.format("        entry(0x%s, 0x%s)", cols[0], cols[2]))
-            .collect(Collectors.joining(",\n", "", ""));
+                .filter(line -> !line.startsWith("#") && line.matches(supportedTypes))
+                .map(line -> {
+                    var fields = line.split("; ");
+                    var cp = Integer.parseInt(fields[0], 16);
+                    fields = fields[2].trim().split(" ");
+                    var folding = new int[fields.length];
+                    for (int i = 0; i < folding.length; i++) {
+                        folding[i] = Integer.parseInt(fields[i], 16);
+                    }
+                    var foldingChars = Arrays.stream(folding)
+                            .mapToObj(Character::toChars)
+                            .flatMapToInt(chars -> IntStream.range(0, chars.length).map(i -> (int) chars[i]))
+                            .toArray();
+                    return String.format("\t\tnew CaseFoldingEntry(0x%04x, %s)",
+                            cp,
+                            Arrays.stream(foldingChars)
+                                    .mapToObj(c -> String.format("0x%04x", c))
+                                    .collect(Collectors.joining(", ", "new char[] {", "}"))
+                    );
+                })
+                .collect(Collectors.joining(",\n", "", ""));
+        // util.regex
+        var expandedSupportedTypes = "^.*; [CTS]; .*$";
+        var expanded_caseFoldingEntries = Files.lines(caseFoldingTxt)
+                .filter(line -> !line.startsWith("#") && line.matches(expandedSupportedTypes))
+                .map(line -> {
+                    String[] cols = line.split("; ");
+                    return new String[]{cols[0], cols[1], cols[2]};
+                })
+                .filter(cols -> {
+                    //  the folding case doesn't map back to the original char.
+                    var cp1 = Integer.parseInt(cols[0], 16);
+                    var cp2 = Integer.parseInt(cols[2], 16);
+                    return Character.toUpperCase(cp2) != cp1 && Character.toLowerCase(cp2) != cp1;
+                })
+                .map(cols -> String.format("        entry(0x%s, 0x%s)", cols[0], cols[2]))
+                .collect(Collectors.joining(",\n", "", ""));
 
         // hack, hack, hack! the logic does not pick 0131. just add manually to support 'I's.
         // 0049; T; 0131; # LATIN CAPITAL LETTER I
         final String T_0x0131_0x49 = String.format("        entry(0x%04x, 0x%04x),\n", 0x0131, 0x49);
 
-        // Generate .java file
         Files.write(
-            genSrcFile,
-            Files.lines(templateFile)
-                .map(line -> line.contains("%%%Entries") ? T_0x0131_0x49 + caseFoldingEntries : line)
-                .collect(Collectors.toList()),
-            StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
+                genSrcFile,
+                Files.lines(templateFile)
+                        .map(line -> line.contains("%%%Entries") ? caseFoldingEntries : line)
+                        .map(line -> line.contains("%%%Expanded_Case_Map_Entries") ? T_0x0131_0x49 + expanded_caseFoldingEntries : line)
+                        .collect(Collectors.toList()),
+                StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
     }
 }
+        /*
+        } else {
+            var supportedTypes = "^.*; [CTS]; .*$";
+            var caseFoldingEntries = Files.lines(caseFoldingTxt)
+                    .filter(line -> !line.startsWith("#") && line.matches(supportedTypes))
+                    .map(line -> {
+                        String[] cols = line.split("; ");
+                        return new String[]{cols[0], cols[1], cols[2]};
+                    })
+                    .filter(cols -> {
+                        //  the folding case doesn't map back to the original char.
+                        var cp1 = Integer.parseInt(cols[0], 16);
+                        var cp2 = Integer.parseInt(cols[2], 16);
+                        return Character.toUpperCase(cp2) != cp1 && Character.toLowerCase(cp2) != cp1;
+                    })
+                    .map(cols -> String.format("        entry(0x%s, 0x%s)", cols[0], cols[2]))
+                    .collect(Collectors.joining(",\n", "", ""));
+
+            // hack, hack, hack! the logic does not pick 0131. just add manually to support 'I's.
+            // 0049; T; 0131; # LATIN CAPITAL LETTER I
+            final String T_0x0131_0x49 = String.format("        entry(0x%04x, 0x%04x),\n", 0x0131, 0x49);
+
+            // Generate .java file
+            Files.write(
+                    genSrcFile,
+                    Files.lines(templateFile)
+                            .map(line -> line.contains("%%%Entries") ? T_0x0131_0x49 + caseFoldingEntries : line)
+                            .collect(Collectors.toList()),
+                    StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
+        }
+         */
@@ -72,5 +72,22 @@ TARGETS += $(GENSRC_CHARACTERDATA)
 
 ################################################################################
 
+
+# Generated CaseFolding.java source for java.base, produced by the rule below.
+GENSRC_STRINGCASEFOLDING := $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/java/lang/CaseFolding.java
+
+# Inputs of the generator: the Java source template and the Unicode
+# CaseFolding.txt data file.
+# NOTE(review): the output above is placed under jdk/internal/java/lang while
+# the template is read from jdk/internal/lang -- confirm that the template
+# location and the generated package directory are meant to differ.
+STRINGCASEFOLDING_TEMPLATE := $(MODULE_SRC)/share/classes/jdk/internal/lang/CaseFolding.java.template
+CASEFOLDINGTXT := $(MODULE_SRC)/share/data/unicodedata/CaseFolding.txt
+
+# Regenerate whenever the build tools, the template or the data file change.
+# The tool receives, in order: template, CaseFolding.txt, output file.
+$(GENSRC_STRINGCASEFOLDING): $(BUILD_TOOLS_JDK) $(STRINGCASEFOLDING_TEMPLATE) $(CASEFOLDINGTXT)
+	$(call LogInfo, Generating $@)
+	$(call MakeTargetDir)
+	$(TOOL_GENERATECASEFOLDING) \
+	    $(STRINGCASEFOLDING_TEMPLATE) \
+	    $(CASEFOLDINGTXT) \
+	    $(GENSRC_STRINGCASEFOLDING)
+
+TARGETS += $(GENSRC_STRINGCASEFOLDING)
+
+
 endif # include guard
 include MakeIncludeEnd.gmk
@@ -50,22 +50,5 @@ TARGETS += $(GENSRC_INDICCONJUNCTBREAK)
 
 ################################################################################
 
-GENSRC_CASEFOLDING := $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/util/regex/CaseFolding.java
-
-CASEFOLDINGTEMP := $(MODULE_SRC)/share/classes/jdk/internal/util/regex/CaseFolding.java.template
-CASEFOLDINGTXT := $(MODULE_SRC)/share/data/unicodedata/CaseFolding.txt
-
-$(GENSRC_CASEFOLDING): $(BUILD_TOOLS_JDK) $(CASEFOLDINGTEMP) $(CASEFOLDINGTXT)
-	$(call LogInfo, Generating $@)
-	$(call MakeTargetDir)
-	$(TOOL_GENERATECASEFOLDING) \
-	    $(CASEFOLDINGTEMP) \
-	    $(CASEFOLDINGTXT) \
-	    $(GENSRC_CASEFOLDING)
-
-TARGETS += $(GENSRC_CASEFOLDING)
-
-################################################################################
-
 endif # include guard
 include MakeIncludeEnd.gmk
@@ -2039,6 +2039,73 @@ public boolean equalsIgnoreCase(String anotherString) {
                 && regionMatches(true, 0, anotherString, 0, length());
     }
 
+    /**
+     * Compares this {@code String} to another {@code String} for equality,
+     * using <em>Unicode case folding</em>.
+     * <p>
+     * Two strings are considered equal by this method if their case-folded
+     * forms are identical. Case folding is defined by the Unicode Standard in
+     * <a href="https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt">CaseFolding.txt</a>,
+     * including 1:M mappings. For example, {@code "Maße".equalsCaseFold("MASSE")}
+     * returns {@code true}, since the character {@code U+00DF} (sharp s) folds
+     * to {@code "ss"}.
+     * <p>
+     * Case folding is locale-independent and language-neutral, unlike
+     * locale-sensitive transformations such as {@link #toLowerCase()} or
+     * {@link #toUpperCase()}. It is intended for caseless matching,
+     * searching, and indexing.
+     *
+     * @apiNote
+     * This method is the Unicode-compliant alternative to
+     * {@link #equalsIgnoreCase(String)}. It implements full case folding as
+     * defined by the Unicode Standard, which may differ from the simpler
+     * per-character mapping performed by {@code equalsIgnoreCase}.
+     * For example:
+     * <pre>{@code
+     * String a = "Maße";
+     * String b = "MASSE";
+     * boolean equalCaseFold = a.equalsCaseFold(b);       // returns true
+     * boolean equalIgnoreCase = a.equalsIgnoreCase(b);   // returns false
+     * }</pre>
+     *
+     * @param  anotherString
+     *         The {@code String} to compare this {@code String} against;
+     *         may be {@code null}
+     *
+     * @return  {@code true} if the argument is not {@code null} and
+     *          represents the same sequence of characters as this
+     *          string under Unicode case folding; {@code false} otherwise.
+     *
+     * @see     #compareToCaseFold(String)
+     * @see     #equalsIgnoreCase(String)
+     * @see     java.text.Collator
+     * @since   26
+     */
+    public boolean equalsCaseFold(String anotherString) {
+        if (this == anotherString) {
+            return true;
+        }
+        if (anotherString == null) {
+            return false;
+        }
+        byte[] v1 = this.value;
+        byte[] v2 = anotherString.value;
+        var ltr1 = this.coder == LATIN1 ? StringCaseFoldedCharIterator.ofLatin1(v1)
+                                        : StringCaseFoldedCharIterator.ofUTF16(v1);
+        // The second iterator must be selected by the *other* string's coder:
+        // the two strings may use different internal encodings, and decoding
+        // v2 with this string's coder would misread its bytes.
+        var ltr2 = anotherString.coder == LATIN1 ? StringCaseFoldedCharIterator.ofLatin1(v2)
+                                                 : StringCaseFoldedCharIterator.ofUTF16(v2);
+        // Walk both folded sequences in lock step, one char unit at a time.
+        while (ltr1.hasNext() && ltr2.hasNext()) {
+            int ch1 = ltr1.nextChar();
+            int ch2 = ltr2.nextChar();
+            if (ch1 != ch2) {
+                return false;
+            }
+        }
+        // Equal only if both folded sequences ended at the same time.
+        return !ltr1.hasNext() && !ltr2.hasNext();
+    }
+
     /**
      * Compares two strings lexicographically.
      * The comparison is based on the Unicode value of each character in
@@ -2160,6 +2227,85 @@ public int compareToIgnoreCase(String str) {
         return CASE_INSENSITIVE_ORDER.compare(this, str);
     }
 
+    /**
+     * A {@code Comparator} whose ordering of {@code String} objects is the
+     * one defined by {@link #compareToCaseFold(String) compareToCaseFold()}.
+     *
+     * @see     #compareToCaseFold(String)
+     * @since   26
+     */
+    public static final Comparator<String> CASE_FOLD_ORDER
+            = new CaseFoldComparator();
+
+    /**
+     * Compares two strings by stepping through their case-folded char
+     * sequences in parallel.
+     */
+    private static class CaseFoldComparator implements Comparator<String> {
+
+        @Override
+        public int compare(String s1, String s2) {
+            final byte[] leftBytes = s1.value;
+            final byte[] rightBytes = s2.value;
+            final StringCaseFoldedCharIterator left;
+            if (s1.coder == LATIN1) {
+                left = StringCaseFoldedCharIterator.ofLatin1(leftBytes);
+            } else {
+                left = StringCaseFoldedCharIterator.ofUTF16(leftBytes);
+            }
+            final StringCaseFoldedCharIterator right;
+            if (s2.coder == LATIN1) {
+                right = StringCaseFoldedCharIterator.ofLatin1(rightBytes);
+            } else {
+                right = StringCaseFoldedCharIterator.ofUTF16(rightBytes);
+            }
+            for (;;) {
+                final boolean leftHasMore = left.hasNext();
+                final boolean rightHasMore = right.hasNext();
+                if (!leftHasMore || !rightHasMore) {
+                    // At least one side is exhausted: the side with folded
+                    // chars remaining orders after the other one.
+                    if (leftHasMore) {
+                        return 1;
+                    }
+                    return rightHasMore ? -1 : 0;
+                }
+                final int leftChar = left.nextChar();
+                final int rightChar = right.nextChar();
+                if (leftChar != rightChar) {
+                    return leftChar - rightChar;
+                }
+            }
+        }
+    }
+
+    /**
+     * Compares two strings lexicographically using Unicode case folding.
+     * <p>
+     * This method returns an integer whose sign is that of calling {@code compareTo}
+     * on the case-folded versions of the strings. Unicode case folding eliminates
+     * differences in case according to the Unicode Standard, using the mappings
+     * defined in
+     * <a href="https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt">CaseFolding.txt</a>,
+     * including 1:M mappings, such as {@code "ß"} → {@code "ss"}.
+     * <p>
+     * Case folding is a locale-independent, language-neutral form of case mapping,
+     * primarily intended for caseless matching. Unlike {@link #compareToIgnoreCase(String)},
+     * which applies a simpler locale-insensitive uppercase mapping, this method
+     * follows the Unicode-defined <em>full</em> case folding, providing stable and
+     * consistent results across all environments.
+     * <p>
+     * Note that this method does <em>not</em> take locale into account, and may
+     * produce results that differ from locale-sensitive ordering. For locale-aware
+     * comparisons, use {@link java.text.Collator}.
+     *
+     * @apiNote
+     * This method is the Unicode-compliant alternative to
+     * {@link #compareToIgnoreCase(String)}. It implements full case folding
+     * as defined by the Unicode Standard, which may differ from the simpler
+     * per-character mapping performed by {@code compareToIgnoreCase}.
+     * For example:
+     * <pre>{@code
+     * String a = "Maße";
+     * String b = "MASSE";
+     * int cmpCaseFold = a.compareToCaseFold(b);     // returns 0
+     * int cmpIgnoreCase = a.compareToIgnoreCase(b); // returns > 0
+     * }</pre>
+     *
+     * @param   str   the {@code String} to be compared.
+     * @return  a negative integer, zero, or a positive integer as the specified
+     *          String is greater than, equal to, or less than this String,
+     *          ignoring case considerations by case folding.
+     * @see     #equalsCaseFold(String)
+     * @see     #compareToIgnoreCase(String)
+     * @see     java.text.Collator
+     * @since   26
+     */
+    public int compareToCaseFold(String str) {
+        return CASE_FOLD_ORDER.compare(this, str);
+    }
+
     /**
      * Tests if two string regions are equal.
      * <p>
 
@@ -0,0 +1,60 @@
+package java.lang;
+
+import jdk.internal.java.lang.CaseFolding;
+
+/**
+ * Iterates over the {@code char} units of the case-folded form of a string's
+ * backing byte array. Code points are decoded one at a time by the
+ * encoding-specific {@link #codePointAt(byte[], int)}, folded with
+ * {@code CaseFolding.fold(int)}, and the resulting chars are handed out one
+ * by one before the next code point is decoded.
+ */
+abstract class StringCaseFoldedCharIterator {
+
+    protected final byte[] value;  // underlying byte array
+    protected final int length;    // length of the byte array in char units
+    protected int index;           // current position, in char units
+    protected char[] folded;       // folding of the most recently decoded code point
+    protected int foldedIndex;     // next position to return from folded[]
+
+    /**
+     * @param value  the backing byte array to iterate over
+     * @param length the number of char units encoded in {@code value}
+     */
+    StringCaseFoldedCharIterator(byte[] value, int length) {
+        this.value = value;
+        this.length = length;
+        this.index = 0;
+        this.folded = null;
+        this.foldedIndex = 0;
+    }
+
+    /**
+     * Returns {@code true} if buffered folded chars remain or there is
+     * further input to decode.
+     */
+    public boolean hasNext() {
+        return (folded != null && foldedIndex < folded.length) || index < length;
+    }
+
+    /**
+     * Returns the next char of the case-folded sequence, or {@code -1} once
+     * both the folded buffer and the input are exhausted.
+     */
+    public int nextChar() {
+        // Drain the folding of the previously decoded code point first.
+        if (folded != null && foldedIndex < folded.length) {
+            return folded[foldedIndex++];
+        }
+        if (index >= length) {
+            return -1;
+        }
+        int cp = codePointAt(value, index);
+        index += Character.charCount(cp);
+        // NOTE(review): assumes CaseFolding.fold never returns null or an
+        // empty array, since folded[0] is read unconditionally -- confirm.
+        folded = CaseFolding.fold(cp);
+        foldedIndex = 0;
+        return folded[foldedIndex++];
+    }
+
+    /**
+     * Decodes the code point that starts at char-unit position {@code index}
+     * of {@code value}.
+     */
+    protected abstract int codePointAt(byte[] value, int index);
+
+    // Factory for Latin1
+    static StringCaseFoldedCharIterator ofLatin1(byte[] value) {
+        return new StringCaseFoldedCharIterator(value, value.length) {
+            @Override
+            protected int codePointAt(byte[] value, int index) {
+                return StringLatin1.codePointAt(value, index, value.length);
+            }
+        };
+    }
+
+    // Factory for UTF16
+    static StringCaseFoldedCharIterator ofUTF16(byte[] value) {
+        return new StringCaseFoldedCharIterator(value, value.length >> 1) {
+            @Override
+            protected int codePointAt(byte[] value, int index) {
+                // The end bound is in char units (two bytes each), matching
+                // the length given to the constructor. Passing the byte
+                // length would let a trailing lone high surrogate look for a
+                // low surrogate past the end of the array.
+                return StringUTF16.codePointAt(value, index, value.length >> 1);
+            }
+        };
+    }
+}