2222 * or visit www.oracle.com if you need additional information or have any
2323 * questions.
2424 */
25-
2625package build .tools .generatecharacter ;
2726
28- import java .io .IOException ;
2927import java .nio .file .Files ;
3028import java .nio .file .Paths ;
3129import java .nio .file .StandardOpenOption ;
30+ import java .util .Arrays ;
3231import java .util .stream .Collectors ;
33- import java .util .stream .Stream ;
32+ import java .util .stream .IntStream ;
3433
/**
 * Build-time generator: expands a Java source template with case-folding
 * tables derived from a Unicode {@code CaseFolding.txt}-style data file.
 *
 * <p>Two tables are produced from the same data file:
 * <ul>
 *   <li>the {@code %%%Entries} template line is replaced by
 *       {@code new CaseFoldingEntry(codePoint, new char[] {...})} items built
 *       from the status {@code C} and {@code F} lines (full, possibly
 *       one-to-many folding; labeled "java.lang" below);</li>
 *   <li>the {@code %%%Expanded_Case_Map_Entries} template line is replaced by
 *       {@code entry(from, to)} items built from the status {@code C},
 *       {@code T} and {@code S} lines whose folding is not already reachable
 *       through {@link Character#toUpperCase(int)} or
 *       {@link Character#toLowerCase(int)} (labeled "util.regex" below).</li>
 * </ul>
 */
public class CaseFolding {

    /** Data lines carrying a common (C) or full (F) folding. */
    private static final java.util.regex.Pattern FULL_FOLDING_LINE =
            java.util.regex.Pattern.compile("^.*; [CF]; .*$");

    /** Data lines carrying a common (C), Turkic (T) or simple (S) folding. */
    private static final java.util.regex.Pattern EXPANDED_FOLDING_LINE =
            java.util.regex.Pattern.compile("^.*; [CTS]; .*$");

    /**
     * Generates the output source file.
     *
     * @param args {@code args[0]} template file, {@code args[1]} case-folding
     *             data file, {@code args[2]} file to generate (created or
     *             truncated); any further arguments are ignored
     * @throws IllegalArgumentException if fewer than three arguments are given
     * @throws Throwable on any I/O or data-parsing failure
     */
    public static void main(String[] args) throws Throwable {
        if (args.length < 3) {
            throw new IllegalArgumentException(
                    "Usage: java CaseFolding TemplateFile CaseFolding.txt GeneratedFile"
                    + " (got " + args.length + " argument(s))");
        }
        var templateFile = Paths.get(args[0]);
        var caseFoldingTxt = Paths.get(args[1]);
        var genSrcFile = Paths.get(args[2]);

        // java.lang: full/1:M case folding.
        // Files.lines keeps the file open until the stream is closed, hence
        // the try-with-resources around every use.
        final String caseFoldingEntries;
        try (var lines = Files.lines(caseFoldingTxt)) {
            caseFoldingEntries = lines
                    .filter(line -> !line.startsWith("#")
                            && FULL_FOLDING_LINE.matcher(line).matches())
                    .map(CaseFolding::formatFullFoldingEntry)
                    .collect(Collectors.joining(",\n ", "", ""));
        }

        // util.regex: only the foldings that plain upper/lower casing misses.
        final String expandedCaseFoldingEntries;
        try (var lines = Files.lines(caseFoldingTxt)) {
            expandedCaseFoldingEntries = lines
                    .filter(line -> !line.startsWith("#")
                            && EXPANDED_FOLDING_LINE.matcher(line).matches())
                    .map(line -> line.split("; "))
                    .filter(CaseFolding::isMissedByCaseMapping)
                    .map(cols -> String.format(" entry(0x%s, 0x%s)", cols[0], cols[2]))
                    .collect(Collectors.joining(",\n ", "", ""));
        }

        // hack, hack, hack! the filter above rejects 0049 -> 0131 (because
        // toUpperCase(0x0131) is 0x0049), so the reverse pair is added by hand
        // to support the 'I's.
        // 0049; T; 0131; # LATIN CAPITAL LETTER I
        final String T_0x0131_0x49 = String.format(" entry(0x%04x, 0x%04x),\n ", 0x0131, 0x49);

        // Generate .java file: copy the template, swapping each marker line
        // for the corresponding generated table.
        try (var template = Files.lines(templateFile)) {
            var generated = template
                    .map(line -> {
                        if (line.contains("%%%Entries")) {
                            return caseFoldingEntries;
                        }
                        if (line.contains("%%%Expanded_Case_Map_Entries")) {
                            return T_0x0131_0x49 + expandedCaseFoldingEntries;
                        }
                        return line;
                    })
                    .collect(Collectors.toList());
            Files.write(genSrcFile, generated,
                    StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
        }
    }

    /**
     * Turns one {@code "<cp>; <status>; <cp> [<cp>...]; # name"} data line into
     * a {@code new CaseFoldingEntry(...)} source fragment. The folded code
     * points are emitted as UTF-16 code units, so a supplementary code point
     * contributes a surrogate pair.
     */
    private static String formatFullFoldingEntry(String line) {
        var fields = line.split("; ");
        int cp = Integer.parseInt(fields[0], 16);
        String foldingChars = Arrays.stream(fields[2].trim().split(" "))
                .mapToInt(hex -> Integer.parseInt(hex, 16))
                .flatMap(folded -> new String(Character.toChars(folded)).chars())
                .mapToObj(unit -> String.format("0x%04x", unit))
                .collect(Collectors.joining(", ", "new char[] {", "}"));
        return String.format("\t \t new CaseFoldingEntry(0x%04x, %s)", cp, foldingChars);
    }

    /**
     * Tells whether the folding in {@code cols} (source code point in
     * {@code cols[0]}, folded code point in {@code cols[2]}, both hex) does not
     * map back to the source through either simple case conversion.
     */
    private static boolean isMissedByCaseMapping(String[] cols) {
        int source = Integer.parseInt(cols[0], 16);
        int folded = Integer.parseInt(cols[2], 16);
        return Character.toUpperCase(folded) != source
                && Character.toLowerCase(folded) != source;
    }
}
99+ /*
100+ } else {
101+ var supportedTypes = "^.*; [CTS]; .*$";
102+ var caseFoldingEntries = Files.lines(caseFoldingTxt)
103+ .filter(line -> !line.startsWith("#") && line.matches(supportedTypes))
104+ .map(line -> {
105+ String[] cols = line.split("; ");
106+ return new String[]{cols[0], cols[1], cols[2]};
107+ })
108+ .filter(cols -> {
109+ // the folding case doesn't map back to the original char.
110+ var cp1 = Integer.parseInt(cols[0], 16);
111+ var cp2 = Integer.parseInt(cols[2], 16);
112+ return Character.toUpperCase(cp2) != cp1 && Character.toLowerCase(cp2) != cp1;
113+ })
114+ .map(cols -> String.format(" entry(0x%s, 0x%s)", cols[0], cols[2]))
115+ .collect(Collectors.joining(",\n", "", ""));
116+
117+ // hack, hack, hack! the logic does not pick 0131. just add manually to support 'I's.
118+ // 0049; T; 0131; # LATIN CAPITAL LETTER I
119+ final String T_0x0131_0x49 = String.format(" entry(0x%04x, 0x%04x),\n", 0x0131, 0x49);
120+
121+ // Generate .java file
122+ Files.write(
123+ genSrcFile,
124+ Files.lines(templateFile)
125+ .map(line -> line.contains("%%%Entries") ? T_0x0131_0x49 + caseFoldingEntries : line)
126+ .collect(Collectors.toList()),
127+ StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
128+ }
129+ */
0 commit comments