|
29 | 29 | import java.nio.file.Files; |
30 | 30 | import java.nio.file.Paths; |
31 | 31 | import java.nio.file.StandardOpenOption; |
| 32 | +import java.util.Arrays; |
32 | 33 | import java.util.stream.Collectors; |
| 34 | +import java.util.stream.IntStream; |
33 | 35 | import java.util.stream.Stream; |
34 | 36 |
|
/**
 * Build-time generator that turns the Unicode {@code CaseFolding.txt} data file
 * into a Java source file by substituting generated table entries into a template.
 *
 * <p>Two table flavours are produced, selected by the fourth command-line argument:
 * <ul>
 *   <li>{@code lang_string}: one {@code new CaseFoldingEntry(cp, new char[] {...})}
 *       per data line whose status is {@code C} or {@code F};</li>
 *   <li>anything else: one {@code entry(cp, folded)} per data line whose status is
 *       {@code C}, {@code T} or {@code S} and whose folded code point does not map
 *       back to the original via {@link Character#toUpperCase(int)} or
 *       {@link Character#toLowerCase(int)}.</li>
 * </ul>
 */
public class CaseFolding {

    /**
     * Entry point.
     *
     * @param args {@code args[0]} template file, {@code args[1]} path to
     *             {@code CaseFolding.txt}, {@code args[2]} generated source file,
     *             {@code args[3]} table flavour ({@code "lang_string"} selects the
     *             {@code CaseFoldingEntry} table; any other value selects the
     *             {@code entry(...)} table)
     * @throws Throwable if reading the inputs, parsing a data line, or writing the
     *                   output fails
     */
    public static void main(String[] args) throws Throwable {
        if (args.length != 4) {
            System.err.println("Usage: java CaseFolding TemplateFile CaseFolding.txt CaseFolding.java lang");
            System.exit(1);
        }
        var templateFile = Paths.get(args[0]);
        var caseFoldingTxt = Paths.get(args[1]);
        var genSrcFile = Paths.get(args[2]);
        var pkg = args[3];

        // Files.lines keeps the underlying file open until the stream is closed,
        // so every use is scoped by try-with-resources.
        final String entries;
        try (var dataLines = Files.lines(caseFoldingTxt)) {
            entries = "lang_string".equals(pkg)
                    ? fullFoldingEntries(dataLines)
                    : simpleFoldingEntries(dataLines);
        }

        // Generate .java file: every template line containing the marker is
        // replaced by the generated entries; all other lines are copied verbatim.
        try (var templateLines = Files.lines(templateFile)) {
            Files.write(
                    genSrcFile,
                    templateLines
                            .map(line -> line.contains("%%%Entries") ? entries : line)
                            .collect(Collectors.toList()),
                    StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
        }
    }

    /**
     * Builds the comma-separated {@code CaseFoldingEntry} initializers from the
     * non-comment data lines whose status field is {@code C} or {@code F}.
     */
    private static String fullFoldingEntries(Stream<String> lines) {
        var supportedTypes = "^.*; [CF]; .*$";  // full/1:M case folding
        return lines
                .filter(line -> !line.startsWith("#") && line.matches(supportedTypes))
                .map(CaseFolding::formatFullFoldingEntry)
                .collect(Collectors.joining(",\n"));
    }

    /**
     * Formats one data line as {@code new CaseFoldingEntry(cp, new char[] {...})}.
     * The folded code points are expanded to UTF-16 code units, so a
     * supplementary code point contributes two array elements.
     */
    private static String formatFullFoldingEntry(String line) {
        var fields = line.split("; ");
        var cp = Integer.parseInt(fields[0], 16);
        var foldingChars = Arrays.stream(fields[2].trim().split(" "))
                .mapToInt(hex -> Integer.parseInt(hex, 16))
                .flatMap(folded -> new String(Character.toChars(folded)).chars())
                .mapToObj(unit -> String.format("0x%04x", unit))
                .collect(Collectors.joining(", ", "new char[] {", "}"));
        return String.format("\t\tnew CaseFoldingEntry(0x%04x, %s)", cp, foldingChars);
    }

    /**
     * Builds the comma-separated {@code entry(cp, folded)} list from the
     * non-comment data lines whose status field is {@code C}, {@code T} or
     * {@code S}, keeping only mappings that the {@link Character} case
     * conversions do not already cover. The {@code 0131 -> 0049} entry is
     * always emitted first.
     */
    private static String simpleFoldingEntries(Stream<String> lines) {
        var supportedTypes = "^.*; [CTS]; .*$";
        var entries = lines
                .filter(line -> !line.startsWith("#") && line.matches(supportedTypes))
                .map(line -> line.split("; "))
                .filter(cols -> {
                    // keep only foldings that don't map back to the original char.
                    var cp1 = Integer.parseInt(cols[0], 16);
                    var cp2 = Integer.parseInt(cols[2], 16);
                    return Character.toUpperCase(cp2) != cp1 && Character.toLowerCase(cp2) != cp1;
                })
                .map(cols -> String.format(" entry(0x%s, 0x%s)", cols[0], cols[2]))
                .collect(Collectors.joining(",\n"));

        // hack, hack, hack! the filter above does not pick 0131. just add manually to support 'I's.
        // 0049; T; 0131; # LATIN CAPITAL LETTER I
        final String dotlessIEntry = String.format(" entry(0x%04x, 0x%04x),\n", 0x0131, 0x49);
        return dotlessIEntry + entries;
    }
}
0 commit comments