Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package org.unicode.cldr.util;

import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.util.ULocale;
import java.util.Comparator;

public final class CollatorHelper {
public static final RuleBasedCollator EMOJI_COLLATOR = makeEmojiCollator();
Expand All @@ -13,6 +15,7 @@ public final class CollatorHelper {
public static final RuleBasedCollator ROOT_PRIMARY = makeRootPrimary();
public static final RuleBasedCollator ROOT_PRIMARY_SHIFTED = makeRootPrimaryShifted();
public static final RuleBasedCollator ROOT_SECONDARY = makeRootSecondary();
public static final Comparator<String> ROOT_INSENSITIVE = makeROOT_INSENSITIVE();

private static RuleBasedCollator makeEmojiCollator() {
ULocale uLocale = ULocale.forLanguageTag("en-u-co-emoji");
Expand Down Expand Up @@ -64,4 +67,17 @@ private static RuleBasedCollator makeRootSecondary() {
col.setStrength(Collator.SECONDARY);
return (RuleBasedCollator) col.freeze();
}

private static Comparator<String> makeROOT_INSENSITIVE() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A better name would be CASE_FOLDED (it has nothing to do with root or collation).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do that in a later round.
Sorry for the noise; just trying to move this forward.

// make our own copy to avoid static ordering
final RuleBasedCollator secondary = makeRootSecondary();
return new Comparator<String>() {
/**
 * Compares two strings after case folding.
 *
 * <p>Each argument is passed through {@code UCharacter.foldCase} with an options value of
 * {@code 0}, and the folded results are then compared by the captured {@code secondary}
 * collator.
 *
 * @param o1 the first string to compare
 * @param o2 the second string to compare
 * @return the result of {@code secondary.compare} on the two case-folded strings
 */
@Override
public int compare(String o1, String o2) {
    // Fold o1 first, then o2, and hand both straight to the collator.
    return secondary.compare(UCharacter.foldCase(o1, 0), UCharacter.foldCase(o2, 0));
}
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.Multimap;
import com.google.common.collect.Ordering;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R3;
Expand All @@ -29,13 +30,16 @@
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.unicode.cldr.test.CoverageLevel2;
import org.unicode.cldr.util.Annotations;
import org.unicode.cldr.util.Annotations.AnnotationSet;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.CollatorHelper;
import org.unicode.cldr.util.Emoji;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.Level;
Expand Down Expand Up @@ -281,34 +285,41 @@
}
}

// TODO CLDR-16947 - this test should migrate into
// CheckDisplayCollisions-run-against-derived-annotations (see issue)
// TODO CLDR-19189
public void TestUniqueness() {
// if (logKnownIssue(
// "CLDR-16947", "skip duplicate TestUniqueness in favor of
// CheckDisplayCollisions")) {
// return;
// }
Set<String> locales = new TreeSet<>();
locales.add("en");
if (!TEST_ONLY_ENGLISH_UNIQUENESS) {
locales.addAll(Annotations.getAvailable());
locales.remove("root");
}
/*
* Note: "problems" here is a work-around for what appears to be a deficiency
* in the function sourceLocation, involving the call stack. Seemingly sourceLocation
* can't handle the "->" notation used for parallelStream().forEach() if
* uniquePerLocale calls errln directly.
* "problems" is here to collect and sort issues in parallel,
* and avoid issues calling errln() from a lambda.
*/
Set<String> problems = new HashSet<>();
locales.parallelStream().forEach(locale -> uniquePerLocale(locale, problems));
Set<String> problems =
locales.parallelStream()
.flatMap(locale -> uniquePerLocale(locale))
.collect(Collectors.toCollection(() -> new TreeSet<>()));
if (!problems.isEmpty()) {
if (logKnownIssue(
"CLDR-19189",
"cased collision in annotations:\n" + String.join("\n", problems))) {
return;
}
problems.forEach(s -> errln(s));
}
}

private void uniquePerLocale(String locale, Set<String> problems) {
private Stream<String> uniquePerLocale(String locale) {
Set<String> problems = new TreeSet<>();
logln("uniqueness: " + locale);
Multimap<String, String> nameToEmoji = TreeMultimap.create();
// Keys are ordered with a case-insensitive comparator, so names that differ
// only by case land under the same key.
// Each value is a pair of (original name, emoji).
Multimap<String, Pair<String, String>> nameToEmoji =
TreeMultimap.create(CollatorHelper.ROOT_INSENSITIVE, Ordering.natural());
AnnotationSet data = Annotations.getDataSet(locale);
for (String emoji : Emoji.getAllRgi()) {
String name = data.getShortName(emoji);
Expand All @@ -319,37 +330,42 @@
throw new IllegalArgumentException(
CldrUtility.INHERITANCE_MARKER + " in name of " + emoji + " in " + locale);
}
nameToEmoji.put(name, emoji);
nameToEmoji.put(name, Pair.of(name, emoji));
}
Multimap<String, String> duplicateNameToEmoji = null;
for (Entry<String, Collection<String>> entry : nameToEmoji.asMap().entrySet()) {
for (Entry<String, Collection<Pair<String, String>>> entry :
nameToEmoji.asMap().entrySet()) {
String name = entry.getKey();
Collection<String> emojis = entry.getValue();
final Collection<Pair<String, String>> emojis = entry.getValue();
if (duplicateNameToEmoji == null) {
duplicateNameToEmoji = TreeMultimap.create();
}
if (emojis.size() > 1) {
synchronized (problems) {
if (problems.add(
"Duplicate name in "
+ locale
+ ": “"
+ name
+ "” for "
+ Joiner.on(" & ").join(emojis))) {
int debug = 0;
final String prefix = "Duplicate name in " + locale + ": “" + name + "” for ";
final StringBuilder remainder = new StringBuilder();
for (final Pair<String, String> emoji : emojis) {
duplicateNameToEmoji.put(emoji.getFirst(), emoji.getSecond());
if (remainder.length() > 0) { // ampersand after the first item
remainder.append(" & ");
}
remainder.append("“").append(emoji.getSecond()).append("”");
if (!emoji.getFirst().equals(name)) {
// case-insensitive collision, so note that
remainder.append("(≈“" + emoji.getFirst() + "”) ");
}
}
if (duplicateNameToEmoji == null) {
duplicateNameToEmoji = TreeMultimap.create();
}
duplicateNameToEmoji.putAll(name, emojis);
problems.add(prefix + remainder.toString());
}
}
if (isVerbose() && duplicateNameToEmoji != null && !duplicateNameToEmoji.isEmpty()) {
// TODO CLDR-16947: the following will print out in an interleaved way due to threading.
System.out.println("\nCollisions");
for (Entry<String, String> entry : duplicateNameToEmoji.entries()) {
String emoji = entry.getValue();
System.out.println(locale + "\t" + eng.getShortName(emoji) + "\t" + emoji);
}
}
return problems.stream();
}

public void testAnnotationPaths() {
Expand Down Expand Up @@ -703,7 +719,7 @@
fillNamesAndSearchKeywords(
file, namesFound, searchKeywordsFound); // freezes the results

warnln(

Check warning on line 722 in tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestAnnotations.java

View workflow job for this annotation

GitHub Actions / build

(TestAnnotations.java:722) Warning: FYI, RGI: 3953 namesFound: 4342 searchKeywordsFound: 4322
Joiner.on('\t')
.join(
"FYI, RGI:",
Expand Down
Loading