Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions provider/data/collator/data/collation_reordering_v1.rs.data

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions provider/data/collator/data/collation_tailoring_v1.rs.data

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions provider/data/collator/fingerprints.csv
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ collation/metadata/v1, vi, -> ee
collation/metadata/v1, vi/trad, -> ee
collation/metadata/v1, wo, -> af
collation/metadata/v1, yo, -> af
collation/reordering/v1, <lookup>, 320B, 59 identifiers
collation/reordering/v1, <lookup>, 348B, 60 identifiers
collation/reordering/v1, <total>, 9464B, 8024B, 30 unique payloads
collation/reordering/v1, am, 320B, 272B, 3989bb25c1c894c
collation/reordering/v1, ar, 312B, 264B, 44ac22224ef1784d
Expand Down Expand Up @@ -167,6 +167,7 @@ collation/reordering/v1, ru, -> be
collation/reordering/v1, si, 316B, 268B, 76e6c2377e3f318f
collation/reordering/v1, si/dict, -> si
collation/reordering/v1, sr, -> be
collation/reordering/v1, sr-Cyrl-ME, -> az
collation/reordering/v1, sr-Latn, -> az
collation/reordering/v1, ta, 316B, 268B, b337e265c95fe58e
collation/reordering/v1, te, 316B, 268B, 53be041ee9b02cf9
Expand All @@ -182,7 +183,7 @@ collation/reordering/v1, und-Hant, -> und-Hani/stroke
collation/reordering/v1, ur, -> ar
collation/root/v1, <singleton>, 131040B, 130923B, 787ce37ea65e1e9
collation/special/primaries/v1, <singleton>, 72B, 42B, 46181a77c61fe445
collation/tailoring/v1, <lookup>, 608B, 102 identifiers
collation/tailoring/v1, <lookup>, 618B, 103 identifiers
collation/tailoring/v1, <total>, 920210B, 908458B, 93 unique payloads
collation/tailoring/v1, af, 1004B, 877B, 8e3ca7ba0c0efe4b
collation/tailoring/v1, ar, 8112B, 7987B, f87603c60cb612d4
Expand Down Expand Up @@ -260,6 +261,7 @@ collation/tailoring/v1, sk, 968B, 842B, ab8b5cec368b01e7
collation/tailoring/v1, sl, 1112B, 985B, 3d4815266923b35f
collation/tailoring/v1, sq, 1144B, 1018B, 60fb9f14b8173e05
collation/tailoring/v1, sr, -> bs-Cyrl
collation/tailoring/v1, sr-Cyrl-ME, -> bs
collation/tailoring/v1, sr-Latn, -> bs
collation/tailoring/v1, sv, 1742B, 1616B, 7ec4028488f2d504
collation/tailoring/v1, sv/trad, 1742B, 1616B, 31420d06d80b25bb
Expand Down
59 changes: 51 additions & 8 deletions provider/source/src/collator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@ use crate::IterableDataProviderCached;
use crate::SourceDataProvider;
use icu::collator::provider::*;
use icu::collections::codepointtrie::CodePointTrie;
use icu::locale::subtags::{language, script};
use icu::locale::{
locale,
subtags::{language, script},
};
#[cfg(any(feature = "use_wasm", feature = "use_icu4c"))]
use icu_codepointtrie_builder::CodePointTrieBuilder;
use icu_provider::prelude::*;
Expand All @@ -30,16 +33,19 @@ fn id_to_file_name(id: DataIdentifierBorrowed) -> String {
.replace("posix", "POSIX")
};

// und-Hant -> zh_stroke
// und-Hans -> zh_pinyin
// und-Hani/x -> zh_x
// und_Hant -> zh_stroke
// und_Hans -> zh_pinyin
// und_Hani/x -> zh_x
// sr_Cyrl_ME -> sr_Latn

if s == "und_Hant" {
return "zh_stroke".into();
} else if s == "und_Hans" {
return "zh_pinyin".into();
} else if s == "und_Hani" {
s = "zh".into();
} else if s == "sr_Cyrl_ME" {
s = "sr_Latn".into();
}

s.push('_');
Expand All @@ -53,7 +59,7 @@ fn id_to_file_name(id: DataIdentifierBorrowed) -> String {
s
}

fn file_name_to_id(file_name: &str) -> Vec<DataIdentifierCow<'static>> {
fn file_name_to_ids(file_name: &str) -> Vec<DataIdentifierCow<'static>> {
let (mut language, mut variant) = file_name.rsplit_once('_').unwrap();
if language == "root" {
language = "und";
Expand All @@ -72,19 +78,27 @@ fn file_name_to_id(file_name: &str) -> Vec<DataIdentifierCow<'static>> {
// Pinyin is stored in both und-Hans and und-Hani/pinyin
r.push(DataIdentifierCow::from_borrowed_and_owned(
Default::default(),
"und-Hans".parse().unwrap(),
locale!("und-Hans").into(),
));
} else if variant == "stroke" {
// Stroke is stored in both und-Hant and und-Hani/stroke
r.push(DataIdentifierCow::from_borrowed_and_owned(
Default::default(),
"und-Hant".parse().unwrap(),
locale!("und-Hant").into(),
));
}
} else if variant == "standard" {
variant = "";
}

if language == "sr_Latn" {
// sr-Cyrl-ME falls back to sr-ME, which falls back to sr-Latn.
r.push(DataIdentifierCow::from_borrowed_and_owned(
Default::default(),
locale!("sr-Cyrl-ME").into(),
));
}

let marker_attributes = match variant {
"traditional" => DataMarkerAttributes::from_str_or_panic("trad").to_owned(),
"phonebook" => DataMarkerAttributes::from_str_or_panic("phonebk").to_owned(),
Expand All @@ -99,6 +113,35 @@ fn file_name_to_id(file_name: &str) -> Vec<DataIdentifierCow<'static>> {
r
}

/// Checks that the keys of the `collations` parent-locale table in CLDR's
/// `supplemental/parentLocales.json` are exactly the locales listed in
/// `handled_overrides`, in the same order.
///
/// The assertion fails as soon as CLDR adds, removes, or reorders a key, which
/// signals that the list below needs to be revisited.
#[test]
fn test_all_fallback_overrides_handled() {
    // Locales this test expects to find as collation parent-locale keys.
    let handled_overrides = [
        "sr-Cyrl-ME",
        "yue",
        "yue-CN",
        "yue-Hans",
        "yue-Hans-CN",
        "yue-Hant",
    ];

    let provider = SourceDataProvider::new_testing();

    // Parse the supplemental parent-locale resource from the testing CLDR data.
    let parent_locales_resource = provider
        .cldr()
        .unwrap()
        .core()
        .read_and_parse::<super::cldr_serde::parent_locales::Resource>(
            "supplemental/parentLocales.json",
        )
        .unwrap();

    // Only the keys matter here; the parents they map to are not inspected.
    let required_overrides: Vec<_> = parent_locales_resource
        .supplemental
        .parent_locales
        .collations
        .keys()
        .collect();

    assert_eq!(required_overrides, handled_overrides);
}

impl SourceDataProvider {
fn load_toml<T>(&self, id: DataIdentifierBorrowed, suffix: &str) -> Result<&T, DataError>
where
Expand Down Expand Up @@ -130,7 +173,7 @@ impl SourceDataProvider {
file_name
})
})
.flat_map(|s| file_name_to_id(&s))
.flat_map(|s| file_name_to_ids(&s))
.collect())
}
}
Expand Down
Loading