Skip to content

Commit a8ef673

Browse files
authored
Provide APIs to get directionality of locale (#3474)
1 parent c010e99 commit a8ef673

File tree

22 files changed

+3387
-1
lines changed

22 files changed

+3387
-1
lines changed

components/icu/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ experimental = [
106106
"icu_casemapping",
107107
"icu_datetime_experimental",
108108
"icu_displaynames",
109+
"icu_locid_transform/experimental",
109110
"icu_relativetime",
110111
"icu_compactdecimal",
111112
]

components/locid_transform/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ std = []
5959
bench = ["serde"]
6060
serde = ["dep:serde", "icu_locid/serde", "tinystr/serde", "zerovec/serde", "icu_provider/serde"]
6161
datagen = ["serde", "dep:databake", "zerovec/databake", "icu_locid/databake", "tinystr/databake"]
62+
experimental = []
6263

6364
[[bench]]
6465
name = "locale_canonicalizer"

components/locid_transform/src/canonicalizer.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,11 @@ impl LocaleCanonicalizer {
249249
/// Creates a [`LocaleCanonicalizer`] with a custom [`LocaleExpander`] object.
250250
///
251251
/// For example, use this constructor if you wish to support all languages.
252+
///
253+
/// [📚 Help choosing a constructor](icu_provider::constructors)
254+
/// <div class="stab unstable">
255+
/// ⚠️ The bounds on this function may change over time, including in SemVer minor releases.
256+
/// </div>
252257
pub fn try_new_with_expander_unstable<P>(
253258
provider: &P,
254259
expander: LocaleExpander,
Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
// This file is part of ICU4X. For terms of use, please see the file
2+
// called LICENSE at the top level of the ICU4X source tree
3+
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4+
5+
use crate::provider::*;
6+
use crate::{LocaleExpander, LocaleTransformError};
7+
use icu_locid::subtags::Script;
8+
use icu_locid::Locale;
9+
use icu_provider::prelude::*;
10+
11+
/// The direction in which a script is written.
///
/// [`LocaleDirectionality`] can be used to get this information.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum Direction {
    /// The script is left-to-right.
    LeftToRight,
    /// The script is right-to-left.
    RightToLeft,
}
22+
23+
/// The `LocaleDirectionality` provides methods to determine the direction of a locale based
24+
/// on [`CLDR`] data.
25+
///
26+
/// # Examples
27+
///
28+
/// ```
29+
/// use icu_locid::locale;
30+
/// use icu_locid_transform::{Direction, LocaleDirectionality};
31+
///
32+
/// let ld = LocaleDirectionality::try_new_unstable(&icu_testdata::unstable())
33+
/// .expect("create failed");
34+
///
35+
/// assert_eq!(ld.get(&locale!("en")), Some(Direction::LeftToRight));
36+
/// ```
37+
///
38+
/// [`CLDR`]: http://cldr.unicode.org/
39+
#[derive(Debug)]
40+
pub struct LocaleDirectionality {
41+
script_direction: DataPayload<ScriptDirectionV1Marker>,
42+
expander: LocaleExpander,
43+
}
44+
45+
impl LocaleDirectionality {
46+
/// A constructor which takes a [`DataProvider`] and creates a [`LocaleDirectionality`].
47+
///
48+
/// [📚 Help choosing a constructor](icu_provider::constructors)
49+
/// <div class="stab unstable">
50+
/// ⚠️ The bounds on this function may change over time, including in SemVer minor releases.
51+
/// </div>
52+
pub fn try_new_unstable<P>(provider: &P) -> Result<LocaleDirectionality, LocaleTransformError>
53+
where
54+
P: DataProvider<ScriptDirectionV1Marker>
55+
+ DataProvider<LikelySubtagsForLanguageV1Marker>
56+
+ DataProvider<LikelySubtagsForScriptRegionV1Marker>
57+
+ ?Sized,
58+
{
59+
let expander = LocaleExpander::try_new_unstable(provider)?;
60+
Self::try_new_with_expander_unstable(provider, expander)
61+
}
62+
63+
// Note: This is a custom impl because the bounds on `try_new_unstable` don't suffice
64+
#[doc = icu_provider::gen_any_buffer_docs!(ANY, icu_provider, Self::try_new_unstable)]
65+
pub fn try_new_with_any_provider(
66+
provider: &(impl AnyProvider + ?Sized),
67+
) -> Result<LocaleDirectionality, LocaleTransformError> {
68+
let expander = LocaleExpander::try_new_with_any_provider(provider)?;
69+
Self::try_new_with_expander_unstable(&provider.as_downcasting(), expander)
70+
}
71+
72+
// Note: This is a custom impl because the bounds on `try_new_unstable` don't suffice
#[doc = icu_provider::gen_any_buffer_docs!(BUFFER, icu_provider, Self::try_new_unstable)]
#[cfg(feature = "serde")]
pub fn try_new_with_buffer_provider(
    provider: &(impl BufferProvider + ?Sized),
) -> Result<LocaleDirectionality, LocaleTransformError> {
    // Build the expander from the `BufferProvider` first, so its error is reported first.
    let expander = LocaleExpander::try_new_with_buffer_provider(provider)?;
    // The directionality payload itself is loaded through the deserializing wrapper.
    let deserializing_provider = provider.as_deserializing();
    Self::try_new_with_expander_unstable(&deserializing_provider, expander)
}
81+
82+
/// Creates a [`LocaleDirectionality`] with a custom [`LocaleExpander`] object.
83+
///
84+
/// For example, use this constructor if you wish to support all languages.
85+
///
86+
/// [📚 Help choosing a constructor](icu_provider::constructors)
87+
/// <div class="stab unstable">
88+
/// ⚠️ The bounds on this function may change over time, including in SemVer minor releases.
89+
/// </div>
90+
///
91+
/// # Examples
92+
///
93+
/// ```
94+
/// use icu_locid::locale;
95+
/// use icu_locid_transform::{Direction, LocaleDirectionality, LocaleExpander};
96+
///
97+
/// let ld_default = LocaleDirectionality::try_new_unstable(&icu_testdata::unstable())
98+
/// .expect("create failed");
99+
///
100+
/// assert_eq!(ld_default.get(&locale!("jbn")), None);
101+
///
102+
/// let expander = LocaleExpander::try_new_extended_unstable(&icu_testdata::unstable())
103+
/// .expect("create failed");
104+
/// let ld_extended = LocaleDirectionality::try_new_with_expander_unstable(
105+
/// &icu_testdata::unstable(),
106+
/// expander,
107+
/// ).expect("create failed");
108+
///
109+
/// assert_eq!(ld_extended.get(&locale!("jbn")), Some(Direction::RightToLeft));
110+
/// ```
111+
pub fn try_new_with_expander_unstable<P>(
112+
provider: &P,
113+
expander: LocaleExpander,
114+
) -> Result<LocaleDirectionality, LocaleTransformError>
115+
where
116+
P: DataProvider<ScriptDirectionV1Marker> + ?Sized,
117+
{
118+
let script_direction = provider.load(Default::default())?.take_payload()?;
119+
120+
Ok(LocaleDirectionality {
121+
script_direction,
122+
expander,
123+
})
124+
}
125+
126+
/// Returns the script direction of the given locale.
127+
///
128+
/// Note that the direction is a property of the script of a locale, not of the language. As such,
129+
/// when given a locale without an associated script tag (i.e., `locale!("en")` vs. `locale!("en-Latn")`),
130+
/// this method first tries to infer the script using the language and region before returning its direction.
131+
///
132+
/// If you already have a script struct and want to get its direction, you should use
133+
/// `Locale::from(Some(my_script))` and call this method.
134+
///
135+
/// # Examples
136+
///
137+
/// Using an existing locale:
138+
///
139+
/// ```
140+
/// use icu_locid::locale;
141+
/// use icu_locid_transform::{Direction, LocaleDirectionality};
142+
///
143+
/// let ld = LocaleDirectionality::try_new_unstable(&icu_testdata::unstable())
144+
/// .expect("create failed");
145+
///
146+
/// assert_eq!(ld.get(&locale!("en-US")), Some(Direction::LeftToRight));
147+
///
148+
/// assert_eq!(ld.get(&locale!("ar")), Some(Direction::RightToLeft));
149+
///
150+
/// assert_eq!(ld.get(&locale!("foo")), None);
151+
/// ```
152+
///
153+
/// Using a script directly:
154+
///
155+
/// ```
156+
/// use icu_locid::subtags_script as script;
157+
/// use icu_locid::Locale;
158+
/// use icu_locid_transform::{Direction, LocaleDirectionality};
159+
///
160+
/// let ld = LocaleDirectionality::try_new_unstable(&icu_testdata::unstable())
161+
/// .expect("create failed");
162+
///
163+
/// assert_eq!(ld.get(&Locale::from(Some(script!("Latn")))), Some(Direction::LeftToRight));
164+
/// ```
165+
pub fn get(&self, locale: &Locale) -> Option<Direction> {
166+
let script = self.expander.get_likely_script(&locale.id)?;
167+
168+
if self.script_in_ltr(script) {
169+
Some(Direction::LeftToRight)
170+
} else if self.script_in_rtl(script) {
171+
Some(Direction::RightToLeft)
172+
} else {
173+
None
174+
}
175+
}
176+
177+
/// Returns true if the given locale is right-to-left.
178+
///
179+
/// Note that if this method returns `false`, it does not mean that the locale is left-to-right.
180+
/// You should use `LocaleDirectionality::get` if you need to differentiate between these cases.
181+
///
182+
/// See [`LocaleDirectionality::get`] for more information.
183+
pub fn is_right_to_left(&self, locale: &Locale) -> bool {
184+
self.expander
185+
.get_likely_script(&locale.id)
186+
.map(|s| self.script_in_rtl(s))
187+
.unwrap_or(false)
188+
}
189+
190+
/// Returns true if the given locale is left-to-right.
191+
///
192+
/// Note that if this method returns `false`, it does not mean that the locale is right-to-left.
193+
/// You should use `LocaleDirectionality::get` if you need to differentiate between these cases.
194+
///
195+
/// See [`LocaleDirectionality::get`] for more information.
196+
pub fn is_left_to_right(&self, locale: &Locale) -> bool {
197+
self.expander
198+
.get_likely_script(&locale.id)
199+
.map(|s| self.script_in_ltr(s))
200+
.unwrap_or(false)
201+
}
202+
203+
// Reports whether `script` appears in the `rtl` list of the loaded direction data.
fn script_in_rtl(&self, script: Script) -> bool {
    let directions = self.script_direction.get();
    // The list stores unvalidated strings, so the script is converted to that form first.
    let needle = script.into_tinystr().to_unvalidated();
    // NOTE(review): binary search assumes `rtl` is sorted — confirm the data is generated that way.
    directions.rtl.binary_search(&needle).is_ok()
}
210+
211+
// Reports whether `script` appears in the `ltr` list of the loaded direction data.
fn script_in_ltr(&self, script: Script) -> bool {
    let directions = self.script_direction.get();
    // The list stores unvalidated strings, so the script is converted to that form first.
    let needle = script.into_tinystr().to_unvalidated();
    // NOTE(review): binary search assumes `ltr` is sorted — confirm the data is generated that way.
    directions.ltr.binary_search(&needle).is_ok()
}
218+
}

components/locid_transform/src/expander.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,51 @@ impl LocaleExpander {
515515
TransformResult::Unmodified
516516
}
517517
}
518+
519+
// TODO(3492): consider turning this and a future get_likely_region/get_likely_language public
520+
#[allow(dead_code)]
521+
#[inline]
522+
pub(crate) fn get_likely_script<T: AsRef<LanguageIdentifier>>(
523+
&self,
524+
langid: T,
525+
) -> Option<Script> {
526+
let langid = langid.as_ref();
527+
langid
528+
.script
529+
.or_else(|| self.infer_likely_script(langid.language, langid.region))
530+
}
531+
532+
fn infer_likely_script(&self, language: Language, region: Option<Region>) -> Option<Script> {
533+
let data = self.as_borrowed();
534+
535+
// proceed through _all possible cases_ in order of specificity
536+
// (borrowed from LocaleExpander::maximize):
537+
// 1. language + region
538+
// 2. language
539+
// 3. region
540+
// we need to check all cases, because e.g. for "en-US" the default script is associated
541+
// with "en" but not "en-US"
542+
if language != Language::UND {
543+
if let Some(region) = region {
544+
// 1. we know both language and region
545+
if let Some(script) = data.get_lr(language, region) {
546+
return Some(script);
547+
}
548+
}
549+
// 2. we know language, but we either do not know region or knowing region did not help
550+
if let Some((script, _)) = data.get_l(language) {
551+
return Some(script);
552+
}
553+
}
554+
if let Some(region) = region {
555+
// 3. we know region, but we either do not know language or knowing language did not help
556+
if let Some((_, script)) = data.get_r(region) {
557+
return Some(script);
558+
}
559+
}
560+
// we could not figure out the script from the given locale
561+
None
562+
}
518563
}
519564

520565
#[cfg(feature = "serde")]

components/locid_transform/src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,15 @@
9292
extern crate alloc;
9393

9494
mod canonicalizer;
95+
#[cfg(feature = "experimental")]
96+
mod directionality;
9597
mod error;
9698
mod expander;
9799
pub mod provider;
98100

99101
pub use canonicalizer::LocaleCanonicalizer;
102+
#[cfg(feature = "experimental")]
103+
pub use directionality::{Direction, LocaleDirectionality};
100104
pub use error::LocaleTransformError;
101105
pub use expander::LocaleExpander;
102106

components/locid_transform/src/provider.rs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use alloc::borrow::Cow;
1919
use icu_locid::subtags::{Language, Region, Script, Variant};
2020
use icu_provider::prelude::*;
2121
use tinystr::{TinyAsciiStr, UnvalidatedTinyAsciiStr};
22-
use zerovec::{VarZeroVec, ZeroMap, ZeroSlice};
22+
use zerovec::{VarZeroVec, ZeroMap, ZeroSlice, ZeroVec};
2323

2424
// We use raw TinyAsciiStrs for map keys, as we then don't have to
2525
// validate them as subtags on deserialization. Map lookup can be
@@ -135,6 +135,31 @@ pub struct AliasesV1<'data> {
135135
pub subdivision: ZeroMap<'data, UnvalidatedSubdivision, SemivalidatedSubdivision>,
136136
}
137137

138+
#[icu_provider::data_struct(ScriptDirectionV1Marker = "locid_transform/script_dir@1")]
139+
#[derive(Debug, PartialEq, Clone)]
140+
#[cfg_attr(
141+
feature = "datagen",
142+
derive(serde::Serialize, databake::Bake),
143+
databake(path = icu_locid_transform::provider),
144+
)]
145+
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
146+
/// This directionality data is used to determine the script directionality of a locale.
147+
///
148+
/// <div class="stab unstable">
149+
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
150+
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
151+
/// to be stable, their Rust representation might not be. Use with caution.
152+
/// </div>
153+
#[yoke(prove_covariance_manually)]
154+
pub struct ScriptDirectionV1<'data> {
155+
/// Scripts in right-to-left direction.
156+
#[cfg_attr(feature = "serde", serde(borrow))]
157+
pub rtl: ZeroVec<'data, UnvalidatedScript>,
158+
/// Scripts in left-to-right direction.
159+
#[cfg_attr(feature = "serde", serde(borrow))]
160+
pub ltr: ZeroVec<'data, UnvalidatedScript>,
161+
}
162+
138163
#[icu_provider::data_struct(LikelySubtagsV1Marker = "locid_transform/likelysubtags@1")]
139164
#[derive(Debug, PartialEq, Clone)]
140165
#[cfg_attr(

ffi/diplomat/tests/missing_apis.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,13 @@
1414
# Please check in with @Manishearth, @robertbastian, or @sffc if you have questions
1515

1616

17+
icu::locid_transform::Direction#Enum
18+
icu::locid_transform::LocaleDirectionality#Struct
19+
icu::locid_transform::LocaleDirectionality::get#FnInStruct
20+
icu::locid_transform::LocaleDirectionality::is_left_to_right#FnInStruct
21+
icu::locid_transform::LocaleDirectionality::is_right_to_left#FnInStruct
22+
icu::locid_transform::LocaleDirectionality::try_new_unstable#FnInStruct
23+
icu::locid_transform::LocaleDirectionality::try_new_with_expander_unstable#FnInStruct
1724
icu::plurals::PluralRules::try_new#FnInStruct
1825
icu::plurals::PluralRules::try_new_cardinal#FnInStruct
1926
icu::plurals::PluralRules::try_new_ordinal#FnInStruct

provider/datagen/src/registry.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,7 @@ registry!(
281281
RegionDisplayNamesV1Marker,
282282
LanguageDisplayNamesV1Marker,
283283
LocaleDisplayNamesV1Marker,
284+
ScriptDirectionV1Marker,
284285
ScriptDisplayNamesV1Marker,
285286
VariantDisplayNamesV1Marker,
286287
LongSecondRelativeTimeFormatDataV1Marker,

0 commit comments

Comments
 (0)