Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 37 additions & 1 deletion components/collections/codepointtrie_builder/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use icu_collections::codepointtrie::TrieType;
use icu_collections::codepointtrie::{TrieType, TrieValue};

use crate::{CodePointTrieBuilder, CodePointTrieBuilderData};

/// Returns the type and width arguments for `umutablecptrie_buildImmutable`
pub(crate) fn args_for_build_immutable<U>(trie_type: TrieType) -> (u32, u32) {
Expand All @@ -18,3 +20,37 @@ pub(crate) fn args_for_build_immutable<U>(trie_type: TrieType) -> (u32, u32) {
};
(trie_type, width)
}

impl<T> CodePointTrieBuilder<'_, T>
where
    T: TrieValue,
{
    /// Calls `f` with a `(code point, value)` pair for each entry of the
    /// builder's data whose value differs from `default_value`.
    ///
    /// * `ValuesByCodePoint`: the slice index is used as the code point.
    /// * `ByCodePoint`: the closure is queried for every `u32` in
    ///   `0..=char::MAX`; code points for which it returns `None` are skipped.
    /// * `Map`: only the keys present in the map are visited.
    pub(crate) fn for_each_code_point(&self, mut f: impl FnMut((u32, T))) {
        let default = self.default_value;
        match &self.data {
            CodePointTrieBuilderData::ValuesByCodePoint(values) => values
                .iter()
                .copied()
                .enumerate()
                .filter(|&(_, value)| value != default)
                .for_each(|(index, value)| f((index as u32, value))),
            CodePointTrieBuilderData::ByCodePoint(lookup) => (0..=(char::MAX as u32))
                .filter_map(|cp| lookup(cp).map(|value| (cp, value)))
                .filter(|&(_, value)| value != default)
                .for_each(&mut f),
            CodePointTrieBuilderData::Map(map) => map
                .iter()
                .map(|(&cp, &value)| (cp, value))
                .filter(|&(_, value)| value != default)
                .for_each(&mut f),
        }
    }
}
23 changes: 22 additions & 1 deletion components/collections/codepointtrie_builder/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@
)
)]

use std::collections::HashMap;
use std::panic::RefUnwindSafe;
use std::panic::UnwindSafe;

use icu_collections::codepointtrie::TrieType;
use icu_collections::codepointtrie::TrieValue;

Expand All @@ -103,13 +107,30 @@ mod native;
///
/// [`CodePointTrie`]: icu_collections::codepointtrie::CodePointTrie
#[non_exhaustive]
#[derive(Debug)]
pub enum CodePointTrieBuilderData<'a, T> {
/// A list of values for each code point, starting from code point 0.
///
/// For example, the value for U+0020 (space) should be at index 32 in the slice.
/// Index 0 sets the value for the U+0000 (NUL).
///
/// Entries equal to the builder's `default_value` are skipped when the
/// builder enumerates the data.
ValuesByCodePoint(&'a [T]),
/// A closure that returns a value for a code point.
///
/// The builder calls this for every `u32` in `0..=char::MAX` (inclusive).
/// Returning `None` skips that code point; a returned value equal to the
/// builder's `default_value` is skipped as well.
ByCodePoint(Box<dyn Fn(u32) -> Option<T> + Send + Sync + UnwindSafe + RefUnwindSafe + 'a>),
/// A map from code points to values.
///
/// Only code points present as keys are visited. Entries whose value equals
/// the builder's `default_value` are skipped.
Map(HashMap<u32, T>),
}

impl<'a, T: std::fmt::Debug> std::fmt::Debug for CodePointTrieBuilderData<'a, T> {
    /// Formats the variant as a debug tuple.
    ///
    /// The `ByCodePoint` closure cannot be printed, so that variant is
    /// rendered as its bare name with no fields.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the variant name and the (optional) printable payload first,
        // then build the tuple in one place.
        let (variant, payload): (&str, Option<&dyn std::fmt::Debug>) = match self {
            Self::ValuesByCodePoint(values) => {
                ("ValuesByCodePoint", Some(values as &dyn std::fmt::Debug))
            }
            Self::ByCodePoint(_) => ("ByCodePoint", None),
            Self::Map(map) => ("Map", Some(map as &dyn std::fmt::Debug)),
        };

        let mut tuple = f.debug_tuple(variant);
        if let Some(field) = payload {
            tuple.field(field);
        }
        tuple.finish()
    }
}

/// Settings for building a [`CodePointTrie`].
Expand Down
24 changes: 9 additions & 15 deletions components/collections/codepointtrie_builder/src/native.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::CodePointTrieBuilder;
use crate::CodePointTrieBuilderData;

use icu_collections::codepointtrie::TrieType;
use icu_collections::codepointtrie::TrieValue;
Expand Down Expand Up @@ -94,21 +93,16 @@ where
panic!("cpt builder returned error code {error}");
}

let CodePointTrieBuilderData::ValuesByCodePoint(values) = cpt_builder.data;

for (cp, value) in values.iter().enumerate() {
let value = value.to_u32();
if value != cpt_builder.default_value.to_u32() {
unsafe {
// safety: builder is a valid UMutableCPTrie
// safety: we're passing a valid error pointer
umutablecptrie_set(builder, cp as u32, value, &mut error);
}
if error != 0 {
panic!("cpt builder returned error code {error}");
}
cpt_builder.for_each_code_point(|(cp, value)| {
unsafe {
// safety: builder is a valid UMutableCPTrie
// safety: we're passing a valid error pointer
umutablecptrie_set(builder, cp, value.to_u32(), &mut error);
}
}
if error != 0 {
panic!("cpt builder returned error code {error}");
}
});

let (trie_type, width) =
crate::common::args_for_build_immutable::<T::ULE>(cpt_builder.trie_type);
Expand Down
11 changes: 3 additions & 8 deletions components/collections/codepointtrie_builder/src/wasm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::CodePointTrieBuilder;
use crate::CodePointTrieBuilderData;
use icu_collections::codepointtrie::CodePointTrie;
use icu_collections::codepointtrie::CodePointTrieHeader;
use icu_collections::codepointtrie::TrieValue;
Expand Down Expand Up @@ -229,13 +228,9 @@ where
&error_code_ptr,
);

let CodePointTrieBuilderData::ValuesByCodePoint(values) = builder.data;
for (cp, value) in values.iter().enumerate() {
let num = value.to_u32();
if num != builder.default_value.to_u32() {
wasm.umutablecptrie_set(&trie_ptr, cp as u32, num, &error_code_ptr);
}
}
builder.for_each_code_point(|(cp, value)| {
wasm.umutablecptrie_set(&trie_ptr, cp, value.to_u32(), &error_code_ptr);
});

let (trie_type, width) = crate::common::args_for_build_immutable::<T::ULE>(builder.trie_type);

Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions provider/data/properties/fingerprints.csv
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ property/binary/white/space/v1, <singleton>, 92B, 62B, 8f045d65fbaa6724
property/binary/xdigit/v1, <singleton>, 488B, 459B, 334cc78d29ee5f0e
property/binary/xid/continue/v1, <singleton>, 4868B, 4839B, 386b045eb6522e2f
property/binary/xid/start/v1, <singleton>, 4178B, 4149B, 5090f2f1b95a3480
property/enum/bidi/class/v1, <singleton>, 9884B, 9828B, 327160efebade591
property/enum/bidi/class/v1, <singleton>, 9888B, 9832B, 6e461ef365a53dcc
property/enum/bidi/mirroring/glyph/v1, <singleton>, 4739B, 4673B, 34ba0898d34a25fb
property/enum/canonical/combining/class/v1, <singleton>, 5452B, 5394B, 6c5392d6fc5f2889
property/enum/east/asian/width/v1, <singleton>, 5004B, 4947B, 881a7ee1402b1937
Expand All @@ -81,7 +81,7 @@ property/enum/joining/group/v1, <singleton>, 1428B, 1370B, a24c126ff86065d8
property/enum/joining/type/v1, <singleton>, 7136B, 7079B, 39f05bd0b9931b55
property/enum/line/break/v1, <singleton>, 15328B, 15272B, d392409dfdd00a74
property/enum/numeric/type/v1, <singleton>, 5900B, 5842B, 9ffe79ebbb54f863
property/enum/script/v1, <singleton>, 25902B, 25847B, 1b972fe75ad7b369
property/enum/script/v1, <singleton>, 25900B, 25845B, f1ad61c965485250
property/enum/sentence/break/v1, <singleton>, 13964B, 13907B, 36e6dafc005907f8
property/enum/vertical/orientation/v1, <singleton>, 3280B, 3222B, 36f945b6b4d90bc8
property/enum/word/break/v1, <singleton>, 11136B, 11079B, ae4831ff49f66415
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions provider/source/data/debug/property/enum/bidi/class/v1.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions provider/source/data/debug/property/enum/script/v1.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

72 changes: 40 additions & 32 deletions provider/source/src/properties/bidi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,41 +40,49 @@ impl DataProvider<PropertyEnumBidiMirroringGlyphV1> for SourceDataProvider {
let bpt_trie = bpt.build_codepointtrie::<u16>()?;
let bpt_lookup = bpt.values_to_names_long();

let trie_vals = (0..=(char::MAX as u32)).map(|cp| {
let mut r = BidiMirroringGlyph::default();
if !bidi_m_cpinvlist.contains32(cp) {
return r;
}
r.mirrored = true;
r.mirroring_glyph = Some(bmg_trie.get32(cp)).filter(|&cp| cp as u32 != 0);
r.paired_bracket_type = match bpt_lookup[&(bpt_trie.get32(cp))] {
"Open" => BidiPairedBracketType::Open,
"Close" => BidiPairedBracketType::Close,
_ => BidiPairedBracketType::None,
};
if r.mirrored && r.mirroring_glyph.is_none() {
log::trace!(
"Missing mirroring glyph: U+{cp:X}: {}",
char::try_from_u32(cp).unwrap()
);
}
r
});
let code_point_trie = CodePointTrieBuilder {
data: CodePointTrieBuilderData::ByCodePoint(Box::new(|cp| {
if !bidi_m_cpinvlist.contains32(cp) {
return None;
}
Some(BidiMirroringGlyph {
mirrored: true,
mirroring_glyph: {
let m = bmg_trie.get32(cp);
if m as u32 == 0 {
log::trace!(
"Missing mirroring glyph: U+{cp:X}: {}",
char::try_from_u32(cp).unwrap()
);
None
} else {
Some(m)
}
},
paired_bracket_type: match bpt_lookup.get(&(bpt_trie.get32(cp))).copied() {
Some("Open") => BidiPairedBracketType::Open,
Some("Close") => BidiPairedBracketType::Close,
Some("None") => BidiPairedBracketType::None,
_ => {
log::trace!(
"Missing paired-bracket-type: U+{cp:X}: {}",
char::try_from_u32(cp).unwrap()
);
BidiPairedBracketType::None
}
},
})
})),
default_value: BidiMirroringGlyph::default(),
error_value: BidiMirroringGlyph::default(),
trie_type: TrieType::Small,
}
.build();

Ok(DataResponse {
metadata: Default::default(),
payload: DataPayload::from_owned(
icu::properties::provider::PropertyCodePointMap::CodePointTrie(
CodePointTrieBuilder {
data: CodePointTrieBuilderData::ValuesByCodePoint(
&trie_vals.collect::<Vec<_>>(),
),
default_value: BidiMirroringGlyph::default(),
error_value: BidiMirroringGlyph::default(),
trie_type: TrieType::Small,
}
.build(),
),
icu::properties::provider::PropertyCodePointMap::CodePointTrie(code_point_trie),
),
})
}
Expand All @@ -86,7 +94,7 @@ impl DataProvider<PropertyEnumBidiMirroringGlyphV1> for SourceDataProvider {
) -> Result<DataResponse<PropertyEnumBidiMirroringGlyphV1>, DataError> {
self.check_req::<PropertyEnumBidiMirroringGlyphV1>(req)?;
return Err(DataError::custom(
"icu_provider_source must be built with use_icu4c or use_wasm to build Bidi auxiliary properties data",
"icu_provider_source must be built with `use_icu4c` or `use_wasm` to enumerated properties data",
));
}
}
Expand Down
34 changes: 23 additions & 11 deletions provider/source/src/properties/enum_codepointtrie.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ use icu::properties::provider::{names::*, *};
use icu_provider::prelude::*;
use std::collections::BTreeMap;
use std::collections::HashSet;
use std::convert::TryFrom;
use zerotrie::ZeroTrieSimpleAscii;
use zerovec::ule::NichedOption;

Expand Down Expand Up @@ -67,21 +66,34 @@ impl SourceDataProvider {
}

impl super::uprops_serde::enumerated::EnumeratedPropertyMap {
pub(crate) fn build_codepointtrie<T: TrieValue>(
pub(crate) fn build_codepointtrie<T: TrieValue + Copy>(
&self,
) -> Result<CodePointTrie<'static, T>, DataError> {
let code_point_trie = CodePointTrie::try_from(&self.code_point_trie)
.map_err(|e| DataError::custom("CPT").with_display_context(&e))?;
let values = self.ranges.iter().flat_map(|r| {
let v = T::try_from_u32(r.v as u32).ok().unwrap();
(r.a..=r.b).map(move |x| (x, v))
});
#[cfg(not(any(feature = "use_wasm", feature = "use_icu4c")))]
{
drop(values);
return Err(DataError::custom(
"icu_provider_source must be built with `use_icu4c` or `use_wasm` to enumerated properties data",
));
}

for (cpt_range, raw_range) in code_point_trie.iter_ranges().zip(&self.ranges) {
if (cpt_range.range, TrieValue::to_u32(cpt_range.value))
!= (raw_range.a..=raw_range.b, raw_range.v as u32)
{
return Err(DataError::custom("precomputed CPT doesn't match ranges"));
#[cfg(any(feature = "use_wasm", feature = "use_icu4c"))]
{
use icu::collections::codepointtrie::TrieType;
use icu_codepointtrie_builder::{CodePointTrieBuilder, CodePointTrieBuilderData};

Ok(CodePointTrieBuilder {
data: CodePointTrieBuilderData::Map(values.collect()),
default_value: T::try_from_u32(0).ok().unwrap(),
error_value: T::try_from_u32(0).ok().unwrap(),
trie_type: TrieType::Small,
}
.build())
}

Ok(code_point_trie)
}

pub(crate) fn names_to_values(&self) -> BTreeMap<&str, u16> {
Expand Down
1 change: 0 additions & 1 deletion provider/source/src/properties/uprops_serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ pub(crate) mod enumerated {
#[serde(default)]
pub(crate) values: Vec<super::PropertyValue<u16>>,
pub(crate) ranges: Vec<EnumeratedPropertyMapRange<u16>>,
pub(crate) code_point_trie: super::CodePointTrieToml,
}

#[derive(serde::Deserialize)]
Expand Down
Loading