Skip to content

Commit 82918ac

Browse files
committed
Build CodePointTries in datagen
1 parent 1fbe90c commit 82918ac

File tree

14 files changed

+149
-81
lines changed

14 files changed

+149
-81
lines changed

components/collections/codepointtrie_builder/src/common.rs

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
// called LICENSE at the top level of the ICU4X source tree
33
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
44

5-
use icu_collections::codepointtrie::TrieType;
5+
use icu_collections::codepointtrie::{TrieType, TrieValue};
6+
7+
use crate::{CodePointTrieBuilder, CodePointTrieBuilderData};
68

79
/// Returns the type and width arguments for `umutablecptrie_buildImmutable`
810
pub(crate) fn args_for_build_immutable<U>(trie_type: TrieType) -> (u32, u32) {
@@ -18,3 +20,37 @@ pub(crate) fn args_for_build_immutable<U>(trie_type: TrieType) -> (u32, u32) {
1820
};
1921
(trie_type, width)
2022
}
23+
24+
impl<T> CodePointTrieBuilder<'_, T>
25+
where
26+
T: TrieValue,
27+
{
28+
pub(crate) fn for_each_code_point(&self, mut f: impl FnMut((u32, T))) {
29+
match self.data {
30+
CodePointTrieBuilderData::ValuesByCodePoint(values) => {
31+
for (cp, &value) in values.iter().enumerate() {
32+
if value != self.default_value {
33+
f((cp as u32, value))
34+
}
35+
}
36+
}
37+
CodePointTrieBuilderData::ByCodePoint(ref v) => {
38+
for cp in 0..=(char::MAX as u32) {
39+
let Some(value) = v(cp) else {
40+
continue;
41+
};
42+
if value != self.default_value {
43+
f((cp, value))
44+
}
45+
}
46+
}
47+
CodePointTrieBuilderData::Map(ref map) => {
48+
for (&cp, &value) in map {
49+
if value != self.default_value {
50+
f((cp, value))
51+
}
52+
}
53+
}
54+
}
55+
}
56+
}

components/collections/codepointtrie_builder/src/lib.rs

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@
8585
)
8686
)]
8787

88+
use std::collections::HashMap;
89+
use std::panic::RefUnwindSafe;
90+
use std::panic::UnwindSafe;
91+
8892
use icu_collections::codepointtrie::TrieType;
8993
use icu_collections::codepointtrie::TrieValue;
9094

@@ -103,13 +107,30 @@ mod native;
103107
///
104108
/// [`CodePointTrie`]: icu_collections::codepointtrie::CodePointTrie
105109
#[non_exhaustive]
106-
#[derive(Debug)]
107110
pub enum CodePointTrieBuilderData<'a, T> {
108111
/// A list of values for each code point, starting from code point 0.
109112
///
110113
/// For example, the value for U+0020 (space) should be at index 32 in the slice.
111114
/// Index 0 sets the value for U+0000 (NUL).
112115
ValuesByCodePoint(&'a [T]),
116+
/// A closure that returns a value for a code point.
117+
///
118+
/// This is called once for every code point from 0 through `char::MAX`; returning `None` leaves that code point unset.
119+
ByCodePoint(Box<dyn Fn(u32) -> Option<T> + Send + Sync + UnwindSafe + RefUnwindSafe + 'a>),
120+
/// A map from code points to values.
121+
Map(HashMap<u32, T>),
122+
}
123+
124+
impl<'a, T: std::fmt::Debug> std::fmt::Debug for CodePointTrieBuilderData<'a, T> {
125+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
126+
match self {
127+
Self::ValuesByCodePoint(arg0) => {
128+
f.debug_tuple("ValuesByCodePoint").field(arg0).finish()
129+
}
130+
Self::ByCodePoint(_) => f.debug_tuple("ByCodePoint").finish(),
131+
Self::Map(arg0) => f.debug_tuple("Map").field(arg0).finish(),
132+
}
133+
}
113134
}
114135

115136
/// Settings for building a [`CodePointTrie`].

components/collections/codepointtrie_builder/src/native.rs

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
44

55
use crate::CodePointTrieBuilder;
6-
use crate::CodePointTrieBuilderData;
76

87
use icu_collections::codepointtrie::TrieType;
98
use icu_collections::codepointtrie::TrieValue;
@@ -94,21 +93,16 @@ where
9493
panic!("cpt builder returned error code {error}");
9594
}
9695

97-
let CodePointTrieBuilderData::ValuesByCodePoint(values) = cpt_builder.data;
98-
99-
for (cp, value) in values.iter().enumerate() {
100-
let value = value.to_u32();
101-
if value != cpt_builder.default_value.to_u32() {
102-
unsafe {
103-
// safety: builder is a valid UMutableCPTrie
104-
// safety: we're passing a valid error pointer
105-
umutablecptrie_set(builder, cp as u32, value, &mut error);
106-
}
107-
if error != 0 {
108-
panic!("cpt builder returned error code {error}");
109-
}
96+
cpt_builder.for_each_code_point(|(cp, value)| {
97+
unsafe {
98+
// safety: builder is a valid UMutableCPTrie
99+
// safety: we're passing a valid error pointer
100+
umutablecptrie_set(builder, cp, value.to_u32(), &mut error);
110101
}
111-
}
102+
if error != 0 {
103+
panic!("cpt builder returned error code {error}");
104+
}
105+
});
112106

113107
let (trie_type, width) =
114108
crate::common::args_for_build_immutable::<T::ULE>(cpt_builder.trie_type);

components/collections/codepointtrie_builder/src/wasm.rs

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
44

55
use crate::CodePointTrieBuilder;
6-
use crate::CodePointTrieBuilderData;
76
use icu_collections::codepointtrie::CodePointTrie;
87
use icu_collections::codepointtrie::CodePointTrieHeader;
98
use icu_collections::codepointtrie::TrieValue;
@@ -229,13 +228,9 @@ where
229228
&error_code_ptr,
230229
);
231230

232-
let CodePointTrieBuilderData::ValuesByCodePoint(values) = builder.data;
233-
for (cp, value) in values.iter().enumerate() {
234-
let num = value.to_u32();
235-
if num != builder.default_value.to_u32() {
236-
wasm.umutablecptrie_set(&trie_ptr, cp as u32, num, &error_code_ptr);
237-
}
238-
}
231+
builder.for_each_code_point(|(cp, value)| {
232+
wasm.umutablecptrie_set(&trie_ptr, cp, value.to_u32(), &error_code_ptr);
233+
});
239234

240235
let (trie_type, width) = crate::common::args_for_build_immutable::<T::ULE>(builder.trie_type);
241236

provider/data/properties/data/property_enum_bidi_class_v1.rs.data

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

provider/data/properties/data/property_enum_script_v1.rs.data

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

provider/data/properties/fingerprints.csv

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ property/binary/white/space/v1, <singleton>, 92B, 62B, 8f045d65fbaa6724
6868
property/binary/xdigit/v1, <singleton>, 488B, 459B, 334cc78d29ee5f0e
6969
property/binary/xid/continue/v1, <singleton>, 4868B, 4839B, 386b045eb6522e2f
7070
property/binary/xid/start/v1, <singleton>, 4178B, 4149B, 5090f2f1b95a3480
71-
property/enum/bidi/class/v1, <singleton>, 9884B, 9828B, 327160efebade591
71+
property/enum/bidi/class/v1, <singleton>, 9888B, 9832B, 6e461ef365a53dcc
7272
property/enum/bidi/mirroring/glyph/v1, <singleton>, 4739B, 4673B, 34ba0898d34a25fb
7373
property/enum/canonical/combining/class/v1, <singleton>, 5452B, 5394B, 6c5392d6fc5f2889
7474
property/enum/east/asian/width/v1, <singleton>, 5004B, 4947B, 881a7ee1402b1937
@@ -81,7 +81,7 @@ property/enum/joining/group/v1, <singleton>, 1428B, 1370B, a24c126ff86065d8
8181
property/enum/joining/type/v1, <singleton>, 7136B, 7079B, 39f05bd0b9931b55
8282
property/enum/line/break/v1, <singleton>, 15328B, 15272B, d392409dfdd00a74
8383
property/enum/numeric/type/v1, <singleton>, 5900B, 5842B, 9ffe79ebbb54f863
84-
property/enum/script/v1, <singleton>, 25902B, 25847B, 1b972fe75ad7b369
84+
property/enum/script/v1, <singleton>, 25900B, 25845B, f1ad61c965485250
8585
property/enum/sentence/break/v1, <singleton>, 13964B, 13907B, 36e6dafc005907f8
8686
property/enum/vertical/orientation/v1, <singleton>, 3280B, 3222B, 36f945b6b4d90bc8
8787
property/enum/word/break/v1, <singleton>, 11136B, 11079B, ae4831ff49f66415

provider/data/properties/stubdata/property_enum_bidi_class_v1.rs.data

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

provider/data/properties/stubdata/property_enum_script_v1.rs.data

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

provider/source/data/debug/property/enum/bidi/class/v1.json

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)