unicode-org · hsivonen · Dec 4, 2025 · Dec 5, 2025 · Dec 12, 2025 · Dec 16, 2025
@@ -262,9 +262,9 @@ smallvec = { version = "1.10.0", default-features = false }
 stable_deref_trait = { version = "1.2.0", default-features = false }
 twox-hash = { version = "2.0.0", default-features = false, features = ["xxhash64"] }
 unicode-bidi = { version = "0.3.11", default-features = false }
-utf16_iter = { version = "1.0.2", default-features = false }
-utf8_iter = { version = "1.0.2", default-features = false }
-write16 = { version = "1.0.0", default-features = false }
+utf16_iter = { path = "../utf16_iter", default-features = false }
+utf8_iter = { path = "../utf8_iter", default-features = false }
+write16 = { path = "../write16", default-features = false }
 
 ## External Deps Group 2: Heavy Dev and Datagen deps. No default features.
 zip = { version = "2", default-features = false }

@@ -24,8 +24,8 @@ icu_normalizer = { workspace = true, features = ["utf8_iter", "utf16_iter"] }
 icu_locale_core = { workspace = true, features = ["alloc"] }
 icu_properties = { workspace = true }
 icu_provider = { workspace = true }
-utf8_iter = { workspace = true }
-utf16_iter = { workspace = true }
+utf16_iter = { path = "../../../utf16_iter", features = ["icu_collections"] }
+utf8_iter = { path = "../../../utf8_iter", features = ["icu_collections"] }
 smallvec = { workspace = true, features = ["union", "const_generics", "const_new"] } # alloc
 zerovec = { workspace = true }
 

@@ -44,19 +44,32 @@ use crate::provider::CollationSpecialPrimariesValidated;
 use crate::provider::CollationTailoringV1;
 use core::cmp::Ordering;
 use core::convert::{Infallible, TryFrom};
+use icu_collections::codepointtrie::AbstractCodePointTrie;
+use icu_collections::codepointtrie::CharsWithTrieDefaultForAsciiEx;
+#[cfg(feature = "serde")]
+use icu_collections::codepointtrie::CodePointTrie;
+#[cfg(not(feature = "serde"))]
+use icu_collections::codepointtrie::FastCodePointTrie;
+#[cfg(feature = "latin1")]
+use icu_collections::codepointtrie::Latin1CharsWithTrieEx;
+use icu_collections::codepointtrie::WithTrie;
 use icu_normalizer::provider::DecompositionData;
 use icu_normalizer::provider::DecompositionTables;
 use icu_normalizer::provider::NormalizerNfdDataV1;
 use icu_normalizer::provider::NormalizerNfdTablesV1;
-use icu_normalizer::DecomposingNormalizerBorrowed;
-use icu_normalizer::Decomposition;
 use icu_provider::marker::ErasedMarker;
 use icu_provider::prelude::*;
 use smallvec::SmallVec;
-use utf16_iter::Utf16CharsEx;
-use utf8_iter::Utf8CharsEx;
+use utf16_iter::Utf16CharsWithTrieEx;
+use utf8_iter::Utf8CharsWithTrieDefaultForAsciiEx;
 use zerovec::ule::AsULE;
 
+#[cfg(feature = "serde")]
+type NormTrie<'trie> = CodePointTrie<'trie, u32>;
+
+#[cfg(not(feature = "serde"))]
+type NormTrie<'trie> = FastCodePointTrie<'trie, u32>;
+
 // Special sort key bytes for all levels.
 const LEVEL_SEPARATOR_BYTE: u8 = 1;
 
@@ -187,21 +200,6 @@ fn in_inclusive_range16(i: u16, start: u16, end: u16) -> bool {
     i.wrapping_sub(start) <= (end - start)
 }
 
-/// Helper trait for getting a `char` iterator from Latin1 data.
-///
-/// ✨ *Enabled with the `latin1` Cargo feature.*
-#[cfg(feature = "latin1")]
-trait Latin1Chars {
-    fn latin1_chars(&self) -> impl DoubleEndedIterator<Item = char>;
-}
-
-#[cfg(feature = "latin1")]
-impl Latin1Chars for [u8] {
-    fn latin1_chars(&self) -> impl DoubleEndedIterator<Item = char> {
-        self.iter().map(|b| char::from(*b))
-    }
-}
-
 /// Finds the identical prefix of `left` and `right` containing
 /// Latin1.
 ///
@@ -712,10 +710,12 @@ macro_rules! compare {
             if left_tail.is_empty() && right_tail.is_empty() {
                 return Ordering::Equal;
             }
-            let ret = self.compare_impl(left_tail.$left_to_iter(), right_tail.$right_to_iter(), head.$left_to_iter().rev());
+            let norm_trie = self.norm_trie();
+            let ret = self.compare_impl(left_tail.$left_to_iter(norm_trie), right_tail.$right_to_iter(norm_trie), head.$left_to_iter(norm_trie).rev());
             if self.options.strength() == Strength::Identical && ret == Ordering::Equal {
-                return Decomposition::new(left_tail.$left_to_iter(), self.decompositions, self.tables).map(|c| if c != MERGE_SEPARATOR { c as i32 } else { -1i32 }).cmp(
-                    Decomposition::new(right_tail.$right_to_iter(), self.decompositions, self.tables).map(|c| if c != MERGE_SEPARATOR { c as i32 } else { -1i32 }),
+                // We don't need to remove the leading U+0000, because it compares equal anyway.
+                return icu_normalizer::new_decomposition(left_tail.$left_to_iter(norm_trie), self.tables).map(|c| if c != MERGE_SEPARATOR { c as i32 } else { -1i32 }).cmp(
+                    icu_normalizer::new_decomposition(right_tail.$right_to_iter(norm_trie), self.tables).map(|c| if c != MERGE_SEPARATOR { c as i32 } else { -1i32 }),
                 );
             }
             ret
@@ -886,30 +886,35 @@ macro_rules! collation_elements {
             $tailoring,
             jamo,
             &$self.diacritics.secondaries,
-            $self.decompositions,
             $self.tables,
             $numeric_primary,
             $self.lithuanian_dot_above,
         )
     }};
 }
 
-impl CollatorBorrowed<'_> {
+impl<'data> CollatorBorrowed<'data> {
     /// The resolved options showing how the default options, the requested options,
     /// and the options from locale data were combined.
     pub fn resolved_options(&self) -> ResolvedCollatorOptions {
         self.options.into()
     }
 
+    fn norm_trie(&self) -> &'data NormTrie<'data> {
+        #[allow(clippy::useless_conversion)]
+        <&NormTrie<'data>>::try_from(&self.decompositions.trie)
+            .unwrap_or_else(|_| unreachable!("Incompatible data"))
+    }
+
     compare!(
         /// Compare guaranteed well-formed UTF-8 slices.
         ,
         compare,
         str,
         str,
         split_prefix,
-        chars,
-        chars,
+        chars_with_trie_default_for_ascii,
+        chars_with_trie_default_for_ascii,
     );
 
     compare!(
@@ -921,8 +926,8 @@ impl CollatorBorrowed<'_> {
         [u8],
         [u8],
         split_prefix_u8,
-        chars,
-        chars,
+        chars_with_trie_default_for_ascii,
+        chars_with_trie_default_for_ascii,
     );
 
     compare!(
@@ -933,8 +938,8 @@ impl CollatorBorrowed<'_> {
         [u16],
         [u16],
         split_prefix_u16,
-        chars,
-        chars,
+        chars_with_trie,
+        chars_with_trie,
     );
 
     compare!(
@@ -947,8 +952,8 @@ impl CollatorBorrowed<'_> {
         [u8],
         [u8],
         split_prefix_latin1,
-        latin1_chars,
-        latin1_chars,
+        latin1_chars_with_trie,
+        latin1_chars_with_trie,
     );
 
     compare!(
@@ -966,8 +971,8 @@ impl CollatorBorrowed<'_> {
         [u8],
         [u16],
         split_prefix_latin1_utf16,
-        latin1_chars,
-        chars,
+        latin1_chars_with_trie,
+        chars_with_trie,
     );
 
     #[inline(always)]
@@ -1018,16 +1023,18 @@ impl CollatorBorrowed<'_> {
     /// `head_chars` is an iterator _backward_ over the identical
     /// prefix and `left_chars` and `right_chars` are iterators
     /// _forward_ over the parts after the identical prefix.
-    fn compare_impl<
-        L: Iterator<Item = char>,
-        R: Iterator<Item = char>,
-        H: Iterator<Item = char>,
-    >(
-        &self,
+    fn compare_impl<L, R, H, T>(
+        &'data self,
         left_chars: L,
         right_chars: R,
         mut head_chars: H,
-    ) -> Ordering {
+    ) -> Ordering
+    where
+        L: Iterator<Item = (char, u32)> + WithTrie<'data, T, u32> + 'data,
+        R: Iterator<Item = (char, u32)> + WithTrie<'data, T, u32> + 'data,
+        H: Iterator<Item = (char, u32)> + 'data,
+        T: AbstractCodePointTrie<'data, u32> + 'data,
+    {
         // Sadly, it looks like variable CEs and backward second level
         // require us to store the full 64-bit CEs instead of storing only
         // the NonPrimary part.
@@ -1052,6 +1059,7 @@ impl CollatorBorrowed<'_> {
 
         let tailoring = self.tailoring_or_root();
         let numeric_primary = self.numeric_primary();
+
         let mut left = collation_elements!(self, left_chars, tailoring, numeric_primary);
         let mut right = collation_elements!(self, right_chars, tailoring, numeric_primary);
 
@@ -1077,11 +1085,10 @@ impl CollatorBorrowed<'_> {
         // This loop is only broken out of as goto forward.
         #[expect(clippy::never_loop)]
         'prefix: loop {
-            if let Some(mut head_last_c) = head_chars.next() {
-                let norm_trie = &self.decompositions.trie;
+            if let Some((mut head_last_c, head_last_trie_val)) = head_chars.next() {
                 let mut head_last = CharacterAndClassAndTrieValue::new_with_trie_val(
                     head_last_c,
-                    norm_trie.get(head_last_c),
+                    head_last_trie_val,
                 );
                 let mut head_last_ce32 = CollationElement32::default();
                 let mut head_last_ok = false;
@@ -1274,14 +1281,12 @@ impl CollatorBorrowed<'_> {
                     tail_first_ce32 = head_last_ce32;
                     tail_first_ok = head_last_ok;
 
-                    head_last_c = if let Some(head_last_c) = head_chars.next() {
-                        head_last_c
-                    } else {
+                    let Some((head_last_c_new, decomposition)) = head_chars.next() else {
                         // We need to step back beyond the start of the prefix.
                         // Treat as good boundary.
                         break 'prefix;
                     };
-                    let decomposition = norm_trie.get(head_last_c);
+                    head_last_c = head_last_c_new;
                     head_last = CharacterAndClassAndTrieValue::new_with_trie_val(
                         head_last_c,
                         decomposition,
@@ -1852,7 +1857,7 @@ impl CollatorBorrowed<'_> {
         S: CollationKeySink + ?Sized,
         S::State: Default,
     {
-        self.write_sort_key_impl(s.chars(), sink)
+        self.write_sort_key_impl(s.chars_with_trie_default_for_ascii(self.norm_trie()), sink)
     }
 
     /// Given potentially invalid UTF-8, write the sort key bytes up to the collator's strength.
@@ -1863,7 +1868,7 @@ impl CollatorBorrowed<'_> {
         S: CollationKeySink + ?Sized,
         S::State: Default,
     {
-        self.write_sort_key_impl(s.chars(), sink)
+        self.write_sort_key_impl(s.chars_with_trie_default_for_ascii(self.norm_trie()), sink)
     }
 
     /// Given potentially invalid UTF-16, write the sort key bytes up to the collator's strength.
@@ -1874,12 +1879,17 @@ impl CollatorBorrowed<'_> {
         S: CollationKeySink + ?Sized,
         S::State: Default,
     {
-        self.write_sort_key_impl(s.chars(), sink)
+        self.write_sort_key_impl(s.chars_with_trie(self.norm_trie()), sink)
     }
 
-    fn write_sort_key_impl<I, S>(&self, iter: I, sink: &mut S) -> Result<S::Output, S::Error>
+    fn write_sort_key_impl<I, T, S>(
+        &'data self,
+        iter: I,
+        sink: &mut S,
+    ) -> Result<S::Output, S::Error>
     where
-        I: Iterator<Item = char> + Clone,
+        I: Iterator<Item = (char, u32)> + WithTrie<'data, T, u32> + Clone + 'data,
+        T: AbstractCodePointTrie<'data, u32> + 'data,
         S: CollationKeySink + ?Sized,
         S::State: Default,
     {
@@ -1893,11 +1903,10 @@ impl CollatorBorrowed<'_> {
         self.write_sort_key_up_to_quaternary(iter, sink, &mut state)?;
 
         if let Some(iter) = identical {
-            let nfd =
-                DecomposingNormalizerBorrowed::new_with_data(self.decompositions, self.tables);
             sink.write_byte(&mut state, LEVEL_SEPARATOR_BYTE)?;
 
-            let iter = nfd.normalize_iter(iter);
+            let mut iter = icu_normalizer::new_decomposition(iter, self.tables);
+            let _ = iter.next(); // Discard the U+0000.
             write_identical_level(iter, sink, &mut state)?;
         }
 
@@ -1908,14 +1917,15 @@ impl CollatorBorrowed<'_> {
     ///
     /// Optionally write the case level.  Separate levels with the `LEVEL_SEPARATOR_BYTE`, but
     /// do not write a terminating zero as with a C string.
-    fn write_sort_key_up_to_quaternary<I, S>(
-        &self,
+    fn write_sort_key_up_to_quaternary<I, S, T>(
+        &'data self,
         iter: I,
         sink: &mut S,
         state: &mut S::State,
     ) -> Result<(), S::Error>
     where
-        I: Iterator<Item = char>,
+        I: Iterator<Item = (char, u32)> + WithTrie<'data, T, u32> + Clone + 'data,
+        T: AbstractCodePointTrie<'data, u32> + 'data,
         S: CollationKeySink + ?Sized,
     {
         // This algorithm comes from `CollationKeys::writeSortKeyUpToQuaternary` in ICU4C.