@@ -588,20 +588,22 @@ impl CollationMetadata {
588588/// to be stable, their Rust representation might not be. Use with caution.
589589/// </div>
590590#[ derive( Debug , PartialEq , Clone , yoke:: Yokeable , zerofrom:: ZeroFrom ) ]
591- #[ cfg_attr( feature = "datagen" , derive( serde :: Serialize , databake:: Bake ) ) ]
591+ #[ cfg_attr( feature = "datagen" , derive( databake:: Bake ) ) ]
592592#[ cfg_attr( feature = "datagen" , databake( path = icu_collator:: provider) ) ]
593593pub struct CollationSpecialPrimaries < ' data > {
594594 /// The primaries corresponding to `MaxVariable`
595595 /// character classes packed so that each fits in
596596 /// 16 bits. Length must match the number of enum
597597 /// variants in `MaxVariable`, currently 4.
598- ///
599- /// This is potentially followed by 256 bits
600- /// (packed in 16 u16s) to classify every possible
601- /// byte into compressible or non-compressible.
602598 pub last_primaries : ZeroVec < ' data , u16 > ,
603599 /// The high 8 bits of the numeric primary
604600 pub numeric_primary : u8 ,
601+ /// 256 bits (packed in 16 u16s) to classify every possible
602+ /// byte into compressible or non-compressible.
603+ ///
604+ /// In the serde encoding, this is appended to `last_primaries`,
605+ /// or might be missing.
606+ pub compressible_bytes : ZeroVec < ' data , u16 > ,
605607}
606608
607609#[ cfg( feature = "serde" ) ]
@@ -613,126 +615,87 @@ impl<'de> serde::Deserialize<'de> for CollationSpecialPrimaries<'de> {
613615 #[ derive( serde:: Deserialize ) ]
614616 struct Raw < ' data > {
615617 #[ cfg_attr( feature = "serde" , serde( borrow) ) ]
616- last_primaries : ZeroVec < ' data , u16 > ,
618+ concatenated : & ' data ZeroSlice < u16 > ,
617619 numeric_primary : u8 ,
618620 }
619621
620622 let Raw {
621- last_primaries ,
623+ concatenated ,
622624 numeric_primary,
623625 } = Raw :: deserialize ( deserializer) ?;
624626
625- // `variant_count` isn't stable yet:
626- // https://github.com/rust-lang/rust/issues/73662
627- if last_primaries. len ( ) <= ( MaxVariable :: Currency as usize ) {
627+ let Some ( ( l, c) ) = concatenated
628+ . as_ule_slice ( )
629+ // `variant_count` isn't stable yet:
630+ // https://github.com/rust-lang/rust/issues/73662
631+ . split_at_checked ( MaxVariable :: Currency as usize )
632+ else {
628633 return Err ( serde:: de:: Error :: custom ( "invalid" ) ) ;
634+ } ;
635+
636+ let last_primaries = ZeroSlice :: from_ule_slice ( l) . as_zerovec ( ) ;
637+ let mut compressible_bytes = ZeroSlice :: from_ule_slice ( c) . as_zerovec ( ) ;
638+
639+ if c. len ( ) != 16 {
640+ compressible_bytes = zerovec:: zerovec!(
641+ u16 ; <u16 as AsULE >:: ULE :: from_unsigned; [
642+ 0b0000_0000_0000_0000 ,
643+ 0b0000_0000_0000_0000 ,
644+ 0b0000_0000_0000_0000 ,
645+ 0b0000_0000_0000_0000 ,
646+ 0b0000_0000_0000_0000 ,
647+ 0b0000_0000_0000_0000 ,
648+ 0b1111_1111_1111_1110 ,
649+ 0b1111_1111_1111_1111 ,
650+ 0b0000_0000_0000_0001 ,
651+ 0b0000_0000_0000_0000 ,
652+ 0b0000_0000_0000_0000 ,
653+ 0b0000_0000_0000_0000 ,
654+ 0b0000_0000_0000_0000 ,
655+ 0b0000_0000_0000_0000 ,
656+ 0b0000_0000_0000_0000 ,
657+ 0b0100_0000_0000_0000 ,
658+ ] ) ;
629659 }
630660
631661 Ok ( Self {
632662 last_primaries,
633663 numeric_primary,
664+ compressible_bytes,
634665 } )
635666 }
636667}
637668
638- #[ derive( Debug , PartialEq , Clone , yoke:: Yokeable , zerofrom:: ZeroFrom ) ]
639- pub ( crate ) struct CollationSpecialPrimariesValidated < ' data > {
640- /// The primaries corresponding to `MaxVariable`
641- /// character classes packed so that each fits in
642- /// 16 bits. Length must match the number of enum
643- /// variants in `MaxVariable`, currently 4.
644- pub last_primaries : ZeroVec < ' data , u16 > ,
645- /// The high 8 bits of the numeric primary
646- pub numeric_primary : u8 ,
647- /// 256 bits (packed in 16 u16s) to classify every possible
648- /// byte into compressible or non-compressible.
649- pub compressible_bytes : & ' data [ <u16 as AsULE >:: ULE ; 16 ] ,
650- }
651-
652- impl < ' a > CollationSpecialPrimaries < ' a > {
653- const HARDCODED_COMPRESSIBLE_BYTES_FALLBACK : & ' static [ <u16 as AsULE >:: ULE ; 16 ] = & [
654- <u16 as AsULE >:: ULE :: from_unsigned ( 0b0000_0000_0000_0000 ) ,
655- <u16 as AsULE >:: ULE :: from_unsigned ( 0b0000_0000_0000_0000 ) ,
656- <u16 as AsULE >:: ULE :: from_unsigned ( 0b0000_0000_0000_0000 ) ,
657- <u16 as AsULE >:: ULE :: from_unsigned ( 0b0000_0000_0000_0000 ) ,
658- <u16 as AsULE >:: ULE :: from_unsigned ( 0b0000_0000_0000_0000 ) ,
659- <u16 as AsULE >:: ULE :: from_unsigned ( 0b0000_0000_0000_0000 ) ,
660- <u16 as AsULE >:: ULE :: from_unsigned ( 0b1111_1111_1111_1110 ) ,
661- <u16 as AsULE >:: ULE :: from_unsigned ( 0b1111_1111_1111_1111 ) ,
662- <u16 as AsULE >:: ULE :: from_unsigned ( 0b0000_0000_0000_0001 ) ,
663- <u16 as AsULE >:: ULE :: from_unsigned ( 0b0000_0000_0000_0000 ) ,
664- <u16 as AsULE >:: ULE :: from_unsigned ( 0b0000_0000_0000_0000 ) ,
665- <u16 as AsULE >:: ULE :: from_unsigned ( 0b0000_0000_0000_0000 ) ,
666- <u16 as AsULE >:: ULE :: from_unsigned ( 0b0000_0000_0000_0000 ) ,
667- <u16 as AsULE >:: ULE :: from_unsigned ( 0b0000_0000_0000_0000 ) ,
668- <u16 as AsULE >:: ULE :: from_unsigned ( 0b0000_0000_0000_0000 ) ,
669- <u16 as AsULE >:: ULE :: from_unsigned ( 0b0100_0000_0000_0000 ) ,
670- ] ;
671-
672- pub ( crate ) fn validated ( self ) -> CollationSpecialPrimariesValidated < ' a > {
673- let ( last_primaries, compressible_bytes) =
674- if let Some ( borrowed) = self . last_primaries . as_maybe_borrowed ( ) {
675- let ( l, c) = borrowed
676- . as_ule_slice ( )
677- // by invariant
678- . split_at ( MaxVariable :: Currency as usize ) ;
679- (
680- l,
681- c. try_into ( )
682- . unwrap_or ( Self :: HARDCODED_COMPRESSIBLE_BYTES_FALLBACK ) ,
683- )
684- } else {
685- (
686- self . last_primaries . as_slice ( ) . as_ule_slice ( ) ,
687- Self :: HARDCODED_COMPRESSIBLE_BYTES_FALLBACK ,
688- )
689- } ;
690-
691- let last_primaries_truncate_len = last_primaries. len ( ) ;
692- CollationSpecialPrimariesValidated {
693- last_primaries : self . last_primaries . truncated ( last_primaries_truncate_len) ,
694- numeric_primary : self . numeric_primary ,
695- compressible_bytes,
669+ #[ cfg( feature = "datagen" ) ]
670+ impl serde:: Serialize for CollationSpecialPrimaries < ' _ > {
671+ fn serialize < S > ( & self , serializer : S ) -> Result < S :: Ok , S :: Error >
672+ where
673+ S : serde:: Serializer ,
674+ {
675+ #[ derive( serde:: Serialize ) ]
676+ struct Raw {
677+ concatenated : ZeroVec < ' static , u16 > ,
678+ numeric_primary : u8 ,
696679 }
697- }
698680
699- pub ( crate ) const fn const_validated ( & ' static self ) -> CollationSpecialPrimariesValidated < ' a > {
700- let borrowed = self . last_primaries . as_slice ( ) ;
701- let ( last_primaries, compressible_bytes) = borrowed
702- . as_ule_slice ( )
703- // by invariant
704- . split_at ( MaxVariable :: Currency as usize ) ;
705- // TODO: use c.as_array() on MSRV 1.93
706- let compressible_bytes = if compressible_bytes. len ( ) == 16 {
707- unsafe { & * ( compressible_bytes. as_ptr ( ) as * const [ <u16 as AsULE >:: ULE ; 16 ] ) }
708- } else {
709- Self :: HARDCODED_COMPRESSIBLE_BYTES_FALLBACK
710- } ;
711-
712- CollationSpecialPrimariesValidated {
713- last_primaries : ZeroSlice :: from_ule_slice ( last_primaries) . as_zerovec ( ) ,
681+ Raw {
682+ concatenated : self
683+ . last_primaries
684+ . iter ( )
685+ . chain ( self . compressible_bytes . iter ( ) )
686+ . collect ( ) ,
714687 numeric_primary : self . numeric_primary ,
715- compressible_bytes,
716688 }
689+ . serialize ( serializer)
717690 }
718691}
719692
720- #[ test]
721- fn compressible_bytes ( ) {
722- assert_eq ! (
723- Baked :: SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1
724- . clone( )
725- . validated( ) ,
726- Baked :: SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1 . const_validated( ) ,
727- ) ;
728- }
729-
730693icu_provider:: data_struct!(
731694 CollationSpecialPrimaries <' _>,
732695 #[ cfg( feature = "datagen" ) ]
733696) ;
734697
735- impl CollationSpecialPrimariesValidated < ' _ > {
698+ impl CollationSpecialPrimaries < ' _ > {
736699 #[ expect( clippy:: unwrap_used) ]
737700 pub ( crate ) fn last_primary_for_group ( & self , max_variable : MaxVariable ) -> u32 {
738701 // `unwrap` is OK, because `Collator::try_new` validates the length.
@@ -744,11 +707,10 @@ impl CollationSpecialPrimariesValidated<'_> {
744707
745708 #[ allow( dead_code) ]
746709 pub ( crate ) fn is_compressible ( & self , b : u8 ) -> bool {
747- // Indexing slicing OK by construction and pasting this
748- // into Compiler Explorer shows that the panic
749- // is optimized away.
750- #[ expect( clippy:: indexing_slicing) ]
751- let field = u16:: from_unaligned ( self . compressible_bytes [ usize:: from ( b >> 4 ) ] ) ;
710+ let field = self
711+ . compressible_bytes
712+ . get ( usize:: from ( b >> 4 ) )
713+ . unwrap_or_default ( ) ;
752714 let mask = 1 << ( b & 0b1111 ) ;
753715 ( field & mask) != 0
754716 }
0 commit comments